Index: conf/options.i386 =================================================================== --- conf/options.i386 (.../stable/6/sys) (revision 184012) +++ conf/options.i386 (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -163,3 +163,6 @@ # Debugging KDB_STOP_NMI opt_kdb.h NPX_DEBUG opt_npx.h + +NATIVE opt_global.h +XEN opt_global.h Index: conf/kern.pre.mk =================================================================== --- conf/kern.pre.mk (.../stable/6/sys) (revision 184012) +++ conf/kern.pre.mk (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -20,12 +20,12 @@ COPTFLAGS?= -O .else . if defined(DEBUG) -_MINUS_O= -O +_MINUS_O= -O -fno-optimize-sibling-calls . else _MINUS_O= -O2 . endif . if ${MACHINE_ARCH} == "amd64" -COPTFLAGS?=-O2 -frename-registers -pipe +COPTFLAGS?=${_MINUS_O} -frename-registers -pipe . else COPTFLAGS?=${_MINUS_O} -pipe . endif @@ -70,6 +70,9 @@ # .. and the same for em INCLUDES+= -I$S/dev/em +INCLUDES+= -I$S/xen/interface -I$S/xen/interface/io -I$S/xen/interface/hvm + + CFLAGS= ${COPTFLAGS} ${CWARNFLAGS} ${DEBUG} CFLAGS+= ${INCLUDES} -D_KERNEL -DHAVE_KERNEL_OPTION_HEADERS -include opt_global.h .if ${CC} != "icc" Index: conf/files.i386 =================================================================== --- conf/files.i386 (.../stable/6/sys) (revision 184012) +++ conf/files.i386 (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -291,8 +291,8 @@ i386/i386/atomic.c standard \ compile-with "${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}" i386/i386/autoconf.c standard -i386/i386/bios.c standard -i386/i386/bioscall.s standard +i386/i386/bios.c optional native +i386/i386/bioscall.s optional native i386/i386/busdma_machdep.c standard i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb @@ -301,7 +301,8 @@ i386/i386/elan-mmcr.c optional cpu_elan i386/i386/elan-mmcr.c optional cpu_soekris i386/i386/elf_machdep.c standard -i386/i386/exception.s standard +i386/i386/exception.s optional native +i386/xen/exception.s optional xen i386/i386/gdb_machdep.c optional gdb i386/i386/geode.c optional cpu_geode i386/i386/i686_mem.c optional mem @@ -314,22 +315,27 @@ i386/i386/k6_mem.c optional mem i386/i386/legacy.c standard i386/i386/local_apic.c optional apic -i386/i386/locore.s standard no-obj +i386/i386/locore.s optional native no-obj +i386/xen/locore.s optional xen no-obj i386/i386/longrun.c optional cpu_enable_longrun i386/i386/machdep.c standard i386/i386/mem.c optional mem i386/i386/minidump_machdep.c standard i386/i386/mp_clock.c optional smp -i386/i386/mp_machdep.c optional smp +i386/i386/mp_machdep.c optional native smp +i386/xen/mp_machdep.c optional xen smp i386/i386/mp_watchdog.c optional mp_watchdog smp -i386/i386/mpboot.s optional smp -i386/i386/mptable.c optional apic +i386/i386/mpboot.s optional native smp +i386/xen/mptable.c optional apic xen +i386/i386/mptable.c optional apic native i386/i386/mptable_pci.c optional apic pci i386/i386/msi.c optional apic pci i386/i386/nexus.c standard i386/i386/perfmon.c optional perfmon i386/i386/perfmon.c optional perfmon profiling-routine -i386/i386/pmap.c standard +i386/i386/pmap.c optional native +i386/xen/pmap.c optional xen +i386/xen/xen_machdep.c optional xen i386/i386/ptrace_machdep.c standard i386/i386/support.s standard i386/i386/swtch.s standard @@ -358,9 +364,10 @@ i386/ibcs2/ibcs2_xenix.c optional ibcs2 i386/ibcs2/ibcs2_xenix_sysent.c optional ibcs2 i386/ibcs2/imgact_coff.c optional ibcs2 -i386/isa/atpic.c standard +i386/isa/atpic.c optional atpic #i386/isa/atpic_vector.s 
standard -i386/isa/clock.c standard +i386/isa/clock.c optional native +i386/xen/clock.c optional xen i386/isa/elcr.c standard i386/isa/elink.c optional ep i386/isa/elink.c optional ie Index: conf/files =================================================================== --- conf/files (.../stable/6/sys) (revision 184012) +++ conf/files (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -1475,6 +1475,7 @@ libkern/strcat.c standard libkern/strcmp.c standard libkern/strcpy.c standard +libkern/strcspn.c standard libkern/strdup.c standard libkern/strlcat.c standard libkern/strlcpy.c standard @@ -2043,4 +2044,41 @@ xdr/xdr_mbuf.c optional nfslockd xdr/xdr_mem.c optional nfslockd xdr/xdr_reference.c optional nfslockd -xdr/xdr_sizeof.c optional nfslockd \ No newline at end of file +xdr/xdr_sizeof.c optional nfslockd + + +xen/gnttab.c optional xen +xen/features.c optional xen +xen/evtchn/evtchn.c optional xen +xen/evtchn/evtchn_dev.c optional xen +xen/reboot.c optional xen +xen/xenbus/xenbus_client.c optional xen +xen/xenbus/xenbus_comms.c optional xen +xen/xenbus/xenbus_dev.c optional xen +xen/xenbus/xenbus_if.m optional xen +xen/xenbus/xenbus_probe.c optional xen +#xen/xenbus/xenbus_probe_backend.c optional xen +xen/xenbus/xenbus_xs.c optional xen +dev/xen/balloon/balloon.c optional xen +dev/xen/balloon/balloon.c optional xenhvm +dev/xen/console/console.c optional xen +dev/xen/console/xencons_ring.c optional xen +dev/xen/blkfront/blkfront.c optional xen +dev/xen/netfront/netfront.c optional xen +dev/xen/blkfront/blkfront.c optional xenhvm +dev/xen/netfront/netfront.c optional xenhvm + +xen/gnttab.c optional xenhvm +xen/features.c optional xenhvm +dev/xen/xenpci/evtchn.c optional xenhvm +dev/xen/xenpci/machine_reboot.c optional xenhvm +xen/evtchn/evtchn_dev.c optional xenhvm +xen/reboot.c optional xenhvm +xen/xenbus/xenbus_client.c optional xenhvm +xen/xenbus/xenbus_comms.c optional xenhvm +xen/xenbus/xenbus_dev.c optional xenhvm +xen/xenbus/xenbus_if.m optional xenhvm +xen/xenbus/xenbus_probe.c optional xenhvm +#xen/xenbus/xenbus_probe_backend.c optional xenhvm +xen/xenbus/xenbus_xs.c optional xenhvm +dev/xen/xenpci/xenpci.c optional xenpci Index: conf/options.amd64 =================================================================== --- conf/options.amd64 (.../stable/6/sys) (revision 184012) +++ conf/options.amd64 (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -58,3 +58,5 @@ # Debugging KDB_STOP_NMI opt_kdb.h + +XENHVM opt_global.h Index: kern/kern_timeout.c =================================================================== --- kern/kern_timeout.c (.../stable/6/sys) (revision 184012) +++ kern/kern_timeout.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -557,7 +557,7 @@ mtx_unlock_spin(&callout_lock); sleepq_add(&callout_wait, &callout_lock.mtx_object, "codrain", - SLEEPQ_MSLEEP, 0); + SLEEPQ_SLEEP, 0); sleepq_wait(&callout_wait); sq_locked = 0; Index: kern/kern_mutex.c =================================================================== --- kern/kern_mutex.c (.../stable/6/sys) (revision 184012) +++ kern/kern_mutex.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -92,25 +92,66 @@ #ifdef DDB static void db_show_mtx(struct lock_object *lock); #endif +static void lock_mtx(struct lock_object *lock, int how); +static void lock_spin(struct lock_object *lock, int how); +static int unlock_mtx(struct lock_object *lock); +static int unlock_spin(struct lock_object *lock); + /* * Lock classes for sleep and spin mutexes. 
*/ struct lock_class lock_class_mtx_sleep = { - "sleep mutex", - LC_SLEEPLOCK | LC_RECURSABLE, + .lc_name = "sleep mutex", + .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE, #ifdef DDB - db_show_mtx + .lc_ddb_show = db_show_mtx, #endif + .lc_lock = lock_mtx, + .lc_unlock = unlock_mtx, }; struct lock_class lock_class_mtx_spin = { - "spin mutex", - LC_SPINLOCK | LC_RECURSABLE, + .lc_name = "spin mutex", + .lc_flags = LC_SPINLOCK | LC_RECURSABLE, #ifdef DDB - db_show_mtx + .lc_ddb_show = db_show_mtx, #endif + .lc_lock = lock_spin, + .lc_unlock = unlock_spin, }; +void +lock_mtx(struct lock_object *lock, int how) +{ + + mtx_lock((struct mtx *)lock); +} + +void +lock_spin(struct lock_object *lock, int how) +{ + + panic("spin locks can only use msleep_spin"); +} + +int +unlock_mtx(struct lock_object *lock) +{ + struct mtx *m; + + m = (struct mtx *)lock; + mtx_assert(m, MA_OWNED | MA_NOTRECURSED); + mtx_unlock(m); + return (0); +} + +int +unlock_spin(struct lock_object *lock) +{ + + panic("spin locks can only use msleep_spin"); +} + /* * System-wide mutexes */ Index: kern/kern_synch.c =================================================================== --- kern/kern_synch.c (.../stable/6/sys) (revision 184012) +++ kern/kern_synch.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -64,11 +64,18 @@ #include +#ifdef XEN +#include +#include +#include +#endif + static void synch_setup(void *dummy); SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, NULL) int hogticks; int lbolt; +static int pause_wchan; static struct callout loadav_callout; static struct callout lbolt_callout; @@ -100,7 +107,144 @@ init_sleepqueues(); } + /* + * General sleep call. Suspends the current thread until a wakeup is + * performed on the specified identifier. The thread will then be made + * runnable with the specified priority. Sleeps at most timo/hz seconds + * (0 means no timeout). If pri includes PCATCH flag, signals are checked + * before and after sleeping, else signals are not checked. Returns 0 if + * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a + * signal needs to be delivered, ERESTART is returned if the current system + * call should be restarted if possible, and EINTR is returned if the system + * call should be interrupted by the signal (return EINTR). + * + * The lock argument is unlocked before the caller is suspended, and + * re-locked before _sleep() returns. If priority includes the PDROP + * flag the lock is not re-locked before returning. + */ +int +_sleep(void *ident, struct lock_object *lock, int priority, + const char *wmesg, int timo) +{ + struct thread *td; + struct proc *p; + struct lock_class *class; + int catch, flags, lock_state, pri, rval; + WITNESS_SAVE_DECL(lock_witness); + + td = curthread; + p = td->td_proc; +#ifdef KTRACE + if (KTRPOINT(td, KTR_CSW)) + ktrcsw(1, 0); +#endif + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock, + "Sleeping on \"%s\"", wmesg); + KASSERT(timo != 0 || mtx_owned(&Giant) || lock != NULL || + ident == &lbolt, ("sleeping without a lock")); + KASSERT(p != NULL, ("msleep1")); + KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); + if (lock != NULL) + class = LOCK_CLASS(lock); + else + class = NULL; + + if (cold) { + /* + * During autoconfiguration, just return; + * don't run any other threads or panic below, + * in case this is the idle thread and already asleep. + * XXX: this used to do "s = splhigh(); splx(safepri); + * splx(s);" to give interrupts a chance, but there is + * no way to give interrupts a chance now. 
+ */ + if (lock != NULL && priority & PDROP) + class->lc_unlock(lock); + return (0); + } + catch = priority & PCATCH; + rval = 0; + + /* + * If we are already on a sleep queue, then remove us from that + * sleep queue first. We have to do this to handle recursive + * sleeps. + */ + if (TD_ON_SLEEPQ(td)) + sleepq_remove(td, td->td_wchan); + + if (ident == &pause_wchan) + flags = SLEEPQ_PAUSE; + else + flags = SLEEPQ_SLEEP; + if (catch) + flags |= SLEEPQ_INTERRUPTIBLE; + + sleepq_lock(ident); + CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)", + td->td_tid, p->p_pid, p->p_comm, wmesg, ident); + + DROP_GIANT(); + if (lock != NULL && !(class->lc_flags & LC_SLEEPABLE)) { + WITNESS_SAVE(lock, lock_witness); + lock_state = class->lc_unlock(lock); + } else + /* GCC needs to follow the Yellow Brick Road */ + lock_state = -1; + + /* + * We put ourselves on the sleep queue and start our timeout + * before calling thread_suspend_check, as we could stop there, + * and a wakeup or a SIGCONT (or both) could occur while we were + * stopped without resuming us. Thus, we must be ready for sleep + * when cursig() is called. If the wakeup happens while we're + * stopped, then td will no longer be on a sleep queue upon + * return from cursig(). + */ + sleepq_add(ident, ident == &lbolt ? NULL : lock, wmesg, flags, 0); + if (timo) + sleepq_set_timeout(ident, timo); + if (lock != NULL && class->lc_flags & LC_SLEEPABLE) { + sleepq_release(ident); + WITNESS_SAVE(lock, lock_witness); + lock_state = class->lc_unlock(lock); + sleepq_lock(ident); + } + + /* + * Adjust this thread's priority, if necessary. + */ + pri = priority & PRIMASK; + if (pri != 0 && pri != td->td_priority) { + mtx_lock_spin(&sched_lock); + sched_prio(td, pri); + mtx_unlock_spin(&sched_lock); + } + + if (timo && catch) + rval = sleepq_timedwait_sig(ident); + else if (timo) + rval = sleepq_timedwait(ident); + else if (catch) + rval = sleepq_wait_sig(ident); + else { + sleepq_wait(ident); + rval = 0; + } +#ifdef KTRACE + if (KTRPOINT(td, KTR_CSW)) + ktrcsw(0, 0); +#endif + PICKUP_GIANT(); + if (lock != NULL && !(priority & PDROP)) { + class->lc_lock(lock, lock_state); + WITNESS_RESTORE(lock, lock_witness); + } + return (rval); +} + +/* * General sleep call. Suspends the current process until a wakeup is * performed on the specified identifier. The process will then be made * runnable with the specified priority. Sleeps at most timo/hz seconds @@ -164,7 +308,7 @@ if (TD_ON_SLEEPQ(td)) sleepq_remove(td, td->td_wchan); - flags = SLEEPQ_MSLEEP; + flags = SLEEPQ_SLEEP; if (catch) flags |= SLEEPQ_INTERRUPTIBLE; @@ -265,7 +409,7 @@ /* * We put ourselves on the sleep queue and start our timeout. 
*/ - sleepq_add(ident, &mtx->mtx_object, wmesg, SLEEPQ_MSLEEP, 0); + sleepq_add(ident, &mtx->mtx_object, wmesg, SLEEPQ_SLEEP, 0); if (timo) sleepq_set_timeout(ident, timo); @@ -314,7 +458,7 @@ { sleepq_lock(ident); - sleepq_broadcast(ident, SLEEPQ_MSLEEP, -1, 0); + sleepq_broadcast(ident, SLEEPQ_SLEEP, -1, 0); } /* @@ -328,7 +472,7 @@ { sleepq_lock(ident); - sleepq_signal(ident, SLEEPQ_MSLEEP, -1, 0); + sleepq_signal(ident, SLEEPQ_SLEEP, -1, 0); } /* @@ -417,6 +561,9 @@ td, td->td_proc->p_comm, td->td_priority, td->td_inhibitors, td->td_wmesg, td->td_lockname); #endif +#ifdef XEN + PT_UPDATES_FLUSH(); +#endif sched_switch(td, newtd, flags); CTR3(KTR_SCHED, "mi_switch: running %p(%s) prio %d", td, td->td_proc->p_comm, td->td_priority); Index: kern/subr_trap.c =================================================================== --- kern/subr_trap.c (.../stable/6/sys) (revision 184012) +++ kern/subr_trap.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -67,6 +67,12 @@ #include #include +#ifdef XEN +#include +#include +#include +#endif + /* * Define the code needed before returning to user mode, for * trap and syscall. @@ -139,6 +145,9 @@ sched_userret(td); KASSERT(td->td_locks == 0, ("userret: Returning with %d locks held.", td->td_locks)); +#ifdef XEN + PT_UPDATES_FLUSH(); +#endif } /* Index: kern/kern_rwlock.c =================================================================== --- kern/kern_rwlock.c (.../stable/6/sys) (revision 184012) +++ kern/kern_rwlock.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -60,13 +60,19 @@ static void db_show_rwlock(struct lock_object *lock); #endif +static void lock_rw(struct lock_object *lock, int how); +static int unlock_rw(struct lock_object *lock); + struct lock_class lock_class_rw = { .lc_name = "rw", .lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE, #ifdef DDB .lc_ddb_show = db_show_rwlock, #endif + .lc_lock = lock_rw, + .lc_unlock = unlock_rw }; + /* * Return a pointer to the owning thread if the lock is write-locked or @@ -99,6 +105,34 @@ #endif void +lock_rw(struct lock_object *lock, int how) +{ + struct rwlock *rw; + + rw = (struct rwlock *)lock; + if (how) + rw_wlock(rw); + else + rw_rlock(rw); +} + +int +unlock_rw(struct lock_object *lock) +{ + struct rwlock *rw; + + rw = (struct rwlock *)lock; + rw_assert(rw, RA_LOCKED | LA_NOTRECURSED); + if (rw->rw_lock & RW_LOCK_READ) { + rw_runlock(rw); + return (0); + } else { + rw_wunlock(rw); + return (1); + } +} + +void rw_init_flags(struct rwlock *rw, const char *name, int opts) { int flags; Index: kern/kern_sx.c =================================================================== --- kern/kern_sx.c (.../stable/6/sys) (revision 184012) +++ kern/kern_sx.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -108,12 +108,17 @@ static void db_show_sx(struct lock_object *lock); #endif +static void lock_sx(struct lock_object *lock, int how); +static int unlock_sx(struct lock_object *lock); + struct lock_class lock_class_sx = { .lc_name = "sx", .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE, #ifdef DDB .lc_ddb_show = db_show_sx, #endif + .lc_lock = lock_sx, + .lc_unlock = unlock_sx, }; #ifndef INVARIANTS @@ -121,6 +126,34 @@ #endif void +lock_sx(struct lock_object *lock, int how) +{ + struct sx *sx; + + sx = (struct sx *)lock; + if (how) + sx_xlock(sx); + else + sx_slock(sx); +} + +int +unlock_sx(struct lock_object *lock) +{ + struct sx *sx; + + sx = (struct sx *)lock; + sx_assert(sx, SA_LOCKED | SA_NOTRECURSED); + if (sx_xlocked(sx)) { + sx_xunlock(sx); + return (1); + } else { 
+ sx_sunlock(sx); + return (0); + } +} + +void sx_sysinit(void *arg) { struct sx_args *sargs = arg; @@ -845,6 +878,7 @@ } } +#if 0 /* * Atomically drop an sx lock while going to sleep. This is just a hack * for 6.x. In 7.0 and later this is done more cleanly. @@ -961,6 +995,7 @@ } return (rval); } +#endif #ifdef INVARIANT_SUPPORT #ifndef INVARIANTS Index: kern/kern_fork.c =================================================================== --- kern/kern_fork.c (.../stable/6/sys) (revision 184012) +++ kern/kern_fork.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -112,10 +112,15 @@ struct thread *td; struct vfork_args *uap; { - int error; + int error, flags; struct proc *p2; - error = fork1(td, RFFDG | RFPROC | RFPPWAIT | RFMEM, 0, &p2); +#ifdef XEN + flags = RFFDG | RFPROC; +#else + flags = RFFDG | RFPROC | RFPPWAIT | RFMEM; +#endif + error = fork1(td, flags, 0, &p2); if (error == 0) { td->td_retval[0] = p2->p_pid; td->td_retval[1] = 0; Index: kern/kern_lock.c =================================================================== --- kern/kern_lock.c (.../stable/6/sys) (revision 184012) +++ kern/kern_lock.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -62,11 +62,48 @@ #include #endif + +#ifdef DDB +#include +static void db_show_lockmgr(struct lock_object *lock); +#endif +static void lock_lockmgr(struct lock_object *lock, int how); +static int unlock_lockmgr(struct lock_object *lock); + +struct lock_class lock_class_lockmgr = { + .lc_name = "lockmgr", + .lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE, +#ifdef DDB + .lc_ddb_show = db_show_lockmgr, +#endif + .lc_lock = lock_lockmgr, + .lc_unlock = unlock_lockmgr, +}; + /* * Locking primitives implementation. * Locks provide shared/exclusive sychronization. */ +void +lock_lockmgr(struct lock_object *lock, int how) +{ + + panic("lockmgr locks do not support sleep interlocking"); +} + +int +unlock_lockmgr(struct lock_object *lock) +{ + + panic("lockmgr locks do not support sleep interlocking"); +} + +/* + * Locking primitives implementation. + * Locks provide shared/exclusive sychronization. 
+ */ + #define COUNT(td, x) if ((td)) (td)->td_locks += (x) #define LK_ALL (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE | \ LK_SHARE_NONZERO | LK_WAIT_NONZERO) @@ -639,14 +676,13 @@ return (1); } -DB_SHOW_COMMAND(lockmgr, db_show_lockmgr) +void +db_show_lockmgr(struct lock_object *lock) { struct thread *td; struct lock *lkp; - if (!have_addr) - return; - lkp = (struct lock *)addr; + lkp = (struct lock *)lock; db_printf("lock type: %s\n", lkp->lk_wmesg); db_printf("state: "); Index: kern/kern_condvar.c =================================================================== --- kern/kern_condvar.c (.../stable/6/sys) (revision 184012) +++ kern/kern_condvar.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -124,8 +124,7 @@ DROP_GIANT(); mtx_unlock(mp); - sleepq_add(cvp, &mp->mtx_object, cvp->cv_description, SLEEPQ_CONDVAR, - 0); + sleepq_add(cvp, &mp->mtx_object, cvp->cv_description, SLEEPQ_CONDVAR, 0); sleepq_wait(cvp); #ifdef KTRACE @@ -232,8 +231,7 @@ DROP_GIANT(); mtx_unlock(mp); - sleepq_add(cvp, &mp->mtx_object, cvp->cv_description, SLEEPQ_CONDVAR, - 0); + sleepq_add(cvp, &mp->mtx_object, cvp->cv_description, SLEEPQ_CONDVAR, 0); sleepq_set_timeout(cvp, timo); rval = sleepq_timedwait(cvp); Index: dev/xen/netfront/mbufq.h =================================================================== --- dev/xen/netfront/mbufq.h (.../stable/6/sys) (revision 0) +++ dev/xen/netfront/mbufq.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,123 @@ +/************************************************************************** + +Copyright (c) 2007, Chelsio Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Neither the name of the Chelsio Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
+ +$FreeBSD$ + +***************************************************************************/ + +#ifndef CXGB_MBUFQ_H_ +#define CXGB_MBUFQ_H_ + +struct mbuf_head { + struct mbuf *head; + struct mbuf *tail; + uint32_t qlen; + uint32_t qsize; + struct mtx lock; +}; + +static __inline void +mbufq_init(struct mbuf_head *l) +{ + l->head = l->tail = NULL; + l->qlen = l->qsize = 0; +} + +static __inline int +mbufq_empty(struct mbuf_head *l) +{ + return (l->head == NULL); +} + +static __inline int +mbufq_len(struct mbuf_head *l) +{ + return (l->qlen); +} + +static __inline int +mbufq_size(struct mbuf_head *l) +{ + return (l->qsize); +} + +static __inline int +mbufq_head_size(struct mbuf_head *l) +{ + return (l->head ? l->head->m_pkthdr.len : 0); +} + +static __inline void +mbufq_tail(struct mbuf_head *l, struct mbuf *m) +{ + l->qlen++; + if (l->head == NULL) + l->head = m; + else + l->tail->m_nextpkt = m; + l->tail = m; + l->qsize += m->m_pkthdr.len; +} + +static __inline struct mbuf * +mbufq_dequeue(struct mbuf_head *l) +{ + struct mbuf *m; + + m = l->head; + if (m) { + if (m == l->tail) + l->head = l->tail = NULL; + else + l->head = m->m_nextpkt; + m->m_nextpkt = NULL; + l->qlen--; + l->qsize -= m->m_pkthdr.len; + } + + return (m); +} + +static __inline struct mbuf * +mbufq_peek(struct mbuf_head *l) +{ + return (l->head); +} + +static __inline void +mbufq_append(struct mbuf_head *a, struct mbuf_head *b) +{ + if (a->tail) + a->tail->m_nextpkt = b->head; + if (b->tail) + a->tail = b->tail; + a->qlen += b->qlen; + a->qsize += b->qsize; + + +} +#endif /* CXGB_MBUFQ_H_ */ Property changes on: dev/xen/netfront/mbufq.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: dev/xen/netfront/netfront.c =================================================================== --- dev/xen/netfront/netfront.c (.../stable/6/sys) (revision 0) +++ dev/xen/netfront/netfront.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,1988 @@ +/* + * + * Copyright (c) 2004-2006 Kip Macy + * All rights reserved. + * + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#if __FreeBSD_version >= 700000 +#include +#include +#endif + +#include +#include + +#include /* for DELAY */ +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "xenbus_if.h" + +#define XN_CSUM_FEATURES (CSUM_TCP | CSUM_UDP | CSUM_TSO) + +#define GRANT_INVALID_REF 0 + +#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) + +#if __FreeBSD_version >= 700000 +/* + * Should the driver do LRO on the RX end + * this can be toggled on the fly, but the + * interface must be reset (down/up) for it + * to take effect. + */ +static int xn_enable_lro = 1; +TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro); +#else + +#define IFCAP_TSO4 0 +#define CSUM_TSO 0 + +#endif + +#ifdef CONFIG_XEN +static int MODPARM_rx_copy = 0; +module_param_named(rx_copy, MODPARM_rx_copy, bool, 0); +MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)"); +static int MODPARM_rx_flip = 0; +module_param_named(rx_flip, MODPARM_rx_flip, bool, 0); +MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)"); +#else +static const int MODPARM_rx_copy = 1; +static const int MODPARM_rx_flip = 0; +#endif + +#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2) +#define RX_COPY_THRESHOLD 256 + +#define net_ratelimit() 0 + +struct netfront_info; +struct netfront_rx_info; + +static void xn_txeof(struct netfront_info *); +static void xn_rxeof(struct netfront_info *); +static void network_alloc_rx_buffers(struct netfront_info *); + +static void xn_tick_locked(struct netfront_info *); +static void xn_tick(void *); + +static void xn_intr(void *); +static void xn_start_locked(struct ifnet *); +static void xn_start(struct ifnet *); +static int xn_ioctl(struct ifnet *, u_long, caddr_t); +static void xn_ifinit_locked(struct netfront_info *); +static void xn_ifinit(void *); +static void xn_stop(struct netfront_info *); +#ifdef notyet +static void xn_watchdog(struct ifnet *); +#endif + +static void show_device(struct netfront_info *sc); +#ifdef notyet +static void netfront_closing(device_t dev); +#endif +static void netif_free(struct netfront_info *info); +static int netfront_detach(device_t dev); + +static int talk_to_backend(device_t dev, struct netfront_info *info); +static int create_netdev(device_t dev); +static void netif_disconnect_backend(struct netfront_info *info); +static int setup_device(device_t dev, struct netfront_info *info); +static void end_access(int ref, void *page); + +/* Xenolinux helper functions */ +int network_connect(struct netfront_info *); + +static void xn_free_rx_ring(struct netfront_info *); + +static void xn_free_tx_ring(struct netfront_info *); + +static int xennet_get_responses(struct netfront_info *np, + struct netfront_rx_info *rinfo, RING_IDX rp, struct mbuf **list, + int *pages_flipped_p); + +#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) + +#define INVALID_P2M_ENTRY (~0UL) + +/* + * Mbuf pointers. We need these to keep track of the virtual addresses + * of our mbuf chains since we can only convert from virtual to physical, + * not the other way around. 
The size must track the free index arrays. + */ +struct xn_chain_data { + struct mbuf *xn_tx_chain[NET_TX_RING_SIZE+1]; + struct mbuf *xn_rx_chain[NET_RX_RING_SIZE+1]; +}; + + +struct net_device_stats +{ + u_long rx_packets; /* total packets received */ + u_long tx_packets; /* total packets transmitted */ + u_long rx_bytes; /* total bytes received */ + u_long tx_bytes; /* total bytes transmitted */ + u_long rx_errors; /* bad packets received */ + u_long tx_errors; /* packet transmit problems */ + u_long rx_dropped; /* no space in linux buffers */ + u_long tx_dropped; /* no space available in linux */ + u_long multicast; /* multicast packets received */ + u_long collisions; + + /* detailed rx_errors: */ + u_long rx_length_errors; + u_long rx_over_errors; /* receiver ring buff overflow */ + u_long rx_crc_errors; /* recved pkt with crc error */ + u_long rx_frame_errors; /* recv'd frame alignment error */ + u_long rx_fifo_errors; /* recv'r fifo overrun */ + u_long rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + u_long tx_aborted_errors; + u_long tx_carrier_errors; + u_long tx_fifo_errors; + u_long tx_heartbeat_errors; + u_long tx_window_errors; + + /* for cslip etc */ + u_long rx_compressed; + u_long tx_compressed; +}; + +struct netfront_info { + + struct ifnet *xn_ifp; +#if __FreeBSD_version >= 700000 + struct lro_ctrl xn_lro; +#endif + + struct net_device_stats stats; + u_int tx_full; + + netif_tx_front_ring_t tx; + netif_rx_front_ring_t rx; + + struct mtx tx_lock; + struct mtx rx_lock; + struct sx sc_lock; + + u_int handle; + u_int irq; + u_int copying_receiver; + u_int carrier; + + /* Receive-ring batched refills. */ +#define RX_MIN_TARGET 32 +#define RX_MAX_TARGET NET_RX_RING_SIZE + int rx_min_target, rx_max_target, rx_target; + + /* + * {tx,rx}_skbs store outstanding skbuffs. The first entry in each + * array is an index into a chain of free entries. 
+ */ + + grant_ref_t gref_tx_head; + grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1]; + grant_ref_t gref_rx_head; + grant_ref_t grant_rx_ref[NET_TX_RING_SIZE + 1]; + +#define TX_MAX_TARGET min(NET_RX_RING_SIZE, 256) + device_t xbdev; + int tx_ring_ref; + int rx_ring_ref; + uint8_t mac[ETHER_ADDR_LEN]; + struct xn_chain_data xn_cdata; /* mbufs */ + struct mbuf_head xn_rx_batch; /* head of the batch queue */ + + int xn_if_flags; + struct callout xn_stat_ch; + + u_long rx_pfn_array[NET_RX_RING_SIZE]; + multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; + mmu_update_t rx_mmu[NET_RX_RING_SIZE]; +}; + +#define rx_mbufs xn_cdata.xn_rx_chain +#define tx_mbufs xn_cdata.xn_tx_chain + +#define XN_LOCK_INIT(_sc, _name) \ + mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \ + mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF); \ + sx_init(&(_sc)->sc_lock, #_name"_rx") + +#define XN_RX_LOCK(_sc) mtx_lock(&(_sc)->rx_lock) +#define XN_RX_UNLOCK(_sc) mtx_unlock(&(_sc)->rx_lock) + +#define XN_TX_LOCK(_sc) mtx_lock(&(_sc)->tx_lock) +#define XN_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->tx_lock) + +#define XN_LOCK(_sc) sx_xlock(&(_sc)->sc_lock); +#define XN_UNLOCK(_sc) sx_xunlock(&(_sc)->sc_lock); + +#define XN_LOCK_ASSERT(_sc) sx_assert(&(_sc)->sc_lock, SX_LOCKED); +#define XN_RX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->rx_lock, MA_OWNED); +#define XN_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->tx_lock, MA_OWNED); +#define XN_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->rx_lock); \ + mtx_destroy(&(_sc)->tx_lock); \ + sx_destroy(&(_sc)->sc_lock); + +struct netfront_rx_info { + struct netif_rx_response rx; + struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1]; +}; + +#define netfront_carrier_on(netif) ((netif)->carrier = 1) +#define netfront_carrier_off(netif) ((netif)->carrier = 0) +#define netfront_carrier_ok(netif) ((netif)->carrier) + +/* Access macros for acquiring freeing slots in xn_free_{tx,rx}_idxs[]. */ + + + +/* + * Access macros for acquiring freeing slots in tx_skbs[]. + */ + +static inline void +add_id_to_freelist(struct mbuf **list, unsigned short id) +{ + list[id] = list[0]; + list[0] = (void *)(u_long)id; +} + +static inline unsigned short +get_id_from_freelist(struct mbuf **list) +{ + u_int id = (u_int)(u_long)list[0]; + list[0] = list[id]; + return (id); +} + +static inline int +xennet_rxidx(RING_IDX idx) +{ + return idx & (NET_RX_RING_SIZE - 1); +} + +static inline struct mbuf * +xennet_get_rx_mbuf(struct netfront_info *np, + RING_IDX ri) +{ + int i = xennet_rxidx(ri); + struct mbuf *m; + + m = np->rx_mbufs[i]; + np->rx_mbufs[i] = NULL; + return (m); +} + +static inline grant_ref_t +xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri) +{ + int i = xennet_rxidx(ri); + grant_ref_t ref = np->grant_rx_ref[i]; + np->grant_rx_ref[i] = GRANT_INVALID_REF; + return ref; +} + +#ifdef DEBUG + +#endif +#define IPRINTK(fmt, args...) \ + printf("[XEN] " fmt, ##args) +#define WPRINTK(fmt, args...) \ + printf("[XEN] " fmt, ##args) +#if 0 +#define DPRINTK(fmt, args...) \ + printf("[XEN] %s: " fmt, __func__, ##args) +#else +#define DPRINTK(fmt, args...) +#endif + +/** + * Read the 'mac' node at the given device's node in the store, and parse that + * as colon-separated octets, placing result the given mac array. mac must be + * a preallocated array of length ETH_ALEN (as declared in linux/if_ether.h). + * Return 0 on success, or errno on error. 
+ */ +static int +xen_net_read_mac(device_t dev, uint8_t mac[]) +{ + int error, i; + char *s, *e, *macstr; + + error = xenbus_read(XBT_NIL, xenbus_get_node(dev), "mac", NULL, + (void **) &macstr); + if (error) + return (error); + + s = macstr; + for (i = 0; i < ETHER_ADDR_LEN; i++) { + mac[i] = strtoul(s, &e, 16); + if (s == e || (e[0] != ':' && e[0] != 0)) { + free(macstr, M_DEVBUF); + return (ENOENT); + } + s = &e[1]; + } + free(macstr, M_DEVBUF); + return (0); +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures and the ring buffers for communication with the backend, and + * inform the backend of the appropriate details for those. Switch to + * Connected state. + */ +static int +netfront_probe(device_t dev) +{ + + if (!strcmp(xenbus_get_type(dev), "vif")) { + device_set_desc(dev, "Virtual Network Interface"); + return (0); + } + + return (ENXIO); +} + +static int +netfront_attach(device_t dev) +{ + int err; + + err = create_netdev(dev); + if (err) { + xenbus_dev_fatal(dev, err, "creating netdev"); + return err; + } + +#if __FreeBSD_version >= 700000 + SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), + SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW, + &xn_enable_lro, 0, "Large Receive Offload"); +#endif + + return 0; +} + + +/** + * We are reconnecting to the backend, due to a suspend/resume, or a backend + * driver restart. We tear down our netif structure and recreate it, but + * leave the device-layer structures intact so that this is transparent to the + * rest of the kernel. + */ +static int +netfront_resume(device_t dev) +{ + struct netfront_info *info = device_get_softc(dev); + + netif_disconnect_backend(info); + return (0); +} + + +/* Common code used when first setting up, and when resuming. */ +static int +talk_to_backend(device_t dev, struct netfront_info *info) +{ + const char *message; + struct xenbus_transaction xbt; + const char *node = xenbus_get_node(dev); + int err; + + err = xen_net_read_mac(dev, info->mac); + if (err) { + xenbus_dev_fatal(dev, err, "parsing %s/mac", node); + goto out; + } + + /* Create shared ring, alloc event channel. 
*/ + err = setup_device(dev, info); + if (err) + goto out; + + again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto destroy_ring; + } + err = xenbus_printf(xbt, node, "tx-ring-ref","%u", + info->tx_ring_ref); + if (err) { + message = "writing tx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, node, "rx-ring-ref","%u", + info->rx_ring_ref); + if (err) { + message = "writing rx ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, node, + "event-channel", "%u", irq_to_evtchn_port(info->irq)); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, node, "request-rx-copy", "%u", + info->copying_receiver); + if (err) { + message = "writing request-rx-copy"; + goto abort_transaction; + } + err = xenbus_printf(xbt, node, "feature-rx-notify", "%d", 1); + if (err) { + message = "writing feature-rx-notify"; + goto abort_transaction; + } + err = xenbus_printf(xbt, node, "feature-sg", "%d", 1); + if (err) { + message = "writing feature-sg"; + goto abort_transaction; + } +#if __FreeBSD_version >= 700000 + err = xenbus_printf(xbt, node, "feature-gso-tcpv4", "%d", 1); + if (err) { + message = "writing feature-gso-tcpv4"; + goto abort_transaction; + } +#endif + + err = xenbus_transaction_end(xbt, 0); + if (err) { + if (err == EAGAIN) + goto again; + xenbus_dev_fatal(dev, err, "completing transaction"); + goto destroy_ring; + } + + return 0; + + abort_transaction: + xenbus_transaction_end(xbt, 1); + xenbus_dev_fatal(dev, err, "%s", message); + destroy_ring: + netif_free(info); + out: + return err; +} + + +static int +setup_device(device_t dev, struct netfront_info *info) +{ + netif_tx_sring_t *txs; + netif_rx_sring_t *rxs; + int error; + struct ifnet *ifp; + + ifp = info->xn_ifp; + + info->tx_ring_ref = GRANT_INVALID_REF; + info->rx_ring_ref = GRANT_INVALID_REF; + info->rx.sring = NULL; + info->tx.sring = NULL; + info->irq = 0; + + txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); + if (!txs) { + error = ENOMEM; + xenbus_dev_fatal(dev, error, "allocating tx ring page"); + goto fail; + } + SHARED_RING_INIT(txs); + FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); + error = xenbus_grant_ring(dev, virt_to_mfn(txs), &info->tx_ring_ref); + if (error) + goto fail; + + rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); + if (!rxs) { + error = ENOMEM; + xenbus_dev_fatal(dev, error, "allocating rx ring page"); + goto fail; + } + SHARED_RING_INIT(rxs); + FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); + + error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &info->rx_ring_ref); + if (error) + goto fail; + + error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev), + "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq); + + if (error) { + xenbus_dev_fatal(dev, error, + "bind_evtchn_to_irqhandler failed"); + goto fail; + } + + show_device(info); + + return (0); + + fail: + netif_free(info); + return (error); +} + +/** + * If this interface has an ipv4 address, send an arp for it. This + * helps to get the network going again after migrating hosts. + */ +static void +netfront_send_fake_arp(device_t dev, struct netfront_info *info) +{ + struct ifnet *ifp; + struct ifaddr *ifa; + + ifp = info->xn_ifp; + TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family == AF_INET) { + arp_ifinit(ifp, ifa); + } + } +} + +/** + * Callback received when the backend's state changes. 
+ */ +static void +netfront_backend_changed(device_t dev, XenbusState newstate) +{ + struct netfront_info *sc = device_get_softc(dev); + + DPRINTK("newstate=%d\n", newstate); + + switch (newstate) { + case XenbusStateInitialising: + case XenbusStateInitialised: + case XenbusStateConnected: + case XenbusStateUnknown: + case XenbusStateClosed: + case XenbusStateReconfigured: + case XenbusStateReconfiguring: + break; + case XenbusStateInitWait: + if (xenbus_get_state(dev) != XenbusStateInitialising) + break; + if (network_connect(sc) != 0) + break; + xenbus_set_state(dev, XenbusStateConnected); + netfront_send_fake_arp(dev, sc); + break; + case XenbusStateClosing: + xenbus_set_state(dev, XenbusStateClosed); + break; + } +} + +static void +xn_free_rx_ring(struct netfront_info *sc) +{ +#if 0 + int i; + + for (i = 0; i < NET_RX_RING_SIZE; i++) { + if (sc->xn_cdata.xn_rx_chain[i] != NULL) { + m_freem(sc->xn_cdata.xn_rx_chain[i]); + sc->xn_cdata.xn_rx_chain[i] = NULL; + } + } + + sc->rx.rsp_cons = 0; + sc->xn_rx_if->req_prod = 0; + sc->xn_rx_if->event = sc->rx.rsp_cons ; +#endif +} + +static void +xn_free_tx_ring(struct netfront_info *sc) +{ +#if 0 + int i; + + for (i = 0; i < NET_TX_RING_SIZE; i++) { + if (sc->xn_cdata.xn_tx_chain[i] != NULL) { + m_freem(sc->xn_cdata.xn_tx_chain[i]); + sc->xn_cdata.xn_tx_chain[i] = NULL; + } + } + + return; +#endif +} + +static inline int +netfront_tx_slot_available(struct netfront_info *np) +{ + return ((np->tx.req_prod_pvt - np->tx.rsp_cons) < + (TX_MAX_TARGET - /* MAX_SKB_FRAGS */ 24 - 2)); +} +static void +netif_release_tx_bufs(struct netfront_info *np) +{ + struct mbuf *m; + int i; + + for (i = 1; i <= NET_TX_RING_SIZE; i++) { + m = np->xn_cdata.xn_tx_chain[i]; + + if (((u_long)m) < KERNBASE) + continue; + gnttab_grant_foreign_access_ref(np->grant_tx_ref[i], + xenbus_get_otherend_id(np->xbdev), + virt_to_mfn(mtod(m, vm_offset_t)), + GNTMAP_readonly); + gnttab_release_grant_reference(&np->gref_tx_head, + np->grant_tx_ref[i]); + np->grant_tx_ref[i] = GRANT_INVALID_REF; + add_id_to_freelist(np->tx_mbufs, i); + m_freem(m); + } +} + +static void +network_alloc_rx_buffers(struct netfront_info *sc) +{ + int otherend_id = xenbus_get_otherend_id(sc->xbdev); + unsigned short id; + struct mbuf *m_new; + int i, batch_target, notify; + RING_IDX req_prod; + struct xen_memory_reservation reservation; + grant_ref_t ref; + int nr_flips; + netif_rx_request_t *req; + vm_offset_t vaddr; + u_long pfn; + + req_prod = sc->rx.req_prod_pvt; + + if (unlikely(sc->carrier == 0)) + return; + + /* + * Allocate skbuffs greedily, even though we batch updates to the + * receive ring. This creates a less bursty demand on the memory + * allocator, so should reduce the chance of failed allocation + * requests both for ourself and for other kernel subsystems. + */ + batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons); + for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) { + MGETHDR(m_new, M_DONTWAIT, MT_DATA); + if (m_new == NULL) + goto no_mbuf; + + m_cljget(m_new, M_DONTWAIT, MJUMPAGESIZE); + if ((m_new->m_flags & M_EXT) == 0) { + m_freem(m_new); + +no_mbuf: + if (i != 0) + goto refill; + /* + * XXX set timer + */ + break; + } + m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE; + + /* queue the mbufs allocated */ + mbufq_tail(&sc->xn_rx_batch, m_new); + } + + /* Is the batch large enough to be worthwhile? */ + if (i < (sc->rx_target/2)) { + if (req_prod >sc->rx.sring->req_prod) + goto push; + return; + } + /* Adjust floating fill target if we risked running out of buffers. 
*/ + if ( ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) && + ((sc->rx_target *= 2) > sc->rx_max_target) ) + sc->rx_target = sc->rx_max_target; + +refill: + for (nr_flips = i = 0; ; i++) { + if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL) + break; + + m_new->m_ext.ext_args = (vm_paddr_t *)(uintptr_t)( + vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT); + + id = xennet_rxidx(req_prod + i); + + KASSERT(sc->xn_cdata.xn_rx_chain[id] == NULL, + ("non-NULL xm_rx_chain")); + sc->xn_cdata.xn_rx_chain[id] = m_new; + + ref = gnttab_claim_grant_reference(&sc->gref_rx_head); + KASSERT((short)ref >= 0, ("negative ref")); + sc->grant_rx_ref[id] = ref; + + vaddr = mtod(m_new, vm_offset_t); + pfn = vtophys(vaddr) >> PAGE_SHIFT; + req = RING_GET_REQUEST(&sc->rx, req_prod + i); + + if (sc->copying_receiver == 0) { + gnttab_grant_foreign_transfer_ref(ref, + otherend_id, pfn); + sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn); + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + /* Remove this page before passing + * back to Xen. + */ + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + MULTI_update_va_mapping(&sc->rx_mcl[i], + vaddr, 0, 0); + } + nr_flips++; + } else { + gnttab_grant_foreign_access_ref(ref, + otherend_id, + PFNTOMFN(pfn), 0); + } + req->id = id; + req->gref = ref; + + sc->rx_pfn_array[i] = + vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT; + } + + KASSERT(i, ("no mbufs processed")); /* should have returned earlier */ + KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed")); + /* + * We may have allocated buffers which have entries outstanding + * in the page * update queue -- make sure we flush those first! + */ + PT_UPDATES_FLUSH(); + if (nr_flips != 0) { +#ifdef notyet + /* Tell the ballon driver what is going on. */ + balloon_update_driver_allowance(i); +#endif + set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array); + reservation.nr_extents = i; + reservation.extent_order = 0; + reservation.address_bits = 0; + reservation.domid = DOMID_SELF; + + if (!xen_feature(XENFEAT_auto_translated_physmap)) { + + /* After all PTEs have been zapped, flush the TLB. */ + sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] = + UVMF_TLB_FLUSH|UVMF_ALL; + + /* Give away a batch of pages. */ + sc->rx_mcl[i].op = __HYPERVISOR_memory_op; + sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation; + sc->rx_mcl[i].args[1] = (u_long)&reservation; + /* Zap PTEs and give away pages in one big multicall. */ + (void)HYPERVISOR_multicall(sc->rx_mcl, i+1); + + /* Check return status of HYPERVISOR_dom_mem_op(). */ + if (unlikely(sc->rx_mcl[i].result != i)) + panic("Unable to reduce memory reservation\n"); + } else { + if (HYPERVISOR_memory_op( + XENMEM_decrease_reservation, &reservation) + != i) + panic("Unable to reduce memory " + "reservation\n"); + } + } else { + wmb(); + } + + /* Above is a suitable barrier to ensure backend will see requests. 
*/ + sc->rx.req_prod_pvt = req_prod + i; +push: + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify); + if (notify) + notify_remote_via_irq(sc->irq); +} + +static void +xn_rxeof(struct netfront_info *np) +{ + struct ifnet *ifp; +#if __FreeBSD_version >= 700000 + struct lro_ctrl *lro = &np->xn_lro; + struct lro_entry *queued; +#endif + struct netfront_rx_info rinfo; + struct netif_rx_response *rx = &rinfo.rx; + struct netif_extra_info *extras = rinfo.extras; + RING_IDX i, rp; + multicall_entry_t *mcl; + struct mbuf *m; + struct mbuf_head rxq, errq; + int err, pages_flipped = 0, work_to_do; + + do { + XN_RX_LOCK_ASSERT(np); + if (!netfront_carrier_ok(np)) + return; + + mbufq_init(&errq); + mbufq_init(&rxq); + + ifp = np->xn_ifp; + + rp = np->rx.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. */ + + i = np->rx.rsp_cons; + while ((i != rp)) { + memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx)); + memset(extras, 0, sizeof(rinfo.extras)); + + m = NULL; + err = xennet_get_responses(np, &rinfo, rp, &m, + &pages_flipped); + + if (unlikely(err)) { + if (m) + mbufq_tail(&errq, m); + np->stats.rx_errors++; + i = np->rx.rsp_cons; + continue; + } + + m->m_pkthdr.rcvif = ifp; + if ( rx->flags & NETRXF_data_validated ) { + /* Tell the stack the checksums are okay */ + /* + * XXX this isn't necessarily the case - need to add + * check + */ + + m->m_pkthdr.csum_flags |= + (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID + | CSUM_PSEUDO_HDR); + m->m_pkthdr.csum_data = 0xffff; + } + + np->stats.rx_packets++; + np->stats.rx_bytes += m->m_pkthdr.len; + + mbufq_tail(&rxq, m); + np->rx.rsp_cons = ++i; + } + + if (pages_flipped) { + /* Some pages are no longer absent... */ +#ifdef notyet + balloon_update_driver_allowance(-pages_flipped); +#endif + /* Do all the remapping work, and M->P updates, in one big + * hypercall. + */ + if (!!xen_feature(XENFEAT_auto_translated_physmap)) { + mcl = np->rx_mcl + pages_flipped; + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (u_long)np->rx_mmu; + mcl->args[1] = pages_flipped; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + (void)HYPERVISOR_multicall(np->rx_mcl, + pages_flipped + 1); + } + } + + while ((m = mbufq_dequeue(&errq))) + m_freem(m); + + /* + * Process all the mbufs after the remapping is complete. + * Break the mbuf chain first though. + */ + while ((m = mbufq_dequeue(&rxq)) != NULL) { + ifp->if_ipackets++; + + /* + * Do we really need to drop the rx lock? + */ + XN_RX_UNLOCK(np); +#if __FreeBSD_version >= 700000 + /* Use LRO if possible */ + if ((ifp->if_capenable & IFCAP_LRO) == 0 || + lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) { + /* + * If LRO fails, pass up to the stack + * directly. + */ + (*ifp->if_input)(ifp, m); + } +#else + (*ifp->if_input)(ifp, m); +#endif + XN_RX_LOCK(np); + } + + np->rx.rsp_cons = i; + +#if __FreeBSD_version >= 700000 + /* + * Flush any outstanding LRO work + */ + while (!SLIST_EMPTY(&lro->lro_active)) { + queued = SLIST_FIRST(&lro->lro_active); + SLIST_REMOVE_HEAD(&lro->lro_active, next); + tcp_lro_flush(lro, queued); + } +#endif + +#if 0 + /* If we get a callback with very few responses, reduce fill target. */ + /* NB. Note exponential increase, linear decrease. 
*/ + if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > + ((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target)) + np->rx_target = np->rx_min_target; +#endif + + network_alloc_rx_buffers(np); + + RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do); + } while (work_to_do); +} + +static void +xn_txeof(struct netfront_info *np) +{ + RING_IDX i, prod; + unsigned short id; + struct ifnet *ifp; + netif_tx_response_t *txr; + struct mbuf *m; + + XN_TX_LOCK_ASSERT(np); + + if (!netfront_carrier_ok(np)) + return; + + ifp = np->xn_ifp; + ifp->if_timer = 0; + + do { + prod = np->tx.sring->rsp_prod; + rmb(); /* Ensure we see responses up to 'rp'. */ + + for (i = np->tx.rsp_cons; i != prod; i++) { + txr = RING_GET_RESPONSE(&np->tx, i); + if (txr->status == NETIF_RSP_NULL) + continue; + + id = txr->id; + m = np->xn_cdata.xn_tx_chain[id]; + + /* + * Increment packet count if this is the last + * mbuf of the chain. + */ + if (!m->m_next) + ifp->if_opackets++; + KASSERT(m != NULL, ("mbuf not found in xn_tx_chain")); + M_ASSERTVALID(m); + if (unlikely(gnttab_query_foreign_access( + np->grant_tx_ref[id]) != 0)) { + printf("network_tx_buf_gc: warning " + "-- grant still in use by backend " + "domain.\n"); + goto out; + } + gnttab_end_foreign_access_ref( + np->grant_tx_ref[id]); + gnttab_release_grant_reference( + &np->gref_tx_head, np->grant_tx_ref[id]); + np->grant_tx_ref[id] = GRANT_INVALID_REF; + + np->xn_cdata.xn_tx_chain[id] = NULL; + add_id_to_freelist(np->xn_cdata.xn_tx_chain, id); + m_free(m); + } + np->tx.rsp_cons = prod; + + /* + * Set a new event, then check for race with update of + * tx_cons. Note that it is essential to schedule a + * callback, no matter how few buffers are pending. Even if + * there is space in the transmit ring, higher layers may + * be blocked because too much data is outstanding: in such + * cases notification from Xen is likely to be the only kick + * that we'll get. 
+ */ + np->tx.sring->rsp_event = + prod + ((np->tx.sring->req_prod - prod) >> 1) + 1; + + mb(); + + } while (prod != np->tx.sring->rsp_prod); + + out: + if (np->tx_full && + ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) { + np->tx_full = 0; +#if 0 + if (np->user_state == UST_OPEN) + netif_wake_queue(dev); +#endif + } + +} + +static void +xn_intr(void *xsc) +{ + struct netfront_info *np = xsc; + struct ifnet *ifp = np->xn_ifp; + +#if 0 + if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod && + likely(netfront_carrier_ok(np)) && + ifp->if_drv_flags & IFF_DRV_RUNNING)) + return; +#endif + if (np->tx.rsp_cons != np->tx.sring->rsp_prod) { + XN_TX_LOCK(np); + xn_txeof(np); + XN_TX_UNLOCK(np); + } + + XN_RX_LOCK(np); + xn_rxeof(np); + XN_RX_UNLOCK(np); + + if (ifp->if_drv_flags & IFF_DRV_RUNNING && + !IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + xn_start(ifp); +} + + +static void +xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m, + grant_ref_t ref) +{ + int new = xennet_rxidx(np->rx.req_prod_pvt); + + KASSERT(np->rx_mbufs[new] == NULL, ("rx_mbufs != NULL")); + np->rx_mbufs[new] = m; + np->grant_rx_ref[new] = ref; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->id = new; + RING_GET_REQUEST(&np->rx, np->rx.req_prod_pvt)->gref = ref; + np->rx.req_prod_pvt++; +} + +static int +xennet_get_extras(struct netfront_info *np, + struct netif_extra_info *extras, RING_IDX rp) +{ + struct netif_extra_info *extra; + RING_IDX cons = np->rx.rsp_cons; + + int err = 0; + + do { + struct mbuf *m; + grant_ref_t ref; + + if (unlikely(cons + 1 == rp)) { +#if 0 + if (net_ratelimit()) + WPRINTK("Missing extra info\n"); +#endif + err = -EINVAL; + break; + } + + extra = (struct netif_extra_info *) + RING_GET_RESPONSE(&np->rx, ++cons); + + if (unlikely(!extra->type || + extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) { +#if 0 + if (net_ratelimit()) + WPRINTK("Invalid extra type: %d\n", + extra->type); +#endif + err = -EINVAL; + } else { + memcpy(&extras[extra->type - 1], extra, sizeof(*extra)); + } + + m = xennet_get_rx_mbuf(np, cons); + ref = xennet_get_rx_ref(np, cons); + xennet_move_rx_slot(np, m, ref); + } while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE); + + np->rx.rsp_cons = cons; + return err; +} + +static int +xennet_get_responses(struct netfront_info *np, + struct netfront_rx_info *rinfo, RING_IDX rp, + struct mbuf **list, + int *pages_flipped_p) +{ + int pages_flipped = *pages_flipped_p; + struct mmu_update *mmu; + struct multicall_entry *mcl; + struct netif_rx_response *rx = &rinfo->rx; + struct netif_extra_info *extras = rinfo->extras; + RING_IDX cons = np->rx.rsp_cons; + struct mbuf *m, *m0, *m_prev; + grant_ref_t ref = xennet_get_rx_ref(np, cons); + int max = 5 /* MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD) */; + int frags = 1; + int err = 0; + u_long ret; + + m0 = m = m_prev = xennet_get_rx_mbuf(np, cons); + + + if (rx->flags & NETRXF_extra_info) { + err = xennet_get_extras(np, extras, rp); + cons = np->rx.rsp_cons; + } + + + if (m0 != NULL) { + m0->m_pkthdr.len = 0; + m0->m_next = NULL; + } + + for (;;) { + u_long mfn; + +#if 0 + printf("rx->status=%hd rx->offset=%hu frags=%u\n", + rx->status, rx->offset, frags); +#endif + if (unlikely(rx->status < 0 || + rx->offset + rx->status > PAGE_SIZE)) { +#if 0 + if (net_ratelimit()) + WPRINTK("rx->offset: %x, size: %u\n", + rx->offset, rx->status); +#endif + xennet_move_rx_slot(np, m, ref); + err = -EINVAL; + goto next; + } + + /* + * This definitely indicates a bug, either in this driver or in + * the backend driver. 
In future this should flag the bad + * situation to the system controller to reboot the backed. + */ + if (ref == GRANT_INVALID_REF) { +#if 0 + if (net_ratelimit()) + WPRINTK("Bad rx response id %d.\n", rx->id); +#endif + err = -EINVAL; + goto next; + } + + if (!np->copying_receiver) { + /* Memory pressure, insufficient buffer + * headroom, ... + */ + if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) { + if (net_ratelimit()) + WPRINTK("Unfulfilled rx req " + "(id=%d, st=%d).\n", + rx->id, rx->status); + xennet_move_rx_slot(np, m, ref); + err = -ENOMEM; + goto next; + } + + if (!xen_feature( XENFEAT_auto_translated_physmap)) { + /* Remap the page. */ + void *vaddr = mtod(m, void *); + uint32_t pfn; + + mcl = np->rx_mcl + pages_flipped; + mmu = np->rx_mmu + pages_flipped; + + MULTI_update_va_mapping(mcl, (u_long)vaddr, + (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW | + PG_V | PG_M | PG_A, 0); + pfn = (uintptr_t)m->m_ext.ext_args; + mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) | + MMU_MACHPHYS_UPDATE; + mmu->val = pfn; + + set_phys_to_machine(pfn, mfn); + } + pages_flipped++; + } else { + ret = gnttab_end_foreign_access_ref(ref); + KASSERT(ret, ("ret != 0")); + } + + gnttab_release_grant_reference(&np->gref_rx_head, ref); + +next: + if (m != NULL) { + m->m_len = rx->status; + m->m_data += rx->offset; + m0->m_pkthdr.len += rx->status; + } + + if (!(rx->flags & NETRXF_more_data)) + break; + + if (cons + frags == rp) { + if (net_ratelimit()) + WPRINTK("Need more frags\n"); + err = -ENOENT; + break; + } + m_prev = m; + + rx = RING_GET_RESPONSE(&np->rx, cons + frags); + m = xennet_get_rx_mbuf(np, cons + frags); + + m_prev->m_next = m; + m->m_next = NULL; + ref = xennet_get_rx_ref(np, cons + frags); + frags++; + } + *list = m0; + + if (unlikely(frags > max)) { + if (net_ratelimit()) + WPRINTK("Too many frags\n"); + err = -E2BIG; + } + + if (unlikely(err)) + np->rx.rsp_cons = cons + frags; + + *pages_flipped_p = pages_flipped; + + return err; +} + +static void +xn_tick_locked(struct netfront_info *sc) +{ + XN_RX_LOCK_ASSERT(sc); + callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); + + /* XXX placeholder for printing debug information */ + +} + + +static void +xn_tick(void *xsc) +{ + struct netfront_info *sc; + + sc = xsc; + XN_RX_LOCK(sc); + xn_tick_locked(sc); + XN_RX_UNLOCK(sc); + +} +static void +xn_start_locked(struct ifnet *ifp) +{ + int otherend_id; + unsigned short id; + struct mbuf *m_head, *m; + struct netfront_info *sc; + netif_tx_request_t *tx; + netif_extra_info_t *extra; + RING_IDX i; + grant_ref_t ref; + u_long mfn, tx_bytes; + int notify, nfrags; + + sc = ifp->if_softc; + otherend_id = xenbus_get_otherend_id(sc->xbdev); + tx_bytes = 0; + + if (!netfront_carrier_ok(sc)) + return; + + for (i = sc->tx.req_prod_pvt; TRUE; i++) { + IF_DEQUEUE(&ifp->if_snd, m_head); + if (m_head == NULL) + break; + + if (!netfront_tx_slot_available(sc)) { + IF_PREPEND(&ifp->if_snd, m_head); + ifp->if_drv_flags |= IFF_DRV_OACTIVE; + break; + } + + + /* + * Defragment the mbuf if necessary. + */ + for (m = m_head, nfrags = 0; m; m = m->m_next) + nfrags++; + if (nfrags > MAX_SKB_FRAGS) { + m = m_defrag(m_head, M_DONTWAIT); + if (!m) { + m_freem(m_head); + break; + } + m_head = m; + } + + /* + * Start packing the mbufs in this chain into + * the fragment pointers. Stop when we run out + * of fragments or hit the end of the mbuf chain. 
+ */ + m = m_head; + extra = NULL; + for (m = m_head; m; m = m->m_next) { + tx = RING_GET_REQUEST(&sc->tx, i); + id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain); + sc->xn_cdata.xn_tx_chain[id] = m; + tx->id = id; + ref = gnttab_claim_grant_reference(&sc->gref_tx_head); + KASSERT((short)ref >= 0, ("Negative ref")); + mfn = virt_to_mfn(mtod(m, vm_offset_t)); + gnttab_grant_foreign_access_ref(ref, otherend_id, + mfn, GNTMAP_readonly); + tx->gref = sc->grant_tx_ref[id] = ref; + tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1); + tx->flags = 0; + if (m == m_head) { + /* + * The first fragment has the entire packet + * size, subsequent fragments have just the + * fragment size. The backend works out the + * true size of the first fragment by + * subtracting the sizes of the other + * fragments. + */ + tx->size = m->m_pkthdr.len; + + /* + * The first fragment contains the + * checksum flags and is optionally + * followed by extra data for TSO etc. + */ + if (m->m_pkthdr.csum_flags + & CSUM_DELAY_DATA) { + tx->flags |= (NETTXF_csum_blank + | NETTXF_data_validated); + } +#if __FreeBSD_version >= 700000 + if (m->m_pkthdr.csum_flags & CSUM_TSO) { + struct netif_extra_info *gso = + (struct netif_extra_info *) + RING_GET_REQUEST(&sc->tx, ++i); + + if (extra) + extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE; + else + tx->flags |= NETTXF_extra_info; + + gso->u.gso.size = m->m_pkthdr.tso_segsz; + gso->u.gso.type = + XEN_NETIF_GSO_TYPE_TCPV4; + gso->u.gso.pad = 0; + gso->u.gso.features = 0; + + gso->type = XEN_NETIF_EXTRA_TYPE_GSO; + gso->flags = 0; + extra = gso; + } +#endif + } else { + tx->size = m->m_len; + } + if (m->m_next) { + tx->flags |= NETTXF_more_data; + i++; + } + } + + BPF_MTAP(ifp, m_head); + + sc->stats.tx_bytes += m_head->m_pkthdr.len; + sc->stats.tx_packets++; + } + + sc->tx.req_prod_pvt = i; + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify); + if (notify) + notify_remote_via_irq(sc->irq); + + xn_txeof(sc); + + if (RING_FULL(&sc->tx)) { + sc->tx_full = 1; +#if 0 + netif_stop_queue(dev); +#endif + } + + return; +} + +static void +xn_start(struct ifnet *ifp) +{ + struct netfront_info *sc; + sc = ifp->if_softc; + XN_TX_LOCK(sc); + xn_start_locked(ifp); + XN_TX_UNLOCK(sc); +} + +/* equivalent of network_open() in Linux */ +static void +xn_ifinit_locked(struct netfront_info *sc) +{ + struct ifnet *ifp; + + XN_LOCK_ASSERT(sc); + + ifp = sc->xn_ifp; + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + return; + + xn_stop(sc); + + network_alloc_rx_buffers(sc); + sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1; + + ifp->if_drv_flags |= IFF_DRV_RUNNING; + ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + + callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc); + +} + + +static void +xn_ifinit(void *xsc) +{ + struct netfront_info *sc = xsc; + + XN_LOCK(sc); + xn_ifinit_locked(sc); + XN_UNLOCK(sc); + +} + + +static int +xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct netfront_info *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *) data; + struct ifaddr *ifa = (struct ifaddr *)data; + + int mask, error = 0; + switch(cmd) { + case SIOCSIFADDR: + case SIOCGIFADDR: + XN_LOCK(sc); + if (ifa->ifa_addr->sa_family == AF_INET) { + ifp->if_flags |= IFF_UP; + if (!(ifp->if_drv_flags & IFF_DRV_RUNNING)) + xn_ifinit_locked(sc); + arp_ifinit(ifp, ifa); + XN_UNLOCK(sc); + } else { + XN_UNLOCK(sc); + error = ether_ioctl(ifp, cmd, data); + } + break; + case SIOCSIFMTU: + /* XXX can we alter the MTU on a VN ?*/ +#ifdef notyet + if (ifr->ifr_mtu > XN_JUMBO_MTU) + error = EINVAL; + else +#endif + { + 
ifp->if_mtu = ifr->ifr_mtu; + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + xn_ifinit(sc); + } + break; + case SIOCSIFFLAGS: + XN_LOCK(sc); + if (ifp->if_flags & IFF_UP) { + /* + * If only the state of the PROMISC flag changed, + * then just use the 'set promisc mode' command + * instead of reinitializing the entire NIC. Doing + * a full re-init means reloading the firmware and + * waiting for it to start up, which may take a + * second or two. + */ +#ifdef notyet + /* No promiscuous mode with Xen */ + if (ifp->if_drv_flags & IFF_DRV_RUNNING && + ifp->if_flags & IFF_PROMISC && + !(sc->xn_if_flags & IFF_PROMISC)) { + XN_SETBIT(sc, XN_RX_MODE, + XN_RXMODE_RX_PROMISC); + } else if (ifp->if_drv_flags & IFF_DRV_RUNNING && + !(ifp->if_flags & IFF_PROMISC) && + sc->xn_if_flags & IFF_PROMISC) { + XN_CLRBIT(sc, XN_RX_MODE, + XN_RXMODE_RX_PROMISC); + } else +#endif + xn_ifinit_locked(sc); + } else { + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + xn_stop(sc); + } + } + sc->xn_if_flags = ifp->if_flags; + XN_UNLOCK(sc); + error = 0; + break; + case SIOCSIFCAP: + mask = ifr->ifr_reqcap ^ ifp->if_capenable; + if (mask & IFCAP_TXCSUM) { + if (IFCAP_TXCSUM & ifp->if_capenable) { + ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4); + ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP + | CSUM_IP | CSUM_TSO); + } else { + ifp->if_capenable |= IFCAP_TXCSUM; + ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP + | CSUM_IP); + } + } + if (mask & IFCAP_RXCSUM) { + ifp->if_capenable ^= IFCAP_RXCSUM; + } +#if __FreeBSD_version >= 700000 + if (mask & IFCAP_TSO4) { + if (IFCAP_TSO4 & ifp->if_capenable) { + ifp->if_capenable &= ~IFCAP_TSO4; + ifp->if_hwassist &= ~CSUM_TSO; + } else if (IFCAP_TXCSUM & ifp->if_capenable) { + ifp->if_capenable |= IFCAP_TSO4; + ifp->if_hwassist |= CSUM_TSO; + } else { + DPRINTK("Xen requires tx checksum offload" + " be enabled to use TSO\n"); + error = EINVAL; + } + } + if (mask & IFCAP_LRO) { + ifp->if_capenable ^= IFCAP_LRO; + + } +#endif + error = 0; + break; + case SIOCADDMULTI: + case SIOCDELMULTI: +#ifdef notyet + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + XN_LOCK(sc); + xn_setmulti(sc); + XN_UNLOCK(sc); + error = 0; + } +#endif + /* FALLTHROUGH */ + case SIOCSIFMEDIA: + case SIOCGIFMEDIA: + error = EINVAL; + break; + default: + error = ether_ioctl(ifp, cmd, data); + } + + return (error); +} + +static void +xn_stop(struct netfront_info *sc) +{ + struct ifnet *ifp; + + XN_LOCK_ASSERT(sc); + + ifp = sc->xn_ifp; + + callout_stop(&sc->xn_stat_ch); + + xn_free_rx_ring(sc); + xn_free_tx_ring(sc); + + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); +} + +/* START of Xenolinux helper functions adapted to FreeBSD */ +int +network_connect(struct netfront_info *np) +{ + int i, requeue_idx, error; + grant_ref_t ref; + netif_rx_request_t *req; + u_int feature_rx_copy, feature_rx_flip; + + error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev), + "feature-rx-copy", NULL, "%u", &feature_rx_copy); + if (error) + feature_rx_copy = 0; + error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev), + "feature-rx-flip", NULL, "%u", &feature_rx_flip); + if (error) + feature_rx_flip = 1; + + /* + * Copy packets on receive path if: + * (a) This was requested by user, and the backend supports it; or + * (b) Flipping was requested, but this is unsupported by the backend. 
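The feature probing above follows the usual xenstore negotiation pattern: a key the backend does not publish is treated as "feature absent" rather than as a hard error. A minimal sketch of that pattern, reusing the xenbus_scanf() call shown above (the helper name and its placement are illustrative only, not part of the driver):

	/*
	 * Sketch: read an optional numeric feature key from the backend's
	 * xenstore directory, falling back to a default when it is missing.
	 */
	static u_int
	read_backend_feature(device_t dev, const char *key, u_int def)
	{
		u_int val;

		if (xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(dev),
		    key, NULL, "%u", &val) != 0)
			val = def;	/* key absent: assume the default */
		return (val);
	}

With such a helper, feature-rx-copy would default to 0 and feature-rx-flip to 1, matching the fallbacks chosen above.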
+ */ + np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) || + (MODPARM_rx_flip && !feature_rx_flip)); + + XN_LOCK(np); + /* Recovery procedure: */ + error = talk_to_backend(np->xbdev, np); + if (error) + return (error); + + /* Step 1: Reinitialise variables. */ + netif_release_tx_bufs(np); + + /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */ + for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) { + struct mbuf *m; + u_long pfn; + + if (np->rx_mbufs[i] == NULL) + continue; + + m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i); + ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i); + req = RING_GET_REQUEST(&np->rx, requeue_idx); + pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT; + + if (!np->copying_receiver) { + gnttab_grant_foreign_transfer_ref(ref, + xenbus_get_otherend_id(np->xbdev), + pfn); + } else { + gnttab_grant_foreign_access_ref(ref, + xenbus_get_otherend_id(np->xbdev), + PFNTOMFN(pfn), 0); + } + req->gref = ref; + req->id = requeue_idx; + + requeue_idx++; + } + + np->rx.req_prod_pvt = requeue_idx; + + /* Step 3: All public and private state should now be sane. Get + * ready to start sending and receiving packets and give the driver + * domain a kick because we've probably just requeued some + * packets. + */ + netfront_carrier_on(np); + notify_remote_via_irq(np->irq); + XN_TX_LOCK(np); + xn_txeof(np); + XN_TX_UNLOCK(np); + network_alloc_rx_buffers(np); + XN_UNLOCK(np); + + return (0); +} + +static void +show_device(struct netfront_info *sc) +{ +#ifdef DEBUG + if (sc) { + IPRINTK("\n", + sc->xn_ifno, + be_state_name[sc->xn_backend_state], + sc->xn_user_state ? "open" : "closed", + sc->xn_evtchn, + sc->xn_irq, + sc->xn_tx_if, + sc->xn_rx_if); + } else { + IPRINTK("\n"); + } +#endif +} + +/** Create a network device. + * @param handle device handle + */ +int +create_netdev(device_t dev) +{ + int i; + struct netfront_info *np; + int err; + struct ifnet *ifp; + + np = device_get_softc(dev); + + np->xbdev = dev; + + XN_LOCK_INIT(np, xennetif); + np->rx_target = RX_MIN_TARGET; + np->rx_min_target = RX_MIN_TARGET; + np->rx_max_target = RX_MAX_TARGET; + + /* Initialise {tx,rx}_skbs to be a free chain containing every entry. 
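The initialisation loop below turns tx_mbufs[] into its own free list: every unused entry holds the index of the next free entry, with slot 0 acting as the list head, so allocating and releasing a transmit id is O(1) with no extra bookkeeping. A sketch of the pop and push operations such a chain implies (the helper names are mine; the driver's real get_id_from_freelist() used in xn_start_locked() is defined elsewhere in this file):

	/* Sketch only: index-encoded free chain over an array of slots. */
	static u_short
	chain_alloc_id(void *chain[])
	{
		u_short id = (u_short)(u_long)chain[0];	/* current head */

		chain[0] = chain[id];			/* unlink it */
		return (id);
	}

	static void
	chain_free_id(void *chain[], u_short id)
	{
		chain[id] = chain[0];			/* old head follows id */
		chain[0] = (void *)(u_long)id;		/* id becomes the head */
	}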
*/ + for (i = 0; i <= NET_TX_RING_SIZE; i++) { + np->tx_mbufs[i] = (void *) ((u_long) i+1); + np->grant_tx_ref[i] = GRANT_INVALID_REF; + } + for (i = 0; i <= NET_RX_RING_SIZE; i++) { + np->rx_mbufs[i] = NULL; + np->grant_rx_ref[i] = GRANT_INVALID_REF; + } + /* A grant for every tx ring slot */ + if (gnttab_alloc_grant_references(TX_MAX_TARGET, + &np->gref_tx_head) < 0) { + printf("#### netfront can't alloc tx grant refs\n"); + err = ENOMEM; + goto exit; + } + /* A grant for every rx ring slot */ + if (gnttab_alloc_grant_references(RX_MAX_TARGET, + &np->gref_rx_head) < 0) { + printf("#### netfront can't alloc rx grant refs\n"); + gnttab_free_grant_references(np->gref_tx_head); + err = ENOMEM; + goto exit; + } + + err = xen_net_read_mac(dev, np->mac); + if (err) { + xenbus_dev_fatal(dev, err, "parsing %s/mac", + xenbus_get_node(dev)); + goto out; + } + + /* Set up ifnet structure */ + ifp = np->xn_ifp = if_alloc(IFT_ETHER); + ifp->if_softc = np; + if_initname(ifp, "xn", device_get_unit(dev)); + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_ioctl = xn_ioctl; + ifp->if_output = ether_output; + ifp->if_start = xn_start; +#ifdef notyet + ifp->if_watchdog = xn_watchdog; +#endif + ifp->if_init = xn_ifinit; + ifp->if_mtu = ETHERMTU; + ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; + + ifp->if_hwassist = XN_CSUM_FEATURES; + ifp->if_capabilities = IFCAP_HWCSUM; +#if __FreeBSD_version >= 700000 + ifp->if_capabilities |= IFCAP_TSO4; + if (xn_enable_lro) { + int err = tcp_lro_init(&np->xn_lro); + if (err) { + device_printf(dev, "LRO initialization failed\n"); + goto exit; + } + np->xn_lro.ifp = ifp; + ifp->if_capabilities |= IFCAP_LRO; + } +#endif + ifp->if_capenable = ifp->if_capabilities; + + ether_ifattach(ifp, np->mac); + callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE); + netfront_carrier_off(np); + + return (0); + +exit: + gnttab_free_grant_references(np->gref_tx_head); +out: + panic("do something smart"); + +} + +/** + * Handle the change of state of the backend to Closing. We must delete our + * device-layer structures now, to ensure that writes are flushed through to + * the backend. Once is this done, we can switch to Closed in + * acknowledgement. 
+ */ +#if 0 +static void netfront_closing(device_t dev) +{ +#if 0 + struct netfront_info *info = dev->dev_driver_data; + + DPRINTK("netfront_closing: %s removed\n", dev->nodename); + + close_netdev(info); +#endif + xenbus_switch_state(dev, XenbusStateClosed); +} +#endif + +static int netfront_detach(device_t dev) +{ + struct netfront_info *info = device_get_softc(dev); + + DPRINTK("%s\n", xenbus_get_node(dev)); + + netif_free(info); + + return 0; +} + + +static void netif_free(struct netfront_info *info) +{ + netif_disconnect_backend(info); +#if 0 + close_netdev(info); +#endif +} + +static void netif_disconnect_backend(struct netfront_info *info) +{ + XN_RX_LOCK(info); + XN_TX_LOCK(info); + netfront_carrier_off(info); + XN_TX_UNLOCK(info); + XN_RX_UNLOCK(info); + + end_access(info->tx_ring_ref, info->tx.sring); + end_access(info->rx_ring_ref, info->rx.sring); + info->tx_ring_ref = GRANT_INVALID_REF; + info->rx_ring_ref = GRANT_INVALID_REF; + info->tx.sring = NULL; + info->rx.sring = NULL; + + if (info->irq) + unbind_from_irqhandler(info->irq); + + info->irq = 0; +} + + +static void end_access(int ref, void *page) +{ + if (ref != GRANT_INVALID_REF) + gnttab_end_foreign_access(ref, page); +} + +/* ** Driver registration ** */ +static device_method_t netfront_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, netfront_probe), + DEVMETHOD(device_attach, netfront_attach), + DEVMETHOD(device_detach, netfront_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, netfront_resume), + + /* Xenbus interface */ + DEVMETHOD(xenbus_backend_changed, netfront_backend_changed), + + { 0, 0 } +}; + +static driver_t netfront_driver = { + "xn", + netfront_methods, + sizeof(struct netfront_info), +}; +devclass_t netfront_devclass; + +DRIVER_MODULE(xe, xenbus, netfront_driver, netfront_devclass, 0, 0); Property changes on: dev/xen/netfront/netfront.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Property changes on: dev/xen/netfront ___________________________________________________________________ Added: svn:mergeinfo Merged /stable/7/sys/dev/xen/netfront:r172506,172810,175956,179044,179776,180149,182402 Merged 
/head/sys/dev/xen/netfront:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628 Merged /user/dfr/xenhvm/7/sys/dev/xen/netfront:r188754,188757,188991,188996 Index: dev/xen/blkfront/block.h =================================================================== --- dev/xen/blkfront/block.h (.../stable/6/sys) (revision 0) +++ dev/xen/blkfront/block.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,97 @@ +/* + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * $FreeBSD$ + */ + + +#ifndef __XEN_DRIVERS_BLOCK_H__ +#define __XEN_DRIVERS_BLOCK_H__ +#include + +struct xlbd_type_info +{ + int partn_shift; + int disks_per_major; + char *devname; + char *diskname; +}; + +struct xlbd_major_info +{ + int major; + int index; + int usage; + struct xlbd_type_info *type; +}; + +struct blk_shadow { + blkif_request_t req; + unsigned long request; + unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) + + +struct xb_softc { + device_t xb_dev; + struct disk *xb_disk; /* disk params */ + struct bio_queue_head xb_bioq; /* sort queue */ + int xb_unit; + int xb_flags; + struct blkfront_info *xb_info; + LIST_ENTRY(xb_softc) entry; +#define XB_OPEN (1<<0) /* drive is open (can't shut down) */ +}; + + +/* + * We have one of these per vbd, whether ide, scsi or 'other'. They + * hang in private_data off the gendisk structure. 
We may end up + * putting all kinds of interesting stuff here :-) + */ +struct blkfront_info +{ + device_t xbdev; + dev_t dev; + struct gendisk *gd; + int vdevice; + blkif_vdev_t handle; + int connected; + int ring_ref; + blkif_front_ring_t ring; + unsigned int irq; + struct xlbd_major_info *mi; +#if 0 + request_queue_t *rq; + struct work_struct work; +#endif + struct gnttab_free_callback callback; + struct blk_shadow shadow[BLK_RING_SIZE]; + unsigned long shadow_free; + struct xb_softc *sc; + int feature_barrier; + int is_ready; + /** + * The number of people holding this device open. We won't allow a + * hot-unplug unless this is 0. + */ + int users; +}; +/* Note that xlvbd_add doesn't call add_disk for you: you're expected + to call add_disk on info->gd once the disk is properly connected + up. */ +int xlvbd_add(device_t, blkif_sector_t capacity, int device, + uint16_t vdisk_info, uint16_t sector_size, struct blkfront_info *info); +void xlvbd_del(struct blkfront_info *info); + +#endif /* __XEN_DRIVERS_BLOCK_H__ */ + Property changes on: dev/xen/blkfront/block.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: dev/xen/blkfront/blkfront.c =================================================================== --- dev/xen/blkfront/blkfront.c (.../stable/6/sys) (revision 0) +++ dev/xen/blkfront/blkfront.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,1119 @@ +/*- + * All rights reserved. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +/* + * XenoBSD block device driver + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "xenbus_if.h" + +#define ASSERT(S) KASSERT(S, (#S)) +/* prototypes */ +struct xb_softc; +static void xb_startio(struct xb_softc *sc); +static void connect(device_t, struct blkfront_info *); +static void blkfront_closing(device_t); +static int blkfront_detach(device_t); +static int talk_to_backend(device_t, struct blkfront_info *); +static int setup_blkring(device_t, struct blkfront_info *); +static void blkif_int(void *); +#if 0 +static void blkif_restart_queue(void *arg); +#endif +static void blkif_recover(struct blkfront_info *); +static void blkif_completion(struct blk_shadow *); +static void blkif_free(struct blkfront_info *, int); + +#define GRANT_INVALID_REF 0 +#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE) + +LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head; + +/* Control whether runtime update of vbds is enabled. 
*/ +#define ENABLE_VBD_UPDATE 0 + +#if ENABLE_VBD_UPDATE +static void vbd_update(void); +#endif + + +#define BLKIF_STATE_DISCONNECTED 0 +#define BLKIF_STATE_CONNECTED 1 +#define BLKIF_STATE_SUSPENDED 2 + +#ifdef notyet +static char *blkif_state_name[] = { + [BLKIF_STATE_DISCONNECTED] = "disconnected", + [BLKIF_STATE_CONNECTED] = "connected", + [BLKIF_STATE_SUSPENDED] = "closed", +}; + +static char * blkif_status_name[] = { + [BLKIF_INTERFACE_STATUS_CLOSED] = "closed", + [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected", + [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected", + [BLKIF_INTERFACE_STATUS_CHANGED] = "changed", +}; +#endif +#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args) +#if 0 +#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) +#endif + +static grant_ref_t gref_head; +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ + (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) + +static void kick_pending_request_queues(struct blkfront_info *); +static int blkif_open(struct disk *dp); +static int blkif_close(struct disk *dp); +static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td); +static int blkif_queue_request(struct bio *bp); +static void xb_strategy(struct bio *bp); + + + +/* XXX move to xb_vbd.c when VBD update support is added */ +#define MAX_VBDS 64 + +#define XBD_SECTOR_SIZE 512 /* XXX: assume for now */ +#define XBD_SECTOR_SHFT 9 + +static struct mtx blkif_io_lock; + +static vm_paddr_t +pfn_to_mfn(vm_paddr_t pfn) +{ + return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT); +} + +/* + * Translate Linux major/minor to an appropriate name and unit + * number. For HVM guests, this allows us to use the same drive names + * with blkfront as the emulated drives, easing transition slightly. 
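As a worked example of the translation implemented just below: Linux "hda" is major 3, minor 0, so a backend advertising virtual-device 768 attaches as ad0, while "sdb" (major 8, minor 16, virtual-device 2064) gives unit 0 + (16 >> 4) = 1, i.e. da1. A device published with the extended ID scheme (bit 28 set) always ends up with the native xbd name; note, though, that the branch decoding the extended unit number falls through into the generic fallback at the end of the function, so it appears to want an early return.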
+ */ +static void +blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name) +{ + static struct vdev_info { + int major; + int shift; + int base; + const char *name; + } info[] = { + {3, 6, 0, "ad"}, /* ide0 */ + {22, 6, 2, "ad"}, /* ide1 */ + {33, 6, 4, "ad"}, /* ide2 */ + {34, 6, 6, "ad"}, /* ide3 */ + {56, 6, 8, "ad"}, /* ide4 */ + {57, 6, 10, "ad"}, /* ide5 */ + {88, 6, 12, "ad"}, /* ide6 */ + {89, 6, 14, "ad"}, /* ide7 */ + {90, 6, 16, "ad"}, /* ide8 */ + {91, 6, 18, "ad"}, /* ide9 */ + + {8, 4, 0, "da"}, /* scsi disk0 */ + {65, 4, 16, "da"}, /* scsi disk1 */ + {66, 4, 32, "da"}, /* scsi disk2 */ + {67, 4, 48, "da"}, /* scsi disk3 */ + {68, 4, 64, "da"}, /* scsi disk4 */ + {69, 4, 80, "da"}, /* scsi disk5 */ + {70, 4, 96, "da"}, /* scsi disk6 */ + {71, 4, 112, "da"}, /* scsi disk7 */ + {128, 4, 128, "da"}, /* scsi disk8 */ + {129, 4, 144, "da"}, /* scsi disk9 */ + {130, 4, 160, "da"}, /* scsi disk10 */ + {131, 4, 176, "da"}, /* scsi disk11 */ + {132, 4, 192, "da"}, /* scsi disk12 */ + {133, 4, 208, "da"}, /* scsi disk13 */ + {134, 4, 224, "da"}, /* scsi disk14 */ + {135, 4, 240, "da"}, /* scsi disk15 */ + + {202, 4, 0, "xbd"}, /* xbd */ + + {0, 0, 0, NULL}, + }; + int major = vdevice >> 8; + int minor = vdevice & 0xff; + int i; + + if (vdevice & (1 << 28)) { + *unit = (vdevice & ((1 << 28) - 1)) >> 8; + *name = "xbd"; + } + + for (i = 0; info[i].major; i++) { + if (info[i].major == major) { + *unit = info[i].base + (minor >> info[i].shift); + *name = info[i].name; + return; + } + } + + *unit = minor >> 4; + *name = "xbd"; +} + +int +xlvbd_add(device_t dev, blkif_sector_t capacity, + int vdevice, uint16_t vdisk_info, uint16_t sector_size, + struct blkfront_info *info) +{ + struct xb_softc *sc; + int unit, error = 0; + const char *name; + + blkfront_vdevice_to_unit(vdevice, &unit, &name); + + sc = (struct xb_softc *)malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); + sc->xb_unit = unit; + sc->xb_info = info; + info->sc = sc; + + if (strcmp(name, "xbd")) + device_printf(dev, "attaching as %s%d\n", name, unit); + + memset(&sc->xb_disk, 0, sizeof(sc->xb_disk)); + sc->xb_disk = disk_alloc(); + sc->xb_disk->d_unit = sc->xb_unit; + sc->xb_disk->d_open = blkif_open; + sc->xb_disk->d_close = blkif_close; + sc->xb_disk->d_ioctl = blkif_ioctl; + sc->xb_disk->d_strategy = xb_strategy; + sc->xb_disk->d_name = name; + sc->xb_disk->d_drv1 = sc; + sc->xb_disk->d_sectorsize = sector_size; + + /* XXX */ + sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT; +#if 0 + sc->xb_disk->d_maxsize = DFLTPHYS; +#else /* XXX: xen can't handle large single i/o requests */ + sc->xb_disk->d_maxsize = 4096; +#endif +#ifdef notyet + XENPRINTF("attaching device 0x%x unit %d capacity %llu\n", + xb_diskinfo[sc->xb_unit].device, sc->xb_unit, + sc->xb_disk->d_mediasize); +#endif + sc->xb_disk->d_flags = 0; + disk_create(sc->xb_disk, DISK_VERSION_00); + bioq_init(&sc->xb_bioq); + + return error; +} + +void +xlvbd_del(struct blkfront_info *info) +{ + struct xb_softc *sc; + + sc = info->sc; + disk_destroy(sc->xb_disk); +} +/************************ end VBD support *****************/ + +/* + * Read/write routine for a buffer. Finds the proper unit, place it on + * the sortq and kick the controller. + */ +static void +xb_strategy(struct bio *bp) +{ + struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; + + /* bogus disk? 
*/ + if (sc == NULL) { + bp->bio_error = EINVAL; + bp->bio_flags |= BIO_ERROR; + goto bad; + } + + DPRINTK(""); + + /* + * Place it in the queue of disk activities for this disk + */ + mtx_lock(&blkif_io_lock); + bioq_disksort(&sc->xb_bioq, bp); + + xb_startio(sc); + mtx_unlock(&blkif_io_lock); + return; + + bad: + /* + * Correctly set the bio to indicate a failed tranfer. + */ + bp->bio_resid = bp->bio_bcount; + biodone(bp); + return; +} + +static int +blkfront_probe(device_t dev) +{ + + if (!strcmp(xenbus_get_type(dev), "vbd")) { + device_set_desc(dev, "Virtual Block Device"); + device_quiet(dev); + return (0); + } + + return (ENXIO); +} + +/* + * Setup supplies the backend dir, virtual device. We place an event + * channel and shared frame entries. We watch backend to wait if it's + * ok. + */ +static int +blkfront_attach(device_t dev) +{ + int error, vdevice, i, unit; + struct blkfront_info *info; + const char *name; + + /* FIXME: Use dynamic device id if this is not set. */ + error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev), + "virtual-device", NULL, "%i", &vdevice); + if (error) { + xenbus_dev_fatal(dev, error, "reading virtual-device"); + printf("couldn't find virtual device"); + return (error); + } + + blkfront_vdevice_to_unit(vdevice, &unit, &name); + if (!strcmp(name, "xbd")) + device_set_unit(dev, unit); + + info = device_get_softc(dev); + + /* + * XXX debug only + */ + for (i = 0; i < sizeof(*info); i++) + if (((uint8_t *)info)[i] != 0) + panic("non-null memory"); + + info->shadow_free = 0; + info->xbdev = dev; + info->vdevice = vdevice; + info->connected = BLKIF_STATE_DISCONNECTED; + + /* work queue needed ? */ + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + + /* Front end dir is a number, which is used as the id. */ + info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0); + + error = talk_to_backend(dev, info); + if (error) + return (error); + + return (0); +} + +static int +blkfront_suspend(device_t dev) +{ + struct blkfront_info *info = device_get_softc(dev); + + /* Prevent new requests being issued until we fix things up. */ + mtx_lock(&blkif_io_lock); + info->connected = BLKIF_STATE_SUSPENDED; + mtx_unlock(&blkif_io_lock); + + return (0); +} + +static int +blkfront_resume(device_t dev) +{ + struct blkfront_info *info = device_get_softc(dev); + int err; + + DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev)); + + blkif_free(info, 1); + err = talk_to_backend(dev, info); + if (info->connected == BLKIF_STATE_SUSPENDED && !err) + blkif_recover(info); + + return (err); +} + +/* Common code used when first setting up, and when resuming. */ +static int +talk_to_backend(device_t dev, struct blkfront_info *info) +{ + const char *message = NULL; + struct xenbus_transaction xbt; + int err; + + /* Create shared ring, alloc event channel. 
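The handshake that follows uses the standard xenbus transaction idiom: batch the writes inside a transaction and restart from the top if the commit fails with EAGAIN because another writer touched the store. Stripped to a skeleton (error paths omitted, names as in the code below):

	struct xenbus_transaction xbt;
	int err;

 again:
	err = xenbus_transaction_start(&xbt);
	/* ... publish ring-ref, event-channel and protocol via xenbus_printf() ... */
	err = xenbus_transaction_end(xbt, 0);
	if (err == EAGAIN)
		goto again;	/* the store changed under us; redo the writes */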
*/ + err = setup_blkring(dev, info); + if (err) + goto out; + + again: + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_dev_fatal(dev, err, "starting transaction"); + goto destroy_blkring; + } + + err = xenbus_printf(xbt, xenbus_get_node(dev), + "ring-ref","%u", info->ring_ref); + if (err) { + message = "writing ring-ref"; + goto abort_transaction; + } + err = xenbus_printf(xbt, xenbus_get_node(dev), + "event-channel", "%u", irq_to_evtchn_port(info->irq)); + if (err) { + message = "writing event-channel"; + goto abort_transaction; + } + err = xenbus_printf(xbt, xenbus_get_node(dev), + "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); + if (err) { + message = "writing protocol"; + goto abort_transaction; + } + + err = xenbus_transaction_end(xbt, 0); + if (err) { + if (err == EAGAIN) + goto again; + xenbus_dev_fatal(dev, err, "completing transaction"); + goto destroy_blkring; + } + xenbus_set_state(dev, XenbusStateInitialised); + + return 0; + + abort_transaction: + xenbus_transaction_end(xbt, 1); + if (message) + xenbus_dev_fatal(dev, err, "%s", message); + destroy_blkring: + blkif_free(info, 0); + out: + return err; +} + +static int +setup_blkring(device_t dev, struct blkfront_info *info) +{ + blkif_sring_t *sring; + int error; + + info->ring_ref = GRANT_INVALID_REF; + + sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); + if (sring == NULL) { + xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring"); + return ENOMEM; + } + SHARED_RING_INIT(sring); + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); + + error = xenbus_grant_ring(dev, + (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref); + if (error) { + free(sring, M_DEVBUF); + info->ring.sring = NULL; + goto fail; + } + + error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev), + "xbd", (driver_intr_t *)blkif_int, info, + INTR_TYPE_BIO | INTR_MPSAFE, &info->irq); + if (error) { + xenbus_dev_fatal(dev, error, + "bind_evtchn_to_irqhandler failed"); + goto fail; + } + + return (0); + fail: + blkif_free(info, 0); + return (error); +} + + +/** + * Callback received when the backend's state changes. + */ +static void +blkfront_backend_changed(device_t dev, XenbusState backend_state) +{ + struct blkfront_info *info = device_get_softc(dev); + + DPRINTK("backend_state=%d\n", backend_state); + + switch (backend_state) { + case XenbusStateUnknown: + case XenbusStateInitialising: + case XenbusStateInitWait: + case XenbusStateInitialised: + case XenbusStateClosed: + case XenbusStateReconfigured: + case XenbusStateReconfiguring: + break; + + case XenbusStateConnected: + connect(dev, info); + break; + + case XenbusStateClosing: + if (info->users > 0) + xenbus_dev_error(dev, -EBUSY, + "Device in use; refusing to close"); + else + blkfront_closing(dev); +#ifdef notyet + bd = bdget(info->dev); + if (bd == NULL) + xenbus_dev_fatal(dev, -ENODEV, "bdget failed"); + + down(&bd->bd_sem); + if (info->users > 0) + xenbus_dev_error(dev, -EBUSY, + "Device in use; refusing to close"); + else + blkfront_closing(dev); + up(&bd->bd_sem); + bdput(bd); +#endif + } +} + +/* +** Invoked when the backend is finally 'ready' (and has told produced +** the details about the physical device - #sectors, size, etc). 
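To make the size reporting below concrete: the backend publishes the capacity in sectors, so sectors = 20971520 with sector-size = 512 is printed as 20971520 / (1048576 / 512) = 10240MB, and xlvbd_add() sets the disk's d_mediasize to capacity << XBD_SECTOR_SHFT, i.e. 10 GiB for the same example.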
+*/ +static void +connect(device_t dev, struct blkfront_info *info) +{ + unsigned long sectors, sector_size; + unsigned int binfo; + int err; + + if( (info->connected == BLKIF_STATE_CONNECTED) || + (info->connected == BLKIF_STATE_SUSPENDED) ) + return; + + DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev)); + + err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), + "sectors", "%lu", §ors, + "info", "%u", &binfo, + "sector-size", "%lu", §or_size, + NULL); + if (err) { + xenbus_dev_fatal(dev, err, + "reading backend fields at %s", + xenbus_get_otherend_path(dev)); + return; + } + err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev), + "feature-barrier", "%lu", &info->feature_barrier, + NULL); + if (err) + info->feature_barrier = 0; + + device_printf(dev, "%juMB <%s> at %s", + (uintmax_t) sectors / (1048576 / sector_size), + device_get_desc(dev), + xenbus_get_node(dev)); + bus_print_child_footer(device_get_parent(dev), dev); + + xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info); + + (void)xenbus_set_state(dev, XenbusStateConnected); + + /* Kick pending requests. */ + mtx_lock(&blkif_io_lock); + info->connected = BLKIF_STATE_CONNECTED; + kick_pending_request_queues(info); + mtx_unlock(&blkif_io_lock); + info->is_ready = 1; + +#if 0 + add_disk(info->gd); +#endif +} + +/** + * Handle the change of state of the backend to Closing. We must delete our + * device-layer structures now, to ensure that writes are flushed through to + * the backend. Once is this done, we can switch to Closed in + * acknowledgement. + */ +static void +blkfront_closing(device_t dev) +{ + struct blkfront_info *info = device_get_softc(dev); + + DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev)); + + if (info->mi) { + DPRINTK("Calling xlvbd_del\n"); + xlvbd_del(info); + info->mi = NULL; + } + + xenbus_set_state(dev, XenbusStateClosed); +} + + +static int +blkfront_detach(device_t dev) +{ + struct blkfront_info *info = device_get_softc(dev); + + DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev)); + + blkif_free(info, 0); + + return 0; +} + + +static inline int +GET_ID_FROM_FREELIST(struct blkfront_info *info) +{ + unsigned long nfree = info->shadow_free; + + KASSERT(nfree <= BLK_RING_SIZE, ("free %lu > RING_SIZE", nfree)); + info->shadow_free = info->shadow[nfree].req.id; + info->shadow[nfree].req.id = 0x0fffffee; /* debug */ + return nfree; +} + +static inline void +ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id) +{ + info->shadow[id].req.id = info->shadow_free; + info->shadow[id].request = 0; + info->shadow_free = id; +} + +static inline void +flush_requests(struct blkfront_info *info) +{ + int notify; + + RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, notify); + + if (notify) + notify_remote_via_irq(info->irq); +} + +static void +kick_pending_request_queues(struct blkfront_info *info) +{ + /* XXX check if we can't simplify */ +#if 0 + if (!RING_FULL(&info->ring)) { + /* Re-enable calldowns. */ + blk_start_queue(info->rq); + /* Kick things off immediately. */ + do_blkif_request(info->rq); + } +#endif + if (!RING_FULL(&info->ring)) { +#if 0 + sc = LIST_FIRST(&xbsl_head); + LIST_REMOVE(sc, entry); + /* Re-enable calldowns. */ + blk_start_queue(di->rq); +#endif + /* Kick things off immediately. 
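Concretely, for the shadow free list manipulated above: right after attach shadow_free is 0 and shadow[i].req.id holds i + 1, so GET_ID_FROM_FREELIST() hands out ids 0, 1, 2, ... in order, ADD_ID_TO_FREELIST() pushes a completed id back onto the head (LIFO), and the 0x0fffffff sentinel written into the last slot terminates the chain.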
*/ + xb_startio(info->sc); + } +} + +#if 0 +/* XXX */ +static void blkif_restart_queue(void *arg) +{ + struct blkfront_info *info = (struct blkfront_info *)arg; + + mtx_lock(&blkif_io_lock); + kick_pending_request_queues(info); + mtx_unlock(&blkif_io_lock); +} +#endif + +static void blkif_restart_queue_callback(void *arg) +{ +#if 0 + struct blkfront_info *info = (struct blkfront_info *)arg; + /* XXX BSD equiv ? */ + + schedule_work(&info->work); +#endif +} + +static int +blkif_open(struct disk *dp) +{ + struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; + + if (sc == NULL) { + printf("xb%d: not found", sc->xb_unit); + return (ENXIO); + } + + sc->xb_flags |= XB_OPEN; + sc->xb_info->users++; + return (0); +} + +static int +blkif_close(struct disk *dp) +{ + struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; + + if (sc == NULL) + return (ENXIO); + sc->xb_flags &= ~XB_OPEN; + if (--(sc->xb_info->users) == 0) { + /* Check whether we have been instructed to close. We will + have ignored this request initially, as the device was + still mounted. */ + device_t dev = sc->xb_info->xbdev; + XenbusState state = + xenbus_read_driver_state(xenbus_get_otherend_path(dev)); + + if (state == XenbusStateClosing) + blkfront_closing(dev); + } + return (0); +} + +static int +blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td) +{ + struct xb_softc *sc = (struct xb_softc *)dp->d_drv1; + + if (sc == NULL) + return (ENXIO); + + return (ENOTTY); +} + + +/* + * blkif_queue_request + * + * request block io + * + * id: for guest use only. + * operation: BLKIF_OP_{READ,WRITE,PROBE} + * buffer: buffer to read/write into. this should be a + * virtual address in the guest os. + */ +static int blkif_queue_request(struct bio *bp) +{ + caddr_t alignbuf; + vm_paddr_t buffer_ma; + blkif_request_t *ring_req; + unsigned long id; + uint64_t fsect, lsect; + struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1; + struct blkfront_info *info = sc->xb_info; + int ref; + + if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED)) + return 1; + + if (gnttab_alloc_grant_references( + BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) { + gnttab_request_free_callback( + &info->callback, + blkif_restart_queue_callback, + info, + BLKIF_MAX_SEGMENTS_PER_REQUEST); + return 1; + } + + /* Check if the buffer is properly aligned */ + if ((vm_offset_t)bp->bio_data & PAGE_MASK) { + int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE : + PAGE_SIZE; + caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF, + M_NOWAIT); + + alignbuf = (char *)roundup2((u_long)newbuf, align); + + /* save a copy of the current buffer */ + bp->bio_driver1 = newbuf; + bp->bio_driver2 = alignbuf; + + /* Copy the data for a write */ + if (bp->bio_cmd == BIO_WRITE) + bcopy(bp->bio_data, alignbuf, bp->bio_bcount); + } else + alignbuf = bp->bio_data; + + /* Fill out a communications ring structure. */ + ring_req = RING_GET_REQUEST(&info->ring, + info->ring.req_prod_pvt); + id = GET_ID_FROM_FREELIST(info); + info->shadow[id].request = (unsigned long)bp; + + ring_req->id = id; + ring_req->operation = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ : + BLKIF_OP_WRITE; + + ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno; + ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xb_disk; + + ring_req->nr_segments = 0; /* XXX not doing scatter/gather since buffer + * chaining is not supported. 
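For scale: XBD_SECTOR_SHFT is 9 and xlvbd_add() capped d_maxsize at 4096 bytes, so a page-aligned 4096-byte request computed just below gets fsect = 0 and lsect = (4096 >> 9) - 1 = 7, exactly the KASSERT(lsect <= 7) bound, i.e. a single request never crosses a page boundary.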
+ */ + + buffer_ma = vtomach(alignbuf); + fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT; + lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1; + /* install a grant reference. */ + ref = gnttab_claim_grant_reference(&gref_head); + KASSERT( ref != -ENOSPC, ("grant_reference failed") ); + + gnttab_grant_foreign_access_ref( + ref, + xenbus_get_otherend_id(info->xbdev), + buffer_ma >> PAGE_SHIFT, + ring_req->operation & 1 ); /* ??? */ + info->shadow[id].frame[ring_req->nr_segments] = + buffer_ma >> PAGE_SHIFT; + + ring_req->seg[ring_req->nr_segments] = + (struct blkif_request_segment) { + .gref = ref, + .first_sect = fsect, + .last_sect = lsect }; + + ring_req->nr_segments++; + KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0, + ("XEN buffer must be sector aligned")); + KASSERT(lsect <= 7, + ("XEN disk driver data cannot cross a page boundary")); + + buffer_ma &= ~PAGE_MASK; + + info->ring.req_prod_pvt++; + + /* Keep a private copy so we can reissue requests when recovering. */ + info->shadow[id].req = *ring_req; + + gnttab_free_grant_references(gref_head); + + return 0; +} + + + +/* + * Dequeue buffers and place them in the shared communication ring. + * Return when no more requests can be accepted or all buffers have + * been queued. + * + * Signal XEN once the ring has been filled out. + */ +static void +xb_startio(struct xb_softc *sc) +{ + struct bio *bp; + int queued = 0; + struct blkfront_info *info = sc->xb_info; + DPRINTK(""); + + mtx_assert(&blkif_io_lock, MA_OWNED); + + while ((bp = bioq_takefirst(&sc->xb_bioq)) != NULL) { + + if (RING_FULL(&info->ring)) + goto wait; + + if (blkif_queue_request(bp)) { + wait: + bioq_insert_head(&sc->xb_bioq, bp); + break; + } + queued++; + } + + if (queued != 0) + flush_requests(sc->xb_info); +} + +static void +blkif_int(void *xsc) +{ + struct xb_softc *sc = NULL; + struct bio *bp; + blkif_response_t *bret; + RING_IDX i, rp; + struct blkfront_info *info = xsc; + DPRINTK(""); + + TRACE_ENTER; + + mtx_lock(&blkif_io_lock); + + if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) { + mtx_unlock(&blkif_io_lock); + return; + } + + again: + rp = info->ring.sring->rsp_prod; + rmb(); /* Ensure we see queued responses up to 'rp'. 
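This read barrier pairs with the backend's matching write barrier: the backend fills in each response slot and only then advances rsp_prod, so the snapshot of rsp_prod must be separated from the RING_GET_RESPONSE() loads that follow, or a reordered read could observe a response index before its contents.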
*/ + + for (i = info->ring.rsp_cons; i != rp; i++) { + unsigned long id; + + bret = RING_GET_RESPONSE(&info->ring, i); + id = bret->id; + bp = (struct bio *)info->shadow[id].request; + + blkif_completion(&info->shadow[id]); + + ADD_ID_TO_FREELIST(info, id); + + switch (bret->operation) { + case BLKIF_OP_READ: + /* had an unaligned buffer that needs to be copied */ + if (bp->bio_driver1) + bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount); + /* FALLTHROUGH */ + case BLKIF_OP_WRITE: + + /* free the copy buffer */ + if (bp->bio_driver1) { + free(bp->bio_driver1, M_DEVBUF); + bp->bio_driver1 = NULL; + } + + if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) { + printf("Bad return from blkdev data request: %x\n", + bret->status); + bp->bio_flags |= BIO_ERROR; + } + + sc = (struct xb_softc *)bp->bio_disk->d_drv1; + + if (bp->bio_flags & BIO_ERROR) + bp->bio_error = EIO; + else + bp->bio_resid = 0; + + biodone(bp); + break; + default: + panic("received invalid operation"); + break; + } + } + + info->ring.rsp_cons = i; + + if (i != info->ring.req_prod_pvt) { + int more_to_do; + RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do); + if (more_to_do) + goto again; + } else { + info->ring.sring->rsp_event = i + 1; + } + + kick_pending_request_queues(info); + + mtx_unlock(&blkif_io_lock); +} + +static void +blkif_free(struct blkfront_info *info, int suspend) +{ + +/* Prevent new requests being issued until we fix things up. */ + mtx_lock(&blkif_io_lock); + info->connected = suspend ? + BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED; + mtx_unlock(&blkif_io_lock); + + /* Free resources associated with old device channel. */ + if (info->ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(info->ring_ref, + info->ring.sring); + info->ring_ref = GRANT_INVALID_REF; + info->ring.sring = NULL; + } + if (info->irq) + unbind_from_irqhandler(info->irq); + info->irq = 0; + +} + +static void +blkif_completion(struct blk_shadow *s) +{ + int i; + + for (i = 0; i < s->req.nr_segments; i++) + gnttab_end_foreign_access(s->req.seg[i].gref, 0UL); +} + +static void +blkif_recover(struct blkfront_info *info) +{ + int i, j; + blkif_request_t *req; + struct blk_shadow *copy; + + if (!info->sc) + return; + + /* Stage 1: Make a safe copy of the shadow state. */ + copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO); + memcpy(copy, info->shadow, sizeof(info->shadow)); + + /* Stage 2: Set up free list. */ + memset(&info->shadow, 0, sizeof(info->shadow)); + for (i = 0; i < BLK_RING_SIZE; i++) + info->shadow[i].req.id = i+1; + info->shadow_free = info->ring.req_prod_pvt; + info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff; + + /* Stage 3: Find pending requests and requeue them. */ + for (i = 0; i < BLK_RING_SIZE; i++) { + /* Not in use? */ + if (copy[i].request == 0) + continue; + + /* Grab a request slot and copy shadow state into it. */ + req = RING_GET_REQUEST( + &info->ring, info->ring.req_prod_pvt); + *req = copy[i].req; + + /* We get a new request id, and must reset the shadow state. */ + req->id = GET_ID_FROM_FREELIST(info); + memcpy(&info->shadow[req->id], ©[i], sizeof(copy[i])); + + /* Rewrite any grant references invalidated by suspend/resume. 
*/ + for (j = 0; j < req->nr_segments; j++) + gnttab_grant_foreign_access_ref( + req->seg[j].gref, + xenbus_get_otherend_id(info->xbdev), + pfn_to_mfn(info->shadow[req->id].frame[j]), + 0 /* assume not readonly */); + + info->shadow[req->id].req = *req; + + info->ring.req_prod_pvt++; + } + + free(copy, M_DEVBUF); + + xenbus_set_state(info->xbdev, XenbusStateConnected); + + /* Now safe for us to use the shared ring */ + mtx_lock(&blkif_io_lock); + info->connected = BLKIF_STATE_CONNECTED; + mtx_unlock(&blkif_io_lock); + + /* Send off requeued requests */ + mtx_lock(&blkif_io_lock); + flush_requests(info); + + /* Kick any other new requests queued since we resumed */ + kick_pending_request_queues(info); + mtx_unlock(&blkif_io_lock); +} + +/* ** Driver registration ** */ +static device_method_t blkfront_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, blkfront_probe), + DEVMETHOD(device_attach, blkfront_attach), + DEVMETHOD(device_detach, blkfront_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, blkfront_suspend), + DEVMETHOD(device_resume, blkfront_resume), + + /* Xenbus interface */ + DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed), + + { 0, 0 } +}; + +static driver_t blkfront_driver = { + "xbd", + blkfront_methods, + sizeof(struct blkfront_info), +}; +devclass_t blkfront_devclass; + +DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0); + +MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */ + Property changes on: dev/xen/blkfront/blkfront.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: dev/xen/console/xencons_ring.h =================================================================== --- dev/xen/console/xencons_ring.h (.../stable/6/sys) (revision 0) +++ dev/xen/console/xencons_ring.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,20 @@ +/* + * $FreeBSD$ + * + */ +#ifndef _XENCONS_RING_H +#define _XENCONS_RING_H + +int xencons_ring_init(void); +int xencons_ring_send(const char *data, unsigned len); +void xencons_rx(char *buf, unsigned len); +void xencons_tx(void); + + +typedef void (xencons_receiver_func)(char *buf, unsigned len); +void xencons_ring_register_receiver(xencons_receiver_func *f); + +void xencons_handle_input(void *unused); +int xencons_has_input(void); + +#endif /* _XENCONS_RING_H */ Property changes on: dev/xen/console/xencons_ring.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: dev/xen/console/console.c =================================================================== --- dev/xen/console/console.c (.../stable/6/sys) (revision 0) +++ dev/xen/console/console.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,569 @@ +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +#include "opt_ddb.h" +#ifdef DDB +#include +#endif + +static char driver_name[] = "xc"; +devclass_t xc_devclass; /* do not make static */ +static void xcstart (struct tty *); +static int xcparam (struct tty *, struct termios *); +static void xcstop (struct tty *, int); +static void xc_timeout(void *); +static void __xencons_tx_flush(void); 
+static boolean_t xcons_putc(int c); + +/* switch console so that shutdown can occur gracefully */ +static void xc_shutdown(void *arg, int howto); +static int xc_mute; + +static void xcons_force_flush(void); +static void xencons_priv_interrupt(void *); + +static cn_probe_t xccnprobe; +static cn_init_t xccninit; +static cn_getc_t xccngetc; +static cn_putc_t xccnputc; +static cn_putc_t xccnputc_dom0; +static cn_checkc_t xccncheckc; + +#define XC_POLLTIME (hz/10) + +CONS_DRIVER(xc, xccnprobe, xccninit, NULL, xccngetc, + xccncheckc, xccnputc, NULL); + +static int xen_console_up; +static boolean_t xc_start_needed; +static struct callout xc_callout; +struct mtx cn_mtx; + +#define RBUF_SIZE 1024 +#define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1)) +#define WBUF_SIZE 4096 +#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1)) +static char wbuf[WBUF_SIZE]; +static char rbuf[RBUF_SIZE]; +static int rc, rp; +static unsigned int cnsl_evt_reg; +static unsigned int wc, wp; /* write_cons, write_prod */ + +#define CDEV_MAJOR 12 +#define XCUNIT(x) (minor(x)) +#define ISTTYOPEN(tp) ((tp) && ((tp)->t_state & TS_ISOPEN)) +#define CN_LOCK_INIT(x, _name) \ + mtx_init(&x, _name, NULL, MTX_DEF|MTX_RECURSE) + +#define CN_LOCK(l) \ + do { \ + if (panicstr == NULL) \ + mtx_lock(&(l)); \ + } while (0) +#define CN_UNLOCK(l) \ + do { \ + if (panicstr == NULL) \ + mtx_unlock(&(l)); \ + } while (0) +#define CN_LOCK_ASSERT(x) mtx_assert(&x, MA_OWNED) +#define CN_LOCK_DESTROY(x) mtx_destroy(&x) + + +static struct tty *xccons; + +struct xc_softc { + int xc_unit; + struct cdev *xc_dev; +}; + + +static d_open_t xcopen; +static d_close_t xcclose; +static d_ioctl_t xcioctl; + +static struct cdevsw xc_cdevsw = { + .d_version = D_VERSION, + .d_flags = D_TTY | D_NEEDGIANT, + .d_name = driver_name, + .d_open = xcopen, + .d_close = xcclose, + .d_read = ttyread, + .d_write = ttywrite, + .d_ioctl = xcioctl, + .d_poll = ttypoll, + .d_kqfilter = ttykqfilter, +}; + +static void +xccnprobe(struct consdev *cp) +{ + cp->cn_pri = CN_REMOTE; + cp->cn_tp = xccons; + sprintf(cp->cn_name, "%s0", driver_name); +} + + +static void +xccninit(struct consdev *cp) +{ + CN_LOCK_INIT(cn_mtx,"XCONS LOCK"); + +} +int +xccngetc(struct consdev *dev) +{ + int c; + if (xc_mute) + return 0; + do { + if ((c = xccncheckc(dev)) == -1) { +#ifdef KDB + if (!kdb_active) +#endif + /* + * Polling without sleeping in Xen + * doesn't work well. Sleeping gives + * other things like clock a chance to + * run + */ + tsleep(&cn_mtx, PWAIT | PCATCH, + "console sleep", XC_POLLTIME); + } + } while(c == -1); + return c; +} + +int +xccncheckc(struct consdev *dev) +{ + int ret = (xc_mute ? 
0 : -1); + + if (xencons_has_input()) + xencons_handle_input(NULL); + + CN_LOCK(cn_mtx); + if ((rp - rc)) { + if (kdb_active) printf("%s:%d\n", __func__, __LINE__); + /* we need to return only one char */ + ret = (int)rbuf[RBUF_MASK(rc)]; + rc++; + } + CN_UNLOCK(cn_mtx); + return(ret); +} + +static void +xccnputc(struct consdev *dev, int c) +{ + xcons_putc(c); +} + +static void +xccnputc_dom0(struct consdev *dev, int c) +{ + HYPERVISOR_console_io(CONSOLEIO_write, 1, (char *)&c); +} + +extern int db_active; +static boolean_t +xcons_putc(int c) +{ + int force_flush = xc_mute || +#ifdef DDB + db_active || +#endif + panicstr; /* we're not gonna recover, so force + * flush + */ + + if ((wp-wc) < (WBUF_SIZE-1)) { + if ((wbuf[WBUF_MASK(wp++)] = c) == '\n') { + wbuf[WBUF_MASK(wp++)] = '\r'; +#ifdef notyet + if (force_flush) + xcons_force_flush(); +#endif + } + } else if (force_flush) { +#ifdef notyet + xcons_force_flush(); +#endif + } + if (cnsl_evt_reg) + __xencons_tx_flush(); + + /* inform start path that we're pretty full */ + return ((wp - wc) >= WBUF_SIZE - 100) ? TRUE : FALSE; +} + +static void +xc_identify(driver_t *driver, device_t parent) +{ + device_t child; + child = BUS_ADD_CHILD(parent, 0, driver_name, 0); + device_set_driver(child, driver); + device_set_desc(child, "Xen Console"); +} + +static int +xc_probe(device_t dev) +{ + struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev); + + sc->xc_unit = device_get_unit(dev); + return (0); +} + +static int +xc_attach(device_t dev) +{ + int error; + struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev); + int error; + + if (xen_start_info->flags & SIF_INITDOMAIN) { + xc_consdev.cn_putc = xccnputc_dom0; + } + + sc->xc_dev = make_dev(&xc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "xc%r", 0); + xccons = ttyalloc(); + + sc->xc_dev->si_drv1 = (void *)sc; + sc->xc_dev->si_tty = xccons; + + xccons->t_oproc = xcstart; + xccons->t_param = xcparam; + xccons->t_stop = xcstop; + xccons->t_dev = sc->xc_dev; + + callout_init(&xc_callout, 0); + + xencons_ring_init(); + + cnsl_evt_reg = 1; + callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons); + + if (xen_start_info->flags & SIF_INITDOMAIN) { + error = bind_virq_to_irqhandler( + VIRQ_CONSOLE, + 0, + "console", + xencons_priv_interrupt, + INTR_TYPE_TTY, NULL); + + KASSERT(error >= 0, ("can't register console interrupt")); + } + + /* register handler to flush console on shutdown */ + if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown, + NULL, SHUTDOWN_PRI_DEFAULT)) == NULL) + printf("xencons: shutdown event registration failed!\n"); + + return (0); +} + +/* + * return 0 for all console input, force flush all output. 
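One point worth keeping in mind for the buffer handling in this file: wc and wp are free-running counters, not wrapped indices. WBUF_MASK() reduces them modulo the power-of-two buffer size only when indexing, so (wp - wc) is always the number of bytes pending; with WBUF_SIZE of 4096, wc = 4090 and wp = 4100 mean ten buffered bytes stored at offsets 4090 through 4095 and 0 through 3.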
+ */ +static void +xc_shutdown(void *arg, int howto) +{ + xc_mute = 1; + xcons_force_flush(); +} + +void +xencons_rx(char *buf, unsigned len) +{ + int i; + struct tty *tp = xccons; + +#if 1 + if (len > 0 && buf[0] == '`') + printf("%08lx %08lx\r", + HYPERVISOR_shared_info->evtchn_pending[0], + HYPERVISOR_shared_info->evtchn_mask[0]); +#endif + for (i = 0; i < len; i++) { + if (xen_console_up +#ifdef DDB + && !kdb_active +#endif + ) + (*linesw[tp->t_line]->l_rint)(buf[i], tp); + else + rbuf[RBUF_MASK(rp++)] = buf[i]; + } +} + +static void +__xencons_tx_flush(void) +{ + int sz, work_done = 0; + + CN_LOCK(cn_mtx); + while (wc != wp) { + int sent; + sz = wp - wc; + if (sz > (WBUF_SIZE - WBUF_MASK(wc))) + sz = WBUF_SIZE - WBUF_MASK(wc); + if (xen_start_info->flags & SIF_INITDOMAIN) { + HYPERVISOR_console_io(CONSOLEIO_write, sz, &wbuf[WBUF_MASK(wc)]); + wc += sz; + } else { + sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); + if (sent == 0) + break; + wc += sent; + } + work_done = 1; + } + CN_UNLOCK(cn_mtx); + + /* + * ttwakeup calls routines using blocking locks + * + */ + if (work_done && xen_console_up && curthread->td_critnest == 0) + ttwakeup(xccons); +} + +void +xencons_tx(void) +{ + __xencons_tx_flush(); +} + +static void +xencons_priv_interrupt(void *arg) +{ + + static char rbuf[16]; + int l; + + while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0) + xencons_rx(rbuf, l); + + xencons_tx(); +} + +int +xcopen(struct cdev *dev, int flag, int mode, struct thread *td) +{ + struct xc_softc *sc; + int unit = XCUNIT(dev); + struct tty *tp; + int s, error; + + sc = (struct xc_softc *)device_get_softc( + devclass_get_device(xc_devclass, unit)); + if (sc == NULL) + return (ENXIO); + + tp = dev->si_tty; + s = spltty(); + if (!ISTTYOPEN(tp)) { + tp->t_state |= TS_CARR_ON; + ttychars(tp); + tp->t_iflag = TTYDEF_IFLAG; + tp->t_oflag = TTYDEF_OFLAG; + tp->t_cflag = TTYDEF_CFLAG|CLOCAL; + tp->t_lflag = TTYDEF_LFLAG; + tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED; + xcparam(tp, &tp->t_termios); + ttsetwater(tp); + } else if (tp->t_state & TS_XCLUDE && suser(td)) { + splx(s); + return (EBUSY); + } + splx(s); + + xen_console_up = 1; + + error = (*linesw[tp->t_line]->l_open)(dev, tp); + return error; +} + +int +xcclose(struct cdev *dev, int flag, int mode, struct thread *td) +{ + struct tty *tp = dev->si_tty; + + if (tp == NULL) + return (0); + xen_console_up = 0; + + spltty(); + (*linesw[tp->t_line]->l_close)(tp, flag); + tty_close(tp); + spl0(); + return (0); +} + + +int +xcioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) +{ + struct tty *tp = dev->si_tty; + int error; + + error = (*linesw[tp->t_line]->l_ioctl)(tp, cmd, data, flag, td); + if (error != ENOIOCTL) + return (error); + + error = ttioctl(tp, cmd, data, flag); + + if (error != ENOIOCTL) + return (error); + + return (ENOTTY); +} + +static inline int +__xencons_put_char(int ch) +{ + char _ch = (char)ch; + if ((wp - wc) == WBUF_SIZE) + return 0; + wbuf[WBUF_MASK(wp++)] = _ch; + return 1; +} + + +static void +xcstart(struct tty *tp) +{ + boolean_t cons_full = FALSE; + + CN_LOCK(cn_mtx); + if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { + CN_UNLOCK(cn_mtx); + + ttwwakeup(tp); + return; + } + + tp->t_state |= TS_BUSY; + CN_UNLOCK(cn_mtx); + + while (tp->t_outq.c_cc != 0 && !cons_full) + cons_full = xcons_putc(getc(&tp->t_outq)); + + /* if the console is close to full leave our state as busy */ + if (!cons_full) { + CN_LOCK(cn_mtx); + tp->t_state &= ~TS_BUSY; + CN_UNLOCK(cn_mtx); + ttwwakeup(tp); + } else { + /* 
let the timeout kick us in a bit */ + xc_start_needed = TRUE; + } + +} + +static void +xcstop(struct tty *tp, int flag) +{ + + if (tp->t_state & TS_BUSY) { + if ((tp->t_state & TS_TTSTOP) == 0) { + tp->t_state |= TS_FLUSH; + } + } +} + +static void +xc_timeout(void *v) +{ + struct tty *tp; + int c; + + tp = (struct tty *)v; + + while ((c = xccncheckc(NULL)) != -1) { + if (tp->t_state & TS_ISOPEN) { + (*linesw[tp->t_line]->l_rint)(c, tp); + } + } + + if (xc_start_needed) { + xc_start_needed = FALSE; + xcstart(tp); + } + + callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, tp); +} + +/* + * Set line parameters. + */ +int +xcparam(struct tty *tp, struct termios *t) +{ + tp->t_ispeed = t->c_ispeed; + tp->t_ospeed = t->c_ospeed; + tp->t_cflag = t->c_cflag; + return (0); +} + + +static device_method_t xc_methods[] = { + DEVMETHOD(device_identify, xc_identify), + DEVMETHOD(device_probe, xc_probe), + DEVMETHOD(device_attach, xc_attach), + {0, 0} +}; + +static driver_t xc_driver = { + driver_name, + xc_methods, + sizeof(struct xc_softc), +}; + +/*** Forcibly flush console data before dying. ***/ +void +xcons_force_flush(void) +{ + int sz; + + if (xen_start_info->flags & SIF_INITDOMAIN) + return; + + /* Spin until console data is flushed through to the domain controller. */ + while (wc != wp) { + int sent = 0; + if ((sz = wp - wc) == 0) + continue; + + sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz); + if (sent > 0) + wc += sent; + } +} + +DRIVER_MODULE(xc, nexus, xc_driver, xc_devclass, 0, 0); Property changes on: dev/xen/console/console.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: dev/xen/console/xencons_ring.c =================================================================== --- dev/xen/console/xencons_ring.c (.../stable/6/sys) (revision 0) +++ dev/xen/console/xencons_ring.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,165 @@ +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#define console_evtchn console.domU.evtchn +static unsigned int console_irq; +extern char *console_page; +extern struct mtx cn_mtx; + +static inline struct xencons_interface * +xencons_interface(void) +{ + return (struct xencons_interface *)console_page; +} + + +int +xencons_has_input(void) +{ + struct xencons_interface *intf; + + intf = xencons_interface(); + + return (intf->in_cons != intf->in_prod); +} + + +int +xencons_ring_send(const char *data, unsigned len) +{ + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + int sent; + + intf = xencons_interface(); + cons = intf->out_cons; + prod = intf->out_prod; + sent = 0; + + mb(); + KASSERT((prod - cons) <= sizeof(intf->out), + ("console send ring inconsistent")); + + while ((sent < len) && ((prod - cons) < sizeof(intf->out))) + intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = data[sent++]; + + wmb(); + intf->out_prod = prod; + + notify_remote_via_evtchn(xen_start_info->console_evtchn); + + return sent; + +} + + +static xencons_receiver_func *xencons_receiver; + +void +xencons_handle_input(void *unused) +{ + struct xencons_interface *intf; + XENCONS_RING_IDX cons, prod; + + mtx_lock(&cn_mtx); + intf = xencons_interface(); + + cons = intf->in_cons; + prod = intf->in_prod; + + /* XXX needs 
locking */ + while (cons != prod) { + xencons_rx(intf->in + MASK_XENCONS_IDX(cons, intf->in), 1); + cons++; + } + + mb(); + intf->in_cons = cons; + + notify_remote_via_evtchn(xen_start_info->console_evtchn); + + xencons_tx(); + mtx_unlock(&cn_mtx); +} + +void +xencons_ring_register_receiver(xencons_receiver_func *f) +{ + xencons_receiver = f; +} + +int +xencons_ring_init(void) +{ + int err; + + if (!xen_start_info->console_evtchn) + return 0; + + err = bind_caller_port_to_irqhandler(xen_start_info->console_evtchn, + "xencons", xencons_handle_input, NULL, + INTR_TYPE_MISC | INTR_MPSAFE, &console_irq); + if (err) { + return err; + } + + return 0; +} + +extern void xencons_suspend(void); +extern void xencons_resume(void); + +void +xencons_suspend(void) +{ + + if (!xen_start_info->console_evtchn) + return; + + unbind_from_irqhandler(console_irq); +} + +void +xencons_resume(void) +{ + + (void)xencons_ring_init(); +} + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 8 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ Property changes on: dev/xen/console/xencons_ring.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: dev/xen/pcifront/pcifront.c =================================================================== --- dev/xen/pcifront/pcifront.c (.../stable/6/sys) (revision 0) +++ dev/xen/pcifront/pcifront.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,688 @@ +/* + * Copyright (c) 2006, Cisco Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include "pcib_if.h" + +#ifdef XEN_PCIDEV_FE_DEBUG +#define DPRINTF(fmt, args...) \ + printf("pcifront (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTF(fmt, args...) ((void)0) +#endif +#define WPRINTF(fmt, args...) \ + printf("pcifront (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) + +#define INVALID_GRANT_REF (0) +#define INVALID_EVTCHN (-1) +#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT) + +struct pcifront_device { + STAILQ_ENTRY(pcifront_device) next; + + struct xenbus_device *xdev; + + int unit; + int evtchn; + int gnt_ref; + + /* Lock this when doing any operations in sh_info */ + struct mtx sh_info_lock; + struct xen_pci_sharedinfo *sh_info; + + device_t ndev; + + int ref_cnt; +}; + +static STAILQ_HEAD(pcifront_dlist, pcifront_device) pdev_list = STAILQ_HEAD_INITIALIZER(pdev_list); + +struct xpcib_softc { + int domain; + int bus; + struct pcifront_device *pdev; +}; + +/* Allocate a PCI device structure */ +static struct pcifront_device * +alloc_pdev(struct xenbus_device *xdev) +{ + struct pcifront_device *pdev = NULL; + int err, unit; + + err = sscanf(xdev->nodename, "device/pci/%d", &unit); + if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, "Error scanning pci device instance number"); + goto out; + } + + pdev = (struct pcifront_device *)malloc(sizeof(struct pcifront_device), M_DEVBUF, M_NOWAIT); + if (pdev == NULL) { + err = -ENOMEM; + xenbus_dev_fatal(xdev, err, "Error allocating pcifront_device struct"); + goto out; + } + pdev->unit = unit; + pdev->xdev = xdev; + pdev->ref_cnt = 1; + + pdev->sh_info = (struct xen_pci_sharedinfo *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (pdev->sh_info == NULL) { + free(pdev, M_DEVBUF); + pdev = NULL; + err = -ENOMEM; + xenbus_dev_fatal(xdev, err, "Error allocating sh_info struct"); + goto out; + } + pdev->sh_info->flags = 0; + + xdev->data = pdev; + + mtx_init(&pdev->sh_info_lock, "info_lock", "pci shared dev info lock", MTX_DEF); + + pdev->evtchn = INVALID_EVTCHN; + pdev->gnt_ref = INVALID_GRANT_REF; + + STAILQ_INSERT_TAIL(&pdev_list, pdev, next); + + DPRINTF("Allocated pdev @ 0x%p (unit=%d)\n", pdev, unit); + + out: + return pdev; +} + +/* Hold a reference to a pcifront device */ +static void +get_pdev(struct pcifront_device *pdev) +{ + pdev->ref_cnt++; +} + +/* Release a reference to a pcifront device */ +static void +put_pdev(struct pcifront_device *pdev) +{ + if (--pdev->ref_cnt > 0) + return; + + DPRINTF("freeing pdev @ 0x%p (ref_cnt=%d)\n", pdev, pdev->ref_cnt); + + if (pdev->evtchn != INVALID_EVTCHN) + xenbus_free_evtchn(pdev->xdev, pdev->evtchn); + + if (pdev->gnt_ref != INVALID_GRANT_REF) + gnttab_end_foreign_access(pdev->gnt_ref, 0, (void *)pdev->sh_info); + + pdev->xdev->data = NULL; + + free(pdev, M_DEVBUF); +} + + +/* Write to the xenbus info needed by backend */ +static int +pcifront_publish_info(struct pcifront_device *pdev) +{ + int err = 0; + struct xenbus_transaction *trans; + + err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); + if (err < 0) { + WPRINTF("error granting access to ring page\n"); + goto out; + } + + pdev->gnt_ref = err; + + err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); + if (err) + goto out; + + 
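+	/* Retry point: if ending the xenbus transaction below returns EAGAIN, the code jumps back here and redoes the whole transaction. */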
do_publish: + trans = xenbus_transaction_start(); + if (IS_ERR(trans)) { + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend " + "(start transaction)"); + goto out; + } + + err = xenbus_printf(trans, pdev->xdev->nodename, + "pci-op-ref", "%u", pdev->gnt_ref); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "event-channel", "%u", pdev->evtchn); + if (!err) + err = xenbus_printf(trans, pdev->xdev->nodename, + "magic", XEN_PCI_MAGIC); + if (!err) + err = xenbus_switch_state(pdev->xdev, trans, + XenbusStateInitialised); + + if (err) { + xenbus_transaction_end(trans, 1); + xenbus_dev_fatal(pdev->xdev, err, + "Error writing configuration for backend"); + goto out; + } else { + err = xenbus_transaction_end(trans, 0); + if (err == -EAGAIN) + goto do_publish; + else if (err) { + xenbus_dev_fatal(pdev->xdev, err, + "Error completing transaction for backend"); + goto out; + } + } + + out: + return err; +} + +/* The backend is now connected so complete the connection process on our side */ +static int +pcifront_connect(struct pcifront_device *pdev) +{ + device_t nexus; + devclass_t nexus_devclass; + + /* We will add our device as a child of the nexus0 device */ + if (!(nexus_devclass = devclass_find("nexus")) || + !(nexus = devclass_get_device(nexus_devclass, 0))) { + WPRINTF("could not find nexus0!\n"); + return -1; + } + + /* Create a newbus device representing this frontend instance */ + pdev->ndev = BUS_ADD_CHILD(nexus, 0, "xpcife", pdev->unit); + if (!pdev->ndev) { + WPRINTF("could not create xpcife%d!\n", pdev->unit); + return -EFAULT; + } + get_pdev(pdev); + device_set_ivars(pdev->ndev, pdev); + + /* Good to go connected now */ + xenbus_switch_state(pdev->xdev, NULL, XenbusStateConnected); + + printf("pcifront: connected to %s\n", pdev->xdev->nodename); + + mtx_lock(&Giant); + device_probe_and_attach(pdev->ndev); + mtx_unlock(&Giant); + + return 0; +} + +/* The backend is closing so process a disconnect */ +static int +pcifront_disconnect(struct pcifront_device *pdev) +{ + int err = 0; + XenbusState prev_state; + + prev_state = xenbus_read_driver_state(pdev->xdev->nodename); + + if (prev_state < XenbusStateClosing) { + err = xenbus_switch_state(pdev->xdev, NULL, XenbusStateClosing); + if (!err && prev_state == XenbusStateConnected) { + /* TODO - need to detach the newbus devices */ + } + } + + return err; +} + +/* Process a probe from the xenbus */ +static int +pcifront_probe(struct xenbus_device *xdev, + const struct xenbus_device_id *id) +{ + int err = 0; + struct pcifront_device *pdev; + + DPRINTF("xenbus probing\n"); + + if ((pdev = alloc_pdev(xdev)) == NULL) + goto out; + + err = pcifront_publish_info(pdev); + + out: + if (err) + put_pdev(pdev); + return err; +} + +/* Remove the xenbus PCI device */ +static int +pcifront_remove(struct xenbus_device *xdev) +{ + DPRINTF("removing xenbus device node (%s)\n", xdev->nodename); + if (xdev->data) + put_pdev(xdev->data); + return 0; +} + +/* Called by xenbus when our backend node changes state */ +static void +pcifront_backend_changed(struct xenbus_device *xdev, + XenbusState be_state) +{ + struct pcifront_device *pdev = xdev->data; + + switch (be_state) { + case XenbusStateClosing: + DPRINTF("backend closing (%s)\n", xdev->nodename); + pcifront_disconnect(pdev); + break; + + case XenbusStateClosed: + DPRINTF("backend closed (%s)\n", xdev->nodename); + pcifront_disconnect(pdev); + break; + + case XenbusStateConnected: + DPRINTF("backend connected (%s)\n", xdev->nodename); + pcifront_connect(pdev); + 
break; + + default: + break; + } +} + +/* Process PCI operation */ +static int +do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op) +{ + int err = 0; + struct xen_pci_op *active_op = &pdev->sh_info->op; + evtchn_port_t port = pdev->evtchn; + time_t timeout; + + mtx_lock(&pdev->sh_info_lock); + + memcpy(active_op, op, sizeof(struct xen_pci_op)); + + /* Go */ + wmb(); + set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags); + notify_remote_via_evtchn(port); + + timeout = time_uptime + 2; + + clear_evtchn(port); + + /* Spin while waiting for the answer */ + while (test_bit + (_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)) { + int err = HYPERVISOR_poll(&port, 1, 3 * hz); + if (err) + panic("Failed HYPERVISOR_poll: err=%d", err); + clear_evtchn(port); + if (time_uptime > timeout) { + WPRINTF("pciback not responding!!!\n"); + clear_bit(_XEN_PCIF_active, + (unsigned long *)&pdev->sh_info->flags); + err = XEN_PCI_ERR_dev_not_found; + goto out; + } + } + + memcpy(op, active_op, sizeof(struct xen_pci_op)); + + err = op->err; + out: + mtx_unlock(&pdev->sh_info_lock); + return err; +} + +/* ** XenBus Driver registration ** */ + +static struct xenbus_device_id pcifront_ids[] = { + { "pci" }, + { "" } +}; + +static struct xenbus_driver pcifront = { + .name = "pcifront", + .ids = pcifront_ids, + .probe = pcifront_probe, + .remove = pcifront_remove, + .otherend_changed = pcifront_backend_changed, +}; + +/* Register the driver with xenbus during sys init */ +static void +pcifront_init(void *unused) +{ + if ((xen_start_info->flags & SIF_INITDOMAIN)) + return; + + DPRINTF("xenbus registering\n"); + + xenbus_register_frontend(&pcifront); +} + +SYSINIT(pciif, SI_SUB_PSEUDO, SI_ORDER_ANY, pcifront_init, NULL) + + +/* Newbus xpcife device driver probe */ +static int +xpcife_probe(device_t dev) +{ +#ifdef XEN_PCIDEV_FE_DEBUG + struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(dev); + DPRINTF("xpcife probe (unit=%d)\n", pdev->unit); +#endif + return 0; +} + +/* Newbus xpcife device driver attach */ +static int +xpcife_attach(device_t dev) +{ + struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(dev); + int i, num_roots, len, err; + char str[64]; + unsigned int domain, bus; + + DPRINTF("xpcife attach (unit=%d)\n", pdev->unit); + + err = xenbus_scanf(NULL, pdev->xdev->otherend, + "root_num", "%d", &num_roots); + if (err != 1) { + if (err == 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading number of PCI roots"); + goto out; + } + + /* Add a pcib device for each root */ + for (i = 0; i < num_roots; i++) { + device_t child; + + len = snprintf(str, sizeof(str), "root-%d", i); + if (unlikely(len >= (sizeof(str) - 1))) { + err = -ENOMEM; + goto out; + } + + err = xenbus_scanf(NULL, pdev->xdev->otherend, str, + "%x:%x", &domain, &bus); + if (err != 2) { + if (err >= 0) + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Error reading PCI root %d", i); + goto out; + } + err = 0; + if (domain != pdev->xdev->otherend_id) { + err = -EINVAL; + xenbus_dev_fatal(pdev->xdev, err, + "Domain mismatch %d != %d", domain, pdev->xdev->otherend_id); + goto out; + } + + child = device_add_child(dev, "pcib", bus); + if (!child) { + err = -ENOMEM; + xenbus_dev_fatal(pdev->xdev, err, + "Unable to create pcib%d", bus); + goto out; + } + } + + out: + return bus_generic_attach(dev); +} + +static devclass_t xpcife_devclass; + +static device_method_t xpcife_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xpcife_probe), + 
DEVMETHOD(device_attach, xpcife_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + /* Bus interface */ + DEVMETHOD(bus_print_child, bus_generic_print_child), + DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), + DEVMETHOD(bus_release_resource, bus_generic_release_resource), + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), + DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), + DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), + {0, 0} +}; + +static driver_t xpcife_driver = { + "xpcife", + xpcife_methods, + 0, +}; + +DRIVER_MODULE(xpcife, nexus, xpcife_driver, xpcife_devclass, 0, 0); + + +/* Newbus xen pcib device driver probe */ +static int +xpcib_probe(device_t dev) +{ + struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev); + struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(device_get_parent(dev)); + + DPRINTF("xpcib probe (bus=%d)\n", device_get_unit(dev)); + + sc->domain = pdev->xdev->otherend_id; + sc->bus = device_get_unit(dev); + sc->pdev = pdev; + + return 0; +} + +/* Newbus xen pcib device driver attach */ +static int +xpcib_attach(device_t dev) +{ + struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev); + + DPRINTF("xpcib attach (bus=%d)\n", sc->bus); + + device_add_child(dev, "pci", sc->bus); + return bus_generic_attach(dev); +} + +static int +xpcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) +{ + struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev); + switch (which) { + case PCIB_IVAR_BUS: + *result = sc->bus; + return 0; + } + return ENOENT; +} + +/* Return the number of slots supported */ +static int +xpcib_maxslots(device_t dev) +{ + return 31; +} + +#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) + +/* Read configuration space register */ +static u_int32_t +xpcib_read_config(device_t dev, int bus, int slot, int func, + int reg, int bytes) +{ + struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev); + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_read, + .domain = sc->domain, + .bus = sc->bus, + .devfn = PCI_DEVFN(slot, func), + .offset = reg, + .size = bytes, + }; + int err; + + err = do_pci_op(sc->pdev, &op); + + DPRINTF("read config (b=%d, s=%d, f=%d, reg=%d, len=%d, val=%x, err=%d)\n", + bus, slot, func, reg, bytes, op.value, err); + + if (err) + op.value = ~0; + + return op.value; +} + +/* Write configuration space register */ +static void +xpcib_write_config(device_t dev, int bus, int slot, int func, + int reg, u_int32_t data, int bytes) +{ + struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev); + struct xen_pci_op op = { + .cmd = XEN_PCI_OP_conf_write, + .domain = sc->domain, + .bus = sc->bus, + .devfn = PCI_DEVFN(slot, func), + .offset = reg, + .size = bytes, + .value = data, + }; + int err; + + err = do_pci_op(sc->pdev, &op); + + DPRINTF("write config (b=%d, s=%d, f=%d, reg=%d, len=%d, val=%x, err=%d)\n", + bus, slot, func, reg, bytes, data, err); +} + +static int +xpcib_route_interrupt(device_t pcib, device_t dev, int pin) +{ + struct pci_devinfo *dinfo = device_get_ivars(dev); + pcicfgregs *cfg = &dinfo->cfg; + + DPRINTF("route intr (pin=%d, line=%d)\n", pin, cfg->intline); + + return cfg->intline; +} + +static device_method_t xpcib_methods[] = { + /* 
Device interface */ + DEVMETHOD(device_probe, xpcib_probe), + DEVMETHOD(device_attach, xpcib_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + /* Bus interface */ + DEVMETHOD(bus_print_child, bus_generic_print_child), + DEVMETHOD(bus_read_ivar, xpcib_read_ivar), + DEVMETHOD(bus_alloc_resource, bus_generic_alloc_resource), + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + DEVMETHOD(bus_release_resource, bus_generic_release_resource), + DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), + DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), + DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), + + /* pcib interface */ + DEVMETHOD(pcib_maxslots, xpcib_maxslots), + DEVMETHOD(pcib_read_config, xpcib_read_config), + DEVMETHOD(pcib_write_config, xpcib_write_config), + DEVMETHOD(pcib_route_interrupt, xpcib_route_interrupt), + { 0, 0 } +}; + +static devclass_t xpcib_devclass; + +DEFINE_CLASS_0(pcib, xpcib_driver, xpcib_methods, sizeof(struct xpcib_softc)); +DRIVER_MODULE(pcib, xpcife, xpcib_driver, xpcib_devclass, 0, 0); + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ Property changes on: dev/xen/pcifront/pcifront.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: dev/xen/balloon/balloon.c =================================================================== --- dev/xen/balloon/balloon.c (.../stable/6/sys) (revision 0) +++ dev/xen/balloon/balloon.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,565 @@ +/****************************************************************************** + * balloon.c + * + * Xen balloon driver - enables returning/claiming memory to/from Xen. + * + * Copyright (c) 2003, B Dragovic + * Copyright (c) 2003-2004, M Williamson, K Fraser + * Copyright (c) 2005 Dan M. Smith, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver"); + +struct mtx balloon_mutex; + +/* + * Protects atomic reservation decrease/increase against concurrent increases. + * Also protects non-atomic updates of current_pages and driver_pages, and + * balloon lists. + */ +struct mtx balloon_lock; + +/* We increase/decrease in batches which fit in a page */ +static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)]; +#define ARRAY_SIZE(A) (sizeof(A) / sizeof(A[0])) + +struct balloon_stats { + /* We aim for 'current allocation' == 'target allocation'. */ + unsigned long current_pages; + unsigned long target_pages; + /* We may hit the hard limit in Xen. If we do then we remember it. */ + unsigned long hard_limit; + /* + * Drivers may alter the memory reservation independently, but they + * must inform the balloon driver so we avoid hitting the hard limit. + */ + unsigned long driver_pages; + /* Number of pages in high- and low-memory balloons. */ + unsigned long balloon_low; + unsigned long balloon_high; +}; + +static struct balloon_stats balloon_stats; +#define bs balloon_stats + +SYSCTL_DECL(_dev_xen); +SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon"); +SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD, + &bs.current_pages, 0, "Current allocation"); +SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD, + &bs.target_pages, 0, "Target allocation"); +SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD, + &bs.driver_pages, 0, "Driver pages"); +SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD, + &bs.hard_limit, 0, "Xen hard limit"); +SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD, + &bs.balloon_low, 0, "Low-mem balloon"); +SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD, + &bs.balloon_high, 0, "High-mem balloon"); + +struct balloon_entry { + vm_page_t page; + STAILQ_ENTRY(balloon_entry) list; +}; + +/* List of ballooned pages, threaded through the mem_map array. */ +static STAILQ_HEAD(,balloon_entry) ballooned_pages; + +/* Main work function, always executed in process context. */ +static void balloon_process(void *unused); + +#define IPRINTK(fmt, args...) \ + printk(KERN_INFO "xen_mem: " fmt, ##args) +#define WPRINTK(fmt, args...) \ + printk(KERN_WARNING "xen_mem: " fmt, ##args) + +/* balloon_append: add the given page to the balloon. */ +static void +balloon_append(vm_page_t page) +{ + struct balloon_entry *entry; + + entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK); + entry->page = page; + STAILQ_INSERT_HEAD(&ballooned_pages, entry, list); + bs.balloon_low++; +} + +/* balloon_retrieve: rescue a page from the balloon, if it is not empty. 
*/ +static vm_page_t +balloon_retrieve(void) +{ + vm_page_t page; + struct balloon_entry *entry; + + if (STAILQ_EMPTY(&ballooned_pages)) + return NULL; + + entry = STAILQ_FIRST(&ballooned_pages); + STAILQ_REMOVE_HEAD(&ballooned_pages, list); + + page = entry->page; + free(entry, M_BALLOON); + + bs.balloon_low--; + + return page; +} + +static void +balloon_alarm(void *unused) +{ + wakeup(balloon_process); +} + +static unsigned long +current_target(void) +{ + unsigned long target = min(bs.target_pages, bs.hard_limit); + if (target > (bs.current_pages + bs.balloon_low + bs.balloon_high)) + target = bs.current_pages + bs.balloon_low + bs.balloon_high; + return target; +} + +static unsigned long +minimum_target(void) +{ +#ifdef XENHVM +#define max_pfn physmem +#endif + unsigned long min_pages, curr_pages = current_target(); + +#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT)) + /* Simple continuous piecewise linear function: + * max MiB -> min MiB gradient + * 0 0 + * 16 16 + * 32 24 + * 128 72 (1/2) + * 512 168 (1/4) + * 2048 360 (1/8) + * 8192 552 (1/32) + * 32768 1320 + * 131072 4392 + */ + if (max_pfn < MB2PAGES(128)) + min_pages = MB2PAGES(8) + (max_pfn >> 1); + else if (max_pfn < MB2PAGES(512)) + min_pages = MB2PAGES(40) + (max_pfn >> 2); + else if (max_pfn < MB2PAGES(2048)) + min_pages = MB2PAGES(104) + (max_pfn >> 3); + else + min_pages = MB2PAGES(296) + (max_pfn >> 5); +#undef MB2PAGES + + /* Don't enforce growth */ + return min(min_pages, curr_pages); +#ifndef CONFIG_XEN +#undef max_pfn +#endif +} + +static int +increase_reservation(unsigned long nr_pages) +{ + unsigned long pfn, i; + struct balloon_entry *entry; + vm_page_t page; + long rc; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + if (nr_pages > ARRAY_SIZE(frame_list)) + nr_pages = ARRAY_SIZE(frame_list); + + mtx_lock(&balloon_lock); + + for (entry = STAILQ_FIRST(&ballooned_pages), i = 0; + i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) { + KASSERT(entry, ("ballooned_pages list corrupt")); + page = entry->page; + frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); + } + + set_xen_guest_handle(reservation.extent_start, frame_list); + reservation.nr_extents = nr_pages; + rc = HYPERVISOR_memory_op( + XENMEM_populate_physmap, &reservation); + if (rc < nr_pages) { + if (rc > 0) { + int ret; + + /* We hit the Xen hard limit: reprobe. */ + reservation.nr_extents = rc; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + KASSERT(ret == rc, ("HYPERVISOR_memory_op failed")); + } + if (rc >= 0) + bs.hard_limit = (bs.current_pages + rc - + bs.driver_pages); + goto out; + } + + for (i = 0; i < nr_pages; i++) { + page = balloon_retrieve(); + KASSERT(page, ("balloon_retrieve failed")); + + pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); + KASSERT((xen_feature(XENFEAT_auto_translated_physmap) || + !phys_to_machine_mapping_valid(pfn)), + ("auto translated physmap but mapping is valid")); + + set_phys_to_machine(pfn, frame_list[i]); + +#ifndef XENHVM + /* Link back into the page tables if not highmem. */ + if (pfn < max_low_pfn) { + int ret; + ret = HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + pfn_pte_ma(frame_list[i], PAGE_KERNEL), + 0); + KASSERT(ret == 0, + ("HYPERVISOR_update_va_mapping failed")); + } +#endif + + /* Relinquish the page back to the allocator. 
*/ + vm_page_unwire(page, 0); + vm_page_free(page); + } + + bs.current_pages += nr_pages; + //totalram_pages = bs.current_pages; + + out: + mtx_unlock(&balloon_lock); + + return 0; +} + +static int +decrease_reservation(unsigned long nr_pages) +{ + unsigned long pfn, i; + vm_page_t page; + int need_sleep = 0; + int ret; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + if (nr_pages > ARRAY_SIZE(frame_list)) + nr_pages = ARRAY_SIZE(frame_list); + + for (i = 0; i < nr_pages; i++) { + int color = 0; + if ((page = vm_page_alloc(NULL, color++, + VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { + nr_pages = i; + need_sleep = 1; + break; + } + + pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT); + frame_list[i] = PFNTOMFN(pfn); + +#if 0 + if (!PageHighMem(page)) { + v = phys_to_virt(pfn << PAGE_SHIFT); + scrub_pages(v, 1); +#ifdef CONFIG_XEN + ret = HYPERVISOR_update_va_mapping( + (unsigned long)v, __pte_ma(0), 0); + BUG_ON(ret); +#endif + } +#endif +#ifdef CONFIG_XEN_SCRUB_PAGES + else { + v = kmap(page); + scrub_pages(v, 1); + kunmap(page); + } +#endif + } + +#ifdef CONFIG_XEN + /* Ensure that ballooned highmem pages don't have kmaps. */ + kmap_flush_unused(); + flush_tlb_all(); +#endif + + mtx_lock(&balloon_lock); + + /* No more mappings: invalidate P2M and add to balloon. */ + for (i = 0; i < nr_pages; i++) { + pfn = MFNTOPFN(frame_list[i]); + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT)); + } + + set_xen_guest_handle(reservation.extent_start, frame_list); + reservation.nr_extents = nr_pages; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed")); + + bs.current_pages -= nr_pages; + //totalram_pages = bs.current_pages; + + mtx_unlock(&balloon_lock); + + return (need_sleep); +} + +/* + * We avoid multiple worker processes conflicting via the balloon mutex. + * We may of course race updates of the target counts (which are protected + * by the balloon lock), or with changes to the Xen hard limit, but we will + * recover from these in time. + */ +static void +balloon_process(void *unused) +{ + int need_sleep = 0; + long credit; + + mtx_lock(&balloon_mutex); + for (;;) { + do { + credit = current_target() - bs.current_pages; + if (credit > 0) + need_sleep = (increase_reservation(credit) != 0); + if (credit < 0) + need_sleep = (decrease_reservation(-credit) != 0); + + } while ((credit != 0) && !need_sleep); + + /* Schedule more work if there is some still to be done. */ + if (current_target() != bs.current_pages) + timeout(balloon_alarm, NULL, ticks + hz); + + msleep(balloon_process, &balloon_mutex, 0, "balloon", -1); + } + mtx_unlock(&balloon_mutex); +} + +/* Resets the Xen limit, sets new target, and kicks off processing. */ +static void +set_new_target(unsigned long target) +{ + /* No need for lock. Not read-modify-write updates. 
*/ + bs.hard_limit = ~0UL; + bs.target_pages = max(target, minimum_target()); + wakeup(balloon_process); +} + +static struct xenbus_watch target_watch = +{ + .node = "memory/target" +}; + +/* React to a change in the target key */ +static void +watch_target(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + unsigned long long new_target; + int err; + + err = xenbus_scanf(XBT_NIL, "memory", "target", NULL, + "%llu", &new_target); + if (err) { + /* This is ok (for domain0 at least) - so just return */ + return; + } + + /* The given memory/target value is in KiB, so it needs converting to + pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10. + */ + set_new_target(new_target >> (PAGE_SHIFT - 10)); + +} + +static void +balloon_init_watcher(void *arg) +{ + int err; + + err = register_xenbus_watch(&target_watch); + if (err) + printf("Failed to set balloon watcher\n"); + +} +SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY, + balloon_init_watcher, NULL); + +static void +balloon_init(void *arg) +{ +#ifndef XENHVM + vm_page_t page; +#endif + + if (!is_running_on_xen()) + return; + + mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF); + mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF); + +#ifndef XENHVM + bs.current_pages = min(xen_start_info->nr_pages, max_pfn); +#else + bs.current_pages = physmem; +#endif + bs.target_pages = bs.current_pages; + bs.balloon_low = 0; + bs.balloon_high = 0; + bs.driver_pages = 0UL; + bs.hard_limit = ~0UL; + + kthread_create(balloon_process, NULL, NULL, 0, 0, "balloon"); +// init_timer(&balloon_timer); +// balloon_timer.data = 0; +// balloon_timer.function = balloon_alarm; + +#ifndef XENHVM + /* Initialise the balloon with excess memory space. */ + for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) { + page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT); + balloon_append(page); + } +#endif + + target_watch.callback = watch_target; + + return; +} +SYSINIT(balloon_init, SI_SUB_CONFIGURE, SI_ORDER_ANY, balloon_init, NULL); + +void balloon_update_driver_allowance(long delta); + +void +balloon_update_driver_allowance(long delta) +{ + mtx_lock(&balloon_lock); + bs.driver_pages += delta; + mtx_unlock(&balloon_lock); +} + +#if 0 +static int dealloc_pte_fn( + pte_t *pte, struct page *pte_page, unsigned long addr, void *data) +{ + unsigned long mfn = pte_mfn(*pte); + int ret; + struct xen_memory_reservation reservation = { + .extent_start = &mfn, + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; + set_pte_at(&init_mm, addr, pte, __pte_ma(0)); + set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY); + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + KASSERT(ret == 1, ("HYPERVISOR_memory_op failed")); + return 0; +} + +#endif + +#if 0 +vm_page_t +balloon_alloc_empty_page_range(unsigned long nr_pages) +{ + vm_page_t pages; + int i, rc; + unsigned long *mfn_list; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4) + if (pages == NULL) + return NULL; + + mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK); + + for (i = 0; i < nr_pages; i++) { + mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT); + PFNTOMFN(i) = INVALID_P2M_ENTRY; + reservation.extent_start = mfn_list; + reservation.nr_extents = nr_pages; + rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation, + &reservation); + KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op 
failed")); + } + + current_pages -= nr_pages; + + wakeup(balloon_process); + + return pages; +} + +void +balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages) +{ + unsigned long i; + + for (i = 0; i < nr_pages; i++) + balloon_append(page + i); + + wakeup(balloon_process); +} +#endif Property changes on: dev/xen/balloon/balloon.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: dev/xen/xenpci/machine_reboot.c =================================================================== --- dev/xen/xenpci/machine_reboot.c (.../stable/6/sys) (revision 0) +++ dev/xen/xenpci/machine_reboot.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,79 @@ +/*- + * Copyright (c) 2008 Citrix Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include + +#include + +void +xen_suspend() +{ + int suspend_cancelled; + + if (DEVICE_SUSPEND(root_bus)) { + printf("xen_suspend: device_suspend failed\n"); + return; + } + + /* + * Make sure we don't change cpus or switch to some other + * thread. for the duration. + */ + critical_enter(); + + /* + * Prevent any races with evtchn_interrupt() handler. + */ + irq_suspend(); + disable_intr(); + + suspend_cancelled = HYPERVISOR_suspend(0); + if (!suspend_cancelled) + xenpci_resume(); + + /* + * Re-enable interrupts and put the scheduler back to normal. + */ + enable_intr(); + critical_exit(); + + /* + * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or + * similar. + */ + if (!suspend_cancelled) + DEVICE_RESUME(root_bus); +} Property changes on: dev/xen/xenpci/machine_reboot.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: dev/xen/xenpci/xenpcivar.h =================================================================== --- dev/xen/xenpci/xenpcivar.h (.../stable/6/sys) (revision 0) +++ dev/xen/xenpci/xenpcivar.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2008 Citrix Systems, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * One of these per allocated device. + */ +struct xenpci_softc { + int rid_ioport; + int rid_memory; + int rid_irq; + struct resource* res_memory; /* Resource for mem range. */ + struct resource* res_irq; /* Resource for irq range. */ + void *intr_cookie; + + vm_paddr_t phys_next; /* next page from mem range */ +}; + +extern int xenpci_irq_init(device_t device, struct xenpci_softc *scp); +extern int xenpci_alloc_space(size_t sz, vm_paddr_t *pa); +extern void xenpci_resume(void); +extern void xen_suspend(void); Property changes on: dev/xen/xenpci/xenpcivar.h ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: dev/xen/xenpci/xenpci.c =================================================================== --- dev/xen/xenpci/xenpci.c (.../stable/6/sys) (revision 0) +++ dev/xen/xenpci/xenpci.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,399 @@ +/* + * Copyright (c) 2008 Citrix Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +/* + * These variables are used by the rest of the kernel to access the + * hypervisor. + */ +char *hypercall_stubs; +shared_info_t *HYPERVISOR_shared_info; +static vm_paddr_t shared_info_pa; + +/* + * This is used to find our platform device instance. + */ +static devclass_t xenpci_devclass; + +/* + * Return the CPUID base address for Xen functions. + */ +static uint32_t +xenpci_cpuid_base(void) +{ + uint32_t base, regs[4]; + + for (base = 0x40000000; base < 0x40001000; base += 0x100) { + do_cpuid(base, regs); + if (!memcmp("XenVMMXenVMM", ®s[1], 12) + && (regs[0] - base) >= 2) + return (base); + } + return (0); +} + +/* + * Allocate and fill in the hypcall page. + */ +static int +xenpci_init_hypercall_stubs(device_t dev, struct xenpci_softc * scp) +{ + uint32_t base, regs[4]; + int i; + + base = xenpci_cpuid_base(); + if (!base) { + device_printf(dev, "Xen platform device but not Xen VMM\n"); + return (EINVAL); + } + + if (bootverbose) { + do_cpuid(base + 1, regs); + device_printf(dev, "Xen version %d.%d.\n", + regs[0] >> 16, regs[0] & 0xffff); + } + + /* + * Find the hypercall pages. + */ + do_cpuid(base + 2, regs); + + hypercall_stubs = malloc(regs[0] * PAGE_SIZE, M_TEMP, M_WAITOK); + + for (i = 0; i < regs[0]; i++) { + wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); + } + + return (0); +} + +/* + * After a resume, re-initialise the hypercall page. + */ +static void +xenpci_resume_hypercall_stubs(device_t dev, struct xenpci_softc * scp) +{ + uint32_t base, regs[4]; + int i; + + base = xenpci_cpuid_base(); + + do_cpuid(base + 2, regs); + for (i = 0; i < regs[0]; i++) { + wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); + } +} + +/* + * Tell the hypervisor how to contact us for event channel callbacks. + */ +static void +xenpci_set_callback(device_t dev) +{ + int irq; + uint64_t callback; + struct xen_hvm_param xhp; + + irq = pci_get_irq(dev); + if (irq < 16) { + callback = irq; + } else { + callback = (pci_get_intpin(dev) - 1) & 3; + callback |= pci_get_slot(dev) << 11; + callback |= 1ull << 56; + } + + xhp.domid = DOMID_SELF; + xhp.index = HVM_PARAM_CALLBACK_IRQ; + xhp.value = callback; + if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp)) + panic("Can't set evtchn callback"); +} + + +/* + * Deallocate anything allocated by xenpci_allocate_resources. 
+ */ +static int +xenpci_deallocate_resources(device_t dev) +{ + struct xenpci_softc *scp = device_get_softc(dev); + + if (scp->res_irq != 0) { + bus_deactivate_resource(dev, SYS_RES_IRQ, + scp->rid_irq, scp->res_irq); + bus_release_resource(dev, SYS_RES_IRQ, + scp->rid_irq, scp->res_irq); + scp->res_irq = 0; + } + if (scp->res_memory != 0) { + bus_deactivate_resource(dev, SYS_RES_MEMORY, + scp->rid_memory, scp->res_memory); + bus_release_resource(dev, SYS_RES_MEMORY, + scp->rid_memory, scp->res_memory); + scp->res_memory = 0; + } + + return (0); +} + +/* + * Allocate irq and memory resources. + */ +static int +xenpci_allocate_resources(device_t dev) +{ + struct xenpci_softc *scp = device_get_softc(dev); + + scp->res_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, + &scp->rid_irq, RF_SHAREABLE|RF_ACTIVE); + if (scp->res_irq == NULL) + goto errexit; + + scp->rid_memory = PCIR_BAR(1); + scp->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &scp->rid_memory, RF_ACTIVE); + if (scp->res_memory == NULL) + goto errexit; + return (0); + +errexit: + /* Cleanup anything we may have assigned. */ + xenpci_deallocate_resources(dev); + return (ENXIO); /* For want of a better idea. */ +} + +/* + * Allocate a physical address range from our mmio region. + */ +static int +xenpci_alloc_space_int(struct xenpci_softc *scp, size_t sz, + vm_paddr_t *pa) +{ + + if (scp->phys_next + sz > rman_get_end(scp->res_memory)) { + return (ENOMEM); + } + + *pa = scp->phys_next; + scp->phys_next += sz; + + return (0); +} + +/* + * Allocate a physical address range from our mmio region. + */ +int +xenpci_alloc_space(size_t sz, vm_paddr_t *pa) +{ + device_t dev = devclass_get_device(xenpci_devclass, 0); + + if (dev) { + return (xenpci_alloc_space_int(device_get_softc(dev), + sz, pa)); + } else { + return (ENOMEM); + } +} + +/* + * Called very early in the resume sequence - reinitialise the various + * bits of Xen machinery including the hypercall page and the shared + * info page. + */ +void +xenpci_resume() +{ + device_t dev = devclass_get_device(xenpci_devclass, 0); + struct xenpci_softc *scp = device_get_softc(dev); + struct xen_add_to_physmap xatp; + + xenpci_resume_hypercall_stubs(dev, scp); + + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = shared_info_pa >> PAGE_SHIFT; + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) + panic("HYPERVISOR_memory_op failed"); + + pmap_kenter((vm_offset_t) HYPERVISOR_shared_info, shared_info_pa); + + xenpci_set_callback(dev); + + gnttab_resume(); + irq_resume(); +} + +/* + * Probe - just check device ID. + */ +static int +xenpci_probe(device_t dev) +{ + + if (pci_get_devid(dev) != 0x00015853) + return (ENXIO); + + device_set_desc(dev, "Xen Platform Device"); + return (bus_generic_probe(dev)); +} + +/* + * Attach - find resources and talk to Xen. 
+ */ +static int +xenpci_attach(device_t dev) +{ + int error; + struct xenpci_softc *scp = device_get_softc(dev); + struct xen_add_to_physmap xatp; + vm_offset_t shared_va; + + error = xenpci_allocate_resources(dev); + if (error) + goto errexit; + + scp->phys_next = rman_get_start(scp->res_memory); + + error = xenpci_init_hypercall_stubs(dev, scp); + if (error) + goto errexit; + + setup_xen_features(); + + xenpci_alloc_space_int(scp, PAGE_SIZE, &shared_info_pa); + + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = shared_info_pa >> PAGE_SHIFT; + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) + panic("HYPERVISOR_memory_op failed"); + + shared_va = kmem_alloc_nofault(kernel_map, PAGE_SIZE); + pmap_kenter(shared_va, shared_info_pa); + HYPERVISOR_shared_info = (void *) shared_va; + + /* + * Hook the irq up to evtchn + */ + xenpci_irq_init(dev, scp); + xenpci_set_callback(dev); + + return (bus_generic_attach(dev)); + +errexit: + /* + * Undo anything we may have done. + */ + xenpci_deallocate_resources(dev); + return (error); +} + +/* + * Detach - reverse anything done by attach. + */ +static int +xenpci_detach(device_t dev) +{ + struct xenpci_softc *scp = device_get_softc(dev); + device_t parent = device_get_parent(dev); + + /* + * Take our interrupt handler out of the list of handlers + * that can handle this irq. + */ + if (scp->intr_cookie != NULL) { + if (BUS_TEARDOWN_INTR(parent, dev, + scp->res_irq, scp->intr_cookie) != 0) + printf("intr teardown failed.. continuing\n"); + scp->intr_cookie = NULL; + } + + /* + * Deallocate any system resources we may have + * allocated on behalf of this driver. + */ + return (xenpci_deallocate_resources(dev)); +} + +static device_method_t xenpci_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, xenpci_probe), + DEVMETHOD(device_attach, xenpci_attach), + DEVMETHOD(device_detach, xenpci_detach), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + /* Bus interface */ + DEVMETHOD(bus_add_child, bus_generic_add_child), + + { 0, 0 } +}; + +static driver_t xenpci_driver = { + "xenpci", + xenpci_methods, + sizeof(struct xenpci_softc), +}; + +DRIVER_MODULE(xenpci, pci, xenpci_driver, xenpci_devclass, 0, 0); Property changes on: dev/xen/xenpci/xenpci.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: dev/xen/xenpci/evtchn.c =================================================================== --- dev/xen/xenpci/evtchn.c (.../stable/6/sys) (revision 0) +++ dev/xen/xenpci/evtchn.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,418 @@ +/****************************************************************************** + * evtchn.c + * + * A simplified event channel for para-drivers in unmodified linux + * + * Copyright (c) 2002-2005, K A Fraser + * Copyright (c) 2005, Intel Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright 
notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +static inline unsigned long __ffs(unsigned long word) +{ + __asm__("bsfq %1,%0" + :"=r" (word) + :"rm" (word)); + return word; +} + +#define is_valid_evtchn(x) ((x) != 0) +#define evtchn_from_irq(x) (irq_evtchn[irq].evtchn) + +static struct { + struct mtx lock; + driver_intr_t *handler; + void *arg; + int evtchn; + int close:1; /* close on unbind_from_irqhandler()? */ + int inuse:1; + int in_handler:1; + int mpsafe:1; +} irq_evtchn[256]; +static int evtchn_to_irq[NR_EVENT_CHANNELS] = { + [0 ... NR_EVENT_CHANNELS-1] = -1 }; + +static struct mtx irq_alloc_lock; +static device_t xenpci_device; + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +static unsigned int +alloc_xen_irq(void) +{ + static int warned; + unsigned int irq; + + mtx_lock(&irq_alloc_lock); + + for (irq = 1; irq < ARRAY_SIZE(irq_evtchn); irq++) { + if (irq_evtchn[irq].inuse) + continue; + irq_evtchn[irq].inuse = 1; + mtx_unlock(&irq_alloc_lock); + return irq; + } + + if (!warned) { + warned = 1; + printf("alloc_xen_irq: No available IRQ to bind to: " + "increase irq_evtchn[] size in evtchn.c.\n"); + } + + mtx_unlock(&irq_alloc_lock); + + return -ENOSPC; +} + +static void +free_xen_irq(int irq) +{ + + mtx_lock(&irq_alloc_lock); + irq_evtchn[irq].inuse = 0; + mtx_unlock(&irq_alloc_lock); +} + +int +irq_to_evtchn_port(int irq) +{ + + return irq_evtchn[irq].evtchn; +} + +void +mask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + + synch_set_bit(port, &s->evtchn_mask[0]); +} + +void +unmask_evtchn(int port) +{ + evtchn_unmask_t op = { .port = port }; + + HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op); +} + +int +bind_listening_port_to_irqhandler(unsigned int remote_domain, + const char *devname, driver_intr_t handler, void *arg, + unsigned long irqflags, unsigned int *irqp) +{ + struct evtchn_alloc_unbound alloc_unbound; + unsigned int irq; + int error; + + irq = alloc_xen_irq(); + if (irq < 0) + return irq; + + mtx_lock(&irq_evtchn[irq].lock); + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = remote_domain; + error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (error) { + mtx_unlock(&irq_evtchn[irq].lock); + free_xen_irq(irq); + return (-error); + } + + irq_evtchn[irq].handler = handler; + irq_evtchn[irq].arg = arg; + irq_evtchn[irq].evtchn = alloc_unbound.port; + irq_evtchn[irq].close = 1; + irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; + + evtchn_to_irq[alloc_unbound.port] = irq; + + unmask_evtchn(alloc_unbound.port); + + mtx_unlock(&irq_evtchn[irq].lock); + + if (irqp) + *irqp = irq; + return (0); +} + +int +bind_caller_port_to_irqhandler(unsigned int caller_port, + const char *devname, driver_intr_t handler, void *arg, + unsigned long 
irqflags, unsigned int *irqp) +{ + unsigned int irq; + + irq = alloc_xen_irq(); + if (irq < 0) + return irq; + + mtx_lock(&irq_evtchn[irq].lock); + + irq_evtchn[irq].handler = handler; + irq_evtchn[irq].arg = arg; + irq_evtchn[irq].evtchn = caller_port; + irq_evtchn[irq].close = 0; + irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; + + evtchn_to_irq[caller_port] = irq; + + unmask_evtchn(caller_port); + + mtx_unlock(&irq_evtchn[irq].lock); + + if (irqp) + *irqp = irq; + return (0); +} + +void +unbind_from_irqhandler(unsigned int irq) +{ + int evtchn; + + mtx_lock(&irq_evtchn[irq].lock); + + evtchn = evtchn_from_irq(irq); + + if (is_valid_evtchn(evtchn)) { + evtchn_to_irq[evtchn] = -1; + mask_evtchn(evtchn); + if (irq_evtchn[irq].close) { + struct evtchn_close close = { .port = evtchn }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + } + } + + irq_evtchn[irq].handler = NULL; + irq_evtchn[irq].evtchn = 0; + + mtx_unlock(&irq_evtchn[irq].lock); + + while (irq_evtchn[irq].in_handler) + cpu_relax(); + + free_xen_irq(irq); +} + +void notify_remote_via_irq(int irq) +{ + int evtchn; + + evtchn = evtchn_from_irq(irq); + if (is_valid_evtchn(evtchn)) + notify_remote_via_evtchn(evtchn); +} + +static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh, + unsigned int idx) +{ + return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]); +} + +static void +evtchn_interrupt(void *arg) +{ + unsigned int l1i, l2i, port; + unsigned long masked_l1, masked_l2; + /* XXX: All events are bound to vcpu0 but irq may be redirected. */ + int cpu = 0; /*smp_processor_id();*/ + driver_intr_t *handler; + void *handler_arg; + int irq, handler_mpsafe; + shared_info_t *s = HYPERVISOR_shared_info; + vcpu_info_t *v = &s->vcpu_info[cpu]; + struct pcpu *pc = pcpu_find(cpu); + unsigned long l1, l2; + + v->evtchn_upcall_pending = 0; + +#if 0 +#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ + /* Clear master flag /before/ clearing selector flag. 
*/ + wmb(); +#endif +#endif + + l1 = atomic_readandclear_long(&v->evtchn_pending_sel); + + l1i = pc->pc_last_processed_l1i; + l2i = pc->pc_last_processed_l2i; + + while (l1 != 0) { + + l1i = (l1i + 1) % LONG_BIT; + masked_l1 = l1 & ((~0UL) << l1i); + + if (masked_l1 == 0) { /* if we masked out all events, wrap around to the beginning */ + l1i = LONG_BIT - 1; + l2i = LONG_BIT - 1; + continue; + } + l1i = __ffs(masked_l1); + + do { + l2 = active_evtchns(cpu, s, l1i); + + l2i = (l2i + 1) % LONG_BIT; + masked_l2 = l2 & ((~0UL) << l2i); + + if (masked_l2 == 0) { /* if we masked out all events, move on */ + l2i = LONG_BIT - 1; + break; + } + l2i = __ffs(masked_l2); + + /* process port */ + port = (l1i * LONG_BIT) + l2i; + synch_clear_bit(port, &s->evtchn_pending[0]); + + irq = evtchn_to_irq[port]; + if (irq < 0) + continue; + + mtx_lock(&irq_evtchn[irq].lock); + handler = irq_evtchn[irq].handler; + handler_arg = irq_evtchn[irq].arg; + handler_mpsafe = irq_evtchn[irq].mpsafe; + if (unlikely(handler == NULL)) { + printf("Xen IRQ%d (port %d) has no handler!\n", + irq, port); + mtx_unlock(&irq_evtchn[irq].lock); + continue; + } + irq_evtchn[irq].in_handler = 1; + mtx_unlock(&irq_evtchn[irq].lock); + + //local_irq_enable(); + if (!handler_mpsafe) + mtx_lock(&Giant); + handler(handler_arg); + if (!handler_mpsafe) + mtx_unlock(&Giant); + //local_irq_disable(); + + mtx_lock(&irq_evtchn[irq].lock); + irq_evtchn[irq].in_handler = 0; + mtx_unlock(&irq_evtchn[irq].lock); + + /* if this is the final port processed, we'll pick up here+1 next time */ + pc->pc_last_processed_l1i = l1i; + pc->pc_last_processed_l2i = l2i; + + } while (l2i != LONG_BIT - 1); + + l2 = active_evtchns(cpu, s, l1i); + if (l2 == 0) /* we handled all ports, so we can clear the selector bit */ + l1 &= ~(1UL << l1i); + } +} + +void +irq_suspend(void) +{ + struct xenpci_softc *scp = device_get_softc(xenpci_device); + + /* + * Take our interrupt handler out of the list of handlers + * that can handle this irq. + */ + if (scp->intr_cookie != NULL) { + if (BUS_TEARDOWN_INTR(device_get_parent(xenpci_device), + xenpci_device, scp->res_irq, scp->intr_cookie) != 0) + printf("intr teardown failed.. 
continuing\n"); + scp->intr_cookie = NULL; + } +} + +void +irq_resume(void) +{ + struct xenpci_softc *scp = device_get_softc(xenpci_device); + int evtchn, irq; + + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) { + mask_evtchn(evtchn); + evtchn_to_irq[evtchn] = -1; + } + + for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++) + irq_evtchn[irq].evtchn = 0; + + BUS_SETUP_INTR(device_get_parent(xenpci_device), + xenpci_device, scp->res_irq, INTR_TYPE_MISC, + evtchn_interrupt, NULL, &scp->intr_cookie); +} + +int +xenpci_irq_init(device_t device, struct xenpci_softc *scp) +{ + int irq, cpu; + int error; + + mtx_init(&irq_alloc_lock, "xen-irq-lock", NULL, MTX_DEF); + + for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++) + mtx_init(&irq_evtchn[irq].lock, "irq-evtchn", NULL, MTX_DEF); + + for (cpu = 0; cpu < mp_ncpus; cpu++) { + pcpu_find(cpu)->pc_last_processed_l1i = LONG_BIT - 1; + pcpu_find(cpu)->pc_last_processed_l2i = LONG_BIT - 1; + } + + error = BUS_SETUP_INTR(device_get_parent(device), device, + scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC, evtchn_interrupt, NULL, + &scp->intr_cookie); + if (error) + return (error); + + xenpci_device = device; + + return (0); +} Property changes on: dev/xen/xenpci/evtchn.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: dev/xen/evtchn/evtchn_dev.c =================================================================== --- dev/xen/evtchn/evtchn_dev.c (.../stable/6/sys) (revision 0) +++ dev/xen/evtchn/evtchn_dev.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,394 @@ +/****************************************************************************** + * evtchn.c + * + * Xenolinux driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004, K A Fraser + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + + +typedef struct evtchn_sotfc { + + struct selinfo ev_rsel; +} evtchn_softc_t; + + +#ifdef linuxcrap +/* NB. This must be shared amongst drivers if more things go in /dev/xen */ +static devfs_handle_t xen_dev_dir; +#endif + +/* Only one process may open /dev/xen/evtchn at any time. */ +static unsigned long evtchn_dev_inuse; + +/* Notification ring, accessed via /dev/xen/evtchn. */ + +#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */ + +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) +static uint16_t *ring; +static unsigned int ring_cons, ring_prod, ring_overflow; + +/* Which ports is user-space bound to? */ +static uint32_t bound_ports[32]; + +/* Unique address for processes to sleep on */ +static void *evtchn_waddr = ˚ + +static struct mtx lock, upcall_lock; + +static d_read_t evtchn_read; +static d_write_t evtchn_write; +static d_ioctl_t evtchn_ioctl; +static d_poll_t evtchn_poll; +static d_open_t evtchn_open; +static d_close_t evtchn_close; + + +void +evtchn_device_upcall(int port) +{ + mtx_lock(&upcall_lock); + + mask_evtchn(port); + clear_evtchn(port); + + if ( ring != NULL ) { + if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) { + ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port; + if ( ring_cons == ring_prod++ ) { + wakeup(evtchn_waddr); + } + } + else { + ring_overflow = 1; + } + } + + mtx_unlock(&upcall_lock); +} + +static void +__evtchn_reset_buffer_ring(void) +{ + /* Initialise the ring to empty. Clear errors. 
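The /dev/xen/evtchn notification ring declared above uses the usual free-running-index idiom: ring_prod and ring_cons are never wrapped explicitly, EVTCHN_RING_MASK() folds them into the array on every access, and the difference ring_prod - ring_cons stays meaningful across unsigned overflow as long as the size is a power of two. A minimal userland model of that bookkeeping (hypothetical names, no locking):

    #include <assert.h>
    #include <stdint.h>

    #define RING_SIZE       2048                    /* must be a power of two */
    #define RING_MASK(i)    ((i) & (RING_SIZE - 1))

    static uint16_t slots[RING_SIZE];
    static unsigned int prod, cons;                 /* free-running counters */

    static int
    ring_put(uint16_t port)
    {
        if (prod - cons >= RING_SIZE)               /* full: would overwrite */
            return (-1);
        slots[RING_MASK(prod++)] = port;
        return (0);
    }

    static int
    ring_get(uint16_t *port)
    {
        if (cons == prod)                           /* empty */
            return (-1);
        *port = slots[RING_MASK(cons++)];
        return (0);
    }

    int
    main(void)
    {
        uint16_t p;

        assert(ring_put(7) == 0);
        assert(ring_get(&p) == 0 && p == 7);
        return (0);
    }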
*/ + ring_cons = ring_prod = ring_overflow = 0; +} + +static int +evtchn_read(struct cdev *dev, struct uio *uio, int ioflag) +{ + int rc; + unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0; + count = uio->uio_resid; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) + { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + for ( ; ; ) { + if ( (c = ring_cons) != (p = ring_prod) ) + break; + + if ( ring_overflow ) { + rc = EFBIG; + goto out; + } + + if (sst != 0) { + rc = EINTR; + goto out; + } + + /* PCATCH == check for signals before and after sleeping + * PWAIT == priority of waiting on resource + */ + sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10); + } + + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ + if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) { + bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t); + bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t); + } + else { + bytes1 = (p - c) * sizeof(uint16_t); + bytes2 = 0; + } + + /* Truncate chunks according to caller's maximum byte count. */ + if ( bytes1 > count ) { + bytes1 = count; + bytes2 = 0; + } + else if ( (bytes1 + bytes2) > count ) { + bytes2 = count - bytes1; + } + + if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) || + ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio))) + /* keeping this around as its replacement is not equivalent + * copyout(&ring[0], &buf[bytes1], bytes2) + */ + { + rc = EFAULT; + goto out; + } + + ring_cons += (bytes1 + bytes2) / sizeof(uint16_t); + + rc = bytes1 + bytes2; + + out: + + return rc; +} + +static int +evtchn_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + int rc, i, count; + + count = uio->uio_resid; + + uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); + + + if ( kbuf == NULL ) + return ENOMEM; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + if ( uiomove(kbuf, count, uio) != 0 ) { + rc = EFAULT; + goto out; + } + + mtx_lock_spin(&lock); + for ( i = 0; i < (count/2); i++ ) + if ( test_bit(kbuf[i], &bound_ports[0]) ) + unmask_evtchn(kbuf[i]); + mtx_unlock_spin(&lock); + + rc = count; + + out: + free(kbuf, M_DEVBUF); + return rc; +} + +static int +evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, + int mode, struct thread *td __unused) +{ + int rc = 0; + + mtx_lock_spin(&lock); + + switch ( cmd ) + { + case EVTCHN_RESET: + __evtchn_reset_buffer_ring(); + break; + case EVTCHN_BIND: + if ( !synch_test_and_set_bit((int)arg, &bound_ports[0]) ) + unmask_evtchn((int)arg); + else + rc = EINVAL; + break; + case EVTCHN_UNBIND: + if ( synch_test_and_clear_bit((int)arg, &bound_ports[0]) ) + mask_evtchn((int)arg); + else + rc = EINVAL; + break; + default: + rc = ENOSYS; + break; + } + + mtx_unlock_spin(&lock); + + return rc; +} + +static int +evtchn_poll(struct cdev *dev, int poll_events, struct thread *td) +{ + + evtchn_softc_t *sc; + unsigned int mask = POLLOUT | POLLWRNORM; + + sc = dev->si_drv1; + + if ( ring_cons != ring_prod ) + mask |= POLLIN | POLLRDNORM; + else if ( ring_overflow ) + mask = POLLERR; + else + selrecord(td, &sc->ev_rsel); + + + return mask; +} + + +static int +evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td) +{ + uint16_t *_ring; + + if (flag & O_NONBLOCK) + return EBUSY; + + if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) ) + return EBUSY; + + if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL ) + return 
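evtchn_read() hands data to uiomove() in at most two contiguous chunks because the readable region may wrap past the end of the array; the test ((c ^ p) & EVTCHN_RING_SIZE) is a cheap way of asking whether consumer and producer sit in different laps of the ring. A standalone sketch of that split, counting entries rather than bytes and assuming the same 2048-entry power-of-two ring:

    #include <stdio.h>

    #define RING_SIZE       2048
    #define RING_MASK(i)    ((i) & (RING_SIZE - 1))

    /* Split the readable region [c, p) into two contiguous runs. */
    static void
    split(unsigned int c, unsigned int p, unsigned int *n1, unsigned int *n2)
    {
        if ((c ^ p) & RING_SIZE) {          /* region wraps past the array end */
            *n1 = RING_SIZE - RING_MASK(c); /* from the consumer to the end */
            *n2 = RING_MASK(p);             /* from slot 0 up to the producer */
        } else {
            *n1 = p - c;                    /* one contiguous run */
            *n2 = 0;
        }
    }

    int
    main(void)
    {
        unsigned int n1, n2;

        split(2040, 2056, &n1, &n2);        /* 16 entries, wrapping the array */
        printf("%u + %u entries\n", n1, n2);/* prints "8 + 8 entries" */
        return (0);
    }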
ENOMEM; + + mtx_lock_spin(&lock); + ring = _ring; + __evtchn_reset_buffer_ring(); + mtx_unlock_spin(&lock); + + + return 0; +} + +static int +evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused) +{ + int i; + + mtx_lock_spin(&lock); + if (ring != NULL) { + free(ring, M_DEVBUF); + ring = NULL; + } + for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) + if ( synch_test_and_clear_bit(i, &bound_ports[0]) ) + mask_evtchn(i); + mtx_unlock_spin(&lock); + + evtchn_dev_inuse = 0; + + return 0; +} + +static struct cdevsw evtchn_devsw = { + d_version: D_VERSION, + d_open: evtchn_open, + d_close: evtchn_close, + d_read: evtchn_read, + d_write: evtchn_write, + d_ioctl: evtchn_ioctl, + d_poll: evtchn_poll, + d_name: "evtchn", + d_flags: 0, +}; + + +/* XXX - if this device is ever supposed to support use by more than one process + * this global static will have to go away + */ +static struct cdev *evtchn_dev; + + + +static int +evtchn_init(void *dummy __unused) +{ + /* XXX I believe we don't need these leaving them here for now until we + * have some semblance of it working + */ + mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF); + + /* (DEVFS) create '/dev/misc/evtchn'. */ + evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn"); + + mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS); + + evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK); + bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t)); + + /* XXX I don't think we need any of this rubbish */ +#if 0 + if ( err != 0 ) + { + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); + return err; + } + + /* (DEVFS) create directory '/dev/xen'. */ + xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); + + /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ + pos = devfs_generate_path(evtchn_miscdev.devfs_handle, + &link_dest[3], + sizeof(link_dest) - 3); + if ( pos >= 0 ) + strncpy(&link_dest[pos], "../", 3); + /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ + (void)devfs_mk_symlink(xen_dev_dir, + "evtchn", + DEVFS_FL_DEFAULT, + &link_dest[pos], + &symlink_handle, + NULL); + + /* (DEVFS) automatically destroy the symlink with its destination. */ + devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); +#endif + printk("Event-channel device installed.\n"); + + return 0; +} + + +SYSINIT(evtchn_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_init, NULL); + + Property changes on: dev/xen/evtchn/evtchn_dev.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: dev/xen/netback/netback.c =================================================================== --- dev/xen/netback/netback.c (.../stable/6/sys) (revision 0) +++ dev/xen/netback/netback.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,1585 @@ +/* + * Copyright (c) 2006, Cisco Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Cisco Systems, Inc. 
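Taken together, the cdevsw above gives user space a very small interface: open /dev/xen/evtchn, bind the ports of interest, then read even-sized buffers of uint16_t port numbers and write them back once handled so the ports are unmasked again. The sketch below shows the intended call sequence from userland; the numeric ioctl encodings (EVTCHN_BIND and friends) live in a header that is not part of this diff, so the exact call shape here is an assumption:

    /* Hypothetical consumer of /dev/xen/evtchn; only the device path and the
     * read/write semantics are taken from the driver above. */
    #include <sys/ioctl.h>

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>

    int
    watch_port(unsigned long bind_cmd, int port)
    {
        uint16_t ports[64];
        ssize_t n;
        int fd;

        fd = open("/dev/xen/evtchn", O_RDWR);
        if (fd < 0)
            return (-1);
        if (ioctl(fd, bind_cmd, port) < 0) {    /* e.g. EVTCHN_BIND */
            close(fd);
            return (-1);
        }
        /* Each read returns an even number of bytes: an array of pending
         * port numbers.  Writing them back re-unmasks those ports. */
        n = read(fd, ports, sizeof(ports));
        if (n > 0) {
            printf("first pending port: %u\n", ports[0]);
            (void)write(fd, ports, (size_t)n);
        }
        close(fd);
        return (0);
    }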
nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifdef XEN_NETBACK_DEBUG +#define DPRINTF(fmt, args...) \ + printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTF(fmt, args...) ((void)0) +#endif + +#ifdef XEN_NETBACK_DEBUG_LOTS +#define DDPRINTF(fmt, args...) \ + printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) +#define DPRINTF_MBUF(_m) print_mbuf(_m, 0) +#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len) +#else +#define DDPRINTF(fmt, args...) ((void)0) +#define DPRINTF_MBUF(_m) ((void)0) +#define DPRINTF_MBUF_LEN(_m, _len) ((void)0) +#endif + +#define WPRINTF(fmt, args...) \ + printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) +#define BUG_ON PANIC_IF + +#define IFNAME(_np) (_np)->ifp->if_xname + +#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE) +#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE) + +struct ring_ref { + vm_offset_t va; + grant_handle_t handle; + uint64_t bus_addr; +}; + +typedef struct netback_info { + + /* Schedule lists */ + STAILQ_ENTRY(netback_info) next_tx; + STAILQ_ENTRY(netback_info) next_rx; + int on_tx_sched_list; + int on_rx_sched_list; + + struct xenbus_device *xdev; + XenbusState frontend_state; + + domid_t domid; + int handle; + char *bridge; + + int rings_connected; + struct ring_ref tx_ring_ref; + struct ring_ref rx_ring_ref; + netif_tx_back_ring_t tx; + netif_rx_back_ring_t rx; + evtchn_port_t evtchn; + int irq; + void *irq_cookie; + + struct ifnet *ifp; + int ref_cnt; + + device_t ndev; + int attached; +} netif_t; + + +#define MAX_PENDING_REQS 256 +#define PKT_PROT_LEN 64 + +static struct { + netif_tx_request_t req; + netif_t *netif; +} pending_tx_info[MAX_PENDING_REQS]; +static uint16_t pending_ring[MAX_PENDING_REQS]; +typedef unsigned int PEND_RING_IDX; +#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1)) +static PEND_RING_IDX pending_prod, pending_cons; +#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons) + +static unsigned long mmap_vstart; +#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE)) + +/* Freed TX mbufs get batched on this ring before return to pending_ring. 
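pending_ring[] above is another power-of-two index ring: pending_prod and pending_cons free-run, MASK_PEND_IDX() folds them into the array, and NR_PENDING_REQS is simply how many of the MAX_PENDING_REQS slots are currently out on loan to in-flight requests. A tiny model of that accounting, reusing the same macro definitions:

    #include <assert.h>
    #include <stdint.h>

    #define MAX_PENDING_REQS    256
    #define MASK_PEND_IDX(i)    ((i) & (MAX_PENDING_REQS - 1))

    static uint16_t pending_ring[MAX_PENDING_REQS];
    static unsigned int pending_prod, pending_cons;     /* free-running */

    /* Slots currently handed out to in-flight requests. */
    #define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)

    static uint16_t
    take_slot(void)                     /* consume a free slot index */
    {
        return (pending_ring[MASK_PEND_IDX(pending_cons++)]);
    }

    static void
    return_slot(uint16_t idx)           /* give the index back when done */
    {
        pending_ring[MASK_PEND_IDX(pending_prod++)] = idx;
    }

    int
    main(void)
    {
        uint16_t idx;

        for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++)
            pending_ring[pending_prod] = pending_prod;  /* all slots free */
        assert(NR_PENDING_REQS == 0);

        idx = take_slot();
        assert(NR_PENDING_REQS == 1);   /* one request in flight */
        return_slot(idx);
        assert(NR_PENDING_REQS == 0);
        return (0);
    }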
*/ +static uint16_t dealloc_ring[MAX_PENDING_REQS]; +static PEND_RING_IDX dealloc_prod, dealloc_cons; + +static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1]; +static mmu_update_t rx_mmu[NET_RX_RING_SIZE]; +static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE]; + +static grant_handle_t grant_tx_handle[MAX_PENDING_REQS]; +static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS]; +static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS]; + +static struct task net_tx_task, net_rx_task; +static struct callout rx_task_callout; + +static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list = + STAILQ_HEAD_INITIALIZER(tx_sched_list); +static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list = + STAILQ_HEAD_INITIALIZER(rx_sched_list); +static struct mtx tx_sched_list_lock; +static struct mtx rx_sched_list_lock; + +static int vif_unit_maker = 0; + +/* Protos */ +static void netback_start(struct ifnet *ifp); +static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data); +static int vif_add_dev(struct xenbus_device *xdev); +static void disconnect_rings(netif_t *netif); + +#ifdef XEN_NETBACK_DEBUG_LOTS +/* Debug code to display the contents of an mbuf */ +static void +print_mbuf(struct mbuf *m, int max) +{ + int i, j=0; + printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len); + for (; m; m = m->m_next) { + unsigned char *d = m->m_data; + for (i=0; i < m->m_len; i++) { + if (max && j == max) + break; + if ((j++ % 16) == 0) + printf("\n%04x:", j); + printf(" %02x", d[i]); + } + } + printf("\n"); +} +#endif + + +#define MAX_MFN_ALLOC 64 +static unsigned long mfn_list[MAX_MFN_ALLOC]; +static unsigned int alloc_index = 0; + +static unsigned long +alloc_mfn(void) +{ + unsigned long mfn = 0; + struct xen_memory_reservation reservation = { + .extent_start = mfn_list, + .nr_extents = MAX_MFN_ALLOC, + .extent_order = 0, + .domid = DOMID_SELF + }; + if ( unlikely(alloc_index == 0) ) + alloc_index = HYPERVISOR_memory_op( + XENMEM_increase_reservation, &reservation); + if ( alloc_index != 0 ) + mfn = mfn_list[--alloc_index]; + return mfn; +} + +static unsigned long +alloc_empty_page_range(unsigned long nr_pages) +{ + void *pages; + int i = 0, j = 0; + multicall_entry_t mcl[17]; + unsigned long mfn_list[16]; + struct xen_memory_reservation reservation = { + .extent_start = mfn_list, + .nr_extents = 0, + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (pages == NULL) + return 0; + + memset(mcl, 0, sizeof(mcl)); + + while (i < nr_pages) { + unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE); + + mcl[j].op = __HYPERVISOR_update_va_mapping; + mcl[j].args[0] = va; + + mfn_list[j++] = vtomach(va) >> PAGE_SHIFT; + + xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY; + + if (j == 16 || i == nr_pages) { + mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL; + + reservation.nr_extents = j; + + mcl[j].op = __HYPERVISOR_memory_op; + mcl[j].args[0] = XENMEM_decrease_reservation; + mcl[j].args[1] = (unsigned long)&reservation; + + (void)HYPERVISOR_multicall(mcl, j+1); + + mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0; + j = 0; + } + } + + return (unsigned long)pages; +} + +#ifdef XEN_NETBACK_FIXUP_CSUM +static void +fixup_checksum(struct mbuf *m) +{ + struct ether_header *eh = mtod(m, struct ether_header *); + struct ip *ip = (struct ip *)(eh + 1); + int iphlen = ip->ip_hl << 2; + int iplen = ntohs(ip->ip_len); + + if ((m->m_pkthdr.csum_flags & CSUM_TCP)) { + struct 
tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen); + th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(IPPROTO_TCP + (iplen - iphlen))); + th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen); + m->m_pkthdr.csum_flags &= ~CSUM_TCP; + } else { + u_short csum; + struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen); + uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(IPPROTO_UDP + (iplen - iphlen))); + if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0) + csum = 0xffff; + uh->uh_sum = csum; + m->m_pkthdr.csum_flags &= ~CSUM_UDP; + } +} +#endif + +/* Add the interface to the specified bridge */ +static int +add_to_bridge(struct ifnet *ifp, char *bridge) +{ + struct ifdrv ifd; + struct ifbreq ifb; + struct ifnet *ifp_bridge = ifunit(bridge); + + if (!ifp_bridge) + return ENOENT; + + bzero(&ifd, sizeof(ifd)); + bzero(&ifb, sizeof(ifb)); + + strcpy(ifb.ifbr_ifsname, ifp->if_xname); + strcpy(ifd.ifd_name, ifp->if_xname); + ifd.ifd_cmd = BRDGADD; + ifd.ifd_len = sizeof(ifb); + ifd.ifd_data = &ifb; + + return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd); + +} + +static int +netif_create(int handle, struct xenbus_device *xdev, char *bridge) +{ + netif_t *netif; + struct ifnet *ifp; + + netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO); + if (!netif) + return ENOMEM; + + netif->ref_cnt = 1; + netif->handle = handle; + netif->domid = xdev->otherend_id; + netif->xdev = xdev; + netif->bridge = bridge; + xdev->data = netif; + + /* Set up ifnet structure */ + ifp = netif->ifp = if_alloc(IFT_ETHER); + if (!ifp) { + if (bridge) + free(bridge, M_DEVBUF); + free(netif, M_DEVBUF); + return ENOMEM; + } + + ifp->if_softc = netif; + if_initname(ifp, "vif", + atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ ); + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX; + ifp->if_output = ether_output; + ifp->if_start = netback_start; + ifp->if_ioctl = netback_ioctl; + ifp->if_mtu = ETHERMTU; + ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; + + DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle); + + return 0; +} + +static void +netif_get(netif_t *netif) +{ + atomic_add_int(&netif->ref_cnt, 1); +} + +static void +netif_put(netif_t *netif) +{ + if (atomic_fetchadd_int(&netif->ref_cnt, -1) == 1) { + DPRINTF("%s\n", IFNAME(netif)); + disconnect_rings(netif); + if (netif->ifp) { + if_free(netif->ifp); + netif->ifp = NULL; + } + if (netif->bridge) + free(netif->bridge, M_DEVBUF); + free(netif, M_DEVBUF); + } +} + +static int +netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + switch (cmd) { + case SIOCSIFFLAGS: + DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n", + IFNAME((struct netback_info *)ifp->if_softc), ((struct ifreq *)data)->ifr_flags); + return 0; + } + + DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd); + + return ether_ioctl(ifp, cmd, data); +} + +static inline void +maybe_schedule_tx_action(void) +{ + smp_mb(); + if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list)) + taskqueue_enqueue(taskqueue_swi, &net_tx_task); +} + +/* Removes netif from front of list and does not call netif_put() (caller must) */ +static netif_t * +remove_from_tx_schedule_list(void) +{ + netif_t *netif; + + mtx_lock(&tx_sched_list_lock); + + if ((netif = STAILQ_FIRST(&tx_sched_list))) { + STAILQ_REMOVE(&tx_sched_list, netif, netback_info, next_tx); + STAILQ_NEXT(netif, next_tx) = NULL; + netif->on_tx_sched_list = 0; + } + + 
mtx_unlock(&tx_sched_list_lock); + + return netif; +} + +/* Adds netif to end of list and calls netif_get() */ +static void +add_to_tx_schedule_list_tail(netif_t *netif) +{ + if (netif->on_tx_sched_list) + return; + + mtx_lock(&tx_sched_list_lock); + if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) { + netif_get(netif); + STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx); + netif->on_tx_sched_list = 1; + } + mtx_unlock(&tx_sched_list_lock); +} + +/* + * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER: + * If this driver is pipelining transmit requests then we can be very + * aggressive in avoiding new-packet notifications -- frontend only needs to + * send a notification if there are no outstanding unreceived responses. + * If we may be buffer transmit buffers for any reason then we must be rather + * more conservative and treat this as the final check for pending work. + */ +static void +netif_schedule_tx_work(netif_t *netif) +{ + int more_to_do; + +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER + more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx); +#else + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); +#endif + + if (more_to_do) { + DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif)); + add_to_tx_schedule_list_tail(netif); + maybe_schedule_tx_action(); + } +} + +static struct mtx dealloc_lock; +MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS); + +static void +netif_idx_release(uint16_t pending_idx) +{ + mtx_lock_spin(&dealloc_lock); + dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx; + mtx_unlock_spin(&dealloc_lock); + + taskqueue_enqueue(taskqueue_swi, &net_tx_task); +} + +static void +make_tx_response(netif_t *netif, + uint16_t id, + int8_t st) +{ + RING_IDX i = netif->tx.rsp_prod_pvt; + netif_tx_response_t *resp; + int notify; + + resp = RING_GET_RESPONSE(&netif->tx, i); + resp->id = id; + resp->status = st; + + netif->tx.rsp_prod_pvt = ++i; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify); + if (notify) + notify_remote_via_irq(netif->irq); + +#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER + if (i == netif->tx.req_cons) { + int more_to_do; + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do); + if (more_to_do) + add_to_tx_schedule_list_tail(netif); + } +#endif +} + +inline static void +net_tx_action_dealloc(void) +{ + gnttab_unmap_grant_ref_t *gop; + uint16_t pending_idx; + PEND_RING_IDX dc, dp; + netif_t *netif; + int ret; + + dc = dealloc_cons; + dp = dealloc_prod; + + /* + * Free up any grants we have finished using + */ + gop = tx_unmap_ops; + while (dc != dp) { + pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)]; + gop->host_addr = MMAP_VADDR(pending_idx); + gop->dev_bus_addr = 0; + gop->handle = grant_tx_handle[pending_idx]; + gop++; + } + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops); + BUG_ON(ret); + + while (dealloc_cons != dp) { + pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)]; + + netif = pending_tx_info[pending_idx].netif; + + make_tx_response(netif, pending_tx_info[pending_idx].req.id, + NETIF_RSP_OKAY); + + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + + netif_put(netif); + } +} + +static void +netif_page_release(void *buf, void *args) +{ + uint16_t pending_idx = (unsigned int)args; + + DDPRINTF("pending_idx=%u\n", pending_idx); + + KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx)); + + netif_idx_release(pending_idx); +} + +static void +net_tx_action(void 
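make_tx_response() above only raises an event when RING_PUSH_RESPONSES_AND_CHECK_NOTIFY says the frontend asked for one: the frontend stores in rsp_event the first response index it wants to be woken for, so a notification is sent only when the newly published responses cross that mark. The following is a simplified model of that check (single producer, no memory barriers), paraphrasing the standard Xen shared-ring macro rather than quoting it:

    #include <stdint.h>
    #include <stdio.h>

    typedef uint32_t RING_IDX;

    struct sring_model {
        RING_IDX rsp_prod;      /* last response index published to the peer */
        RING_IDX rsp_event;     /* peer wants an event once rsp_prod reaches this */
    };

    /* Publish responses up to new_prod; return 1 if the peer must be notified. */
    static int
    push_responses(struct sring_model *s, RING_IDX new_prod)
    {
        RING_IDX old = s->rsp_prod;

        s->rsp_prod = new_prod;
        /* Notify only if rsp_event lies inside the (old, new_prod] window. */
        return ((RING_IDX)(new_prod - s->rsp_event) <
            (RING_IDX)(new_prod - old));
    }

    int
    main(void)
    {
        struct sring_model s = { .rsp_prod = 10, .rsp_event = 11 };

        printf("%d\n", push_responses(&s, 12)); /* 1: index 11 was crossed */
        s.rsp_event = 20;
        printf("%d\n", push_responses(&s, 14)); /* 0: peer is still polling */
        return (0);
    }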
*context, int pending) +{ + struct mbuf *m; + netif_t *netif; + netif_tx_request_t txreq; + uint16_t pending_idx; + RING_IDX i; + gnttab_map_grant_ref_t *mop; + int ret, work_to_do; + struct mbuf *txq = NULL, *txq_last = NULL; + + if (dealloc_cons != dealloc_prod) + net_tx_action_dealloc(); + + mop = tx_map_ops; + while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) { + + /* Get a netif from the list with work to do. */ + netif = remove_from_tx_schedule_list(); + + DDPRINTF("Processing %s (prod=%u, cons=%u)\n", + IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons); + + RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do); + if (!work_to_do) { + netif_put(netif); + continue; + } + + i = netif->tx.req_cons; + rmb(); /* Ensure that we see the request before we copy it. */ + memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq)); + + /* If we want credit-based scheduling, coud add it here - WORK */ + + netif->tx.req_cons++; + + netif_schedule_tx_work(netif); + + if (unlikely(txreq.size < ETHER_HDR_LEN) || + unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) { + WPRINTF("Bad packet size: %d\n", txreq.size); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + continue; + } + + /* No crossing a page as the payload mustn't fragment. */ + if (unlikely((txreq.offset + txreq.size) >= PAGE_SIZE)) { + WPRINTF("txreq.offset: %x, size: %u, end: %u\n", + txreq.offset, txreq.size, + (txreq.offset & PAGE_MASK) + txreq.size); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + continue; + } + + pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)]; + + MGETHDR(m, M_DONTWAIT, MT_DATA); + if (!m) { + WPRINTF("Failed to allocate mbuf\n"); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + break; + } + m->m_pkthdr.rcvif = netif->ifp; + + if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) { + struct mbuf *n; + MGET(n, M_DONTWAIT, MT_DATA); + if (!(m->m_next = n)) { + m_freem(m); + WPRINTF("Failed to allocate second mbuf\n"); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + break; + } + n->m_len = txreq.size - PKT_PROT_LEN; + m->m_len = PKT_PROT_LEN; + } else + m->m_len = txreq.size; + + mop->host_addr = MMAP_VADDR(pending_idx); + mop->dom = netif->domid; + mop->ref = txreq.gref; + mop->flags = GNTMAP_host_map | GNTMAP_readonly; + mop++; + + memcpy(&pending_tx_info[pending_idx].req, + &txreq, sizeof(txreq)); + pending_tx_info[pending_idx].netif = netif; + *((uint16_t *)m->m_data) = pending_idx; + + if (txq_last) + txq_last->m_nextpkt = m; + else + txq = m; + txq_last = m; + + pending_cons++; + + if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops)) + break; + } + + if (!txq) + return; + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops); + BUG_ON(ret); + + mop = tx_map_ops; + while ((m = txq) != NULL) { + caddr_t data; + + txq = m->m_nextpkt; + m->m_nextpkt = NULL; + + pending_idx = *((uint16_t *)m->m_data); + netif = pending_tx_info[pending_idx].netif; + memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq)); + + /* Check the remap error code. 
*/ + if (unlikely(mop->status)) { + WPRINTF("#### netback grant fails\n"); + make_tx_response(netif, txreq.id, NETIF_RSP_ERROR); + netif_put(netif); + m_freem(m); + mop++; + pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx; + continue; + } + +#if 0 + /* Can't do this in FreeBSD since vtophys() returns the pfn */ + /* of the remote domain who loaned us the machine page - DPT */ + xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] = + mop->dev_bus_addr >> PAGE_SHIFT; +#endif + grant_tx_handle[pending_idx] = mop->handle; + + /* Setup data in mbuf (lengths are already set) */ + data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset); + bcopy(data, m->m_data, m->m_len); + if (m->m_next) { + struct mbuf *n = m->m_next; + MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release, + (void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV); + n->m_data = &data[PKT_PROT_LEN]; + } else { + /* Schedule a response immediately. */ + netif_idx_release(pending_idx); + } + + if ((txreq.flags & NETTXF_data_validated)) { + /* Tell the stack the checksums are okay */ + m->m_pkthdr.csum_flags |= + (CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + m->m_pkthdr.csum_data = 0xffff; + } + + /* If necessary, inform stack to compute the checksums if it forwards the packet */ + if ((txreq.flags & NETTXF_csum_blank)) { + struct ether_header *eh = mtod(m, struct ether_header *); + if (ntohs(eh->ether_type) == ETHERTYPE_IP) { + struct ip *ip = (struct ip *)&m->m_data[14]; + if (ip->ip_p == IPPROTO_TCP) + m->m_pkthdr.csum_flags |= CSUM_TCP; + else if (ip->ip_p == IPPROTO_UDP) + m->m_pkthdr.csum_flags |= CSUM_UDP; + } + } + + netif->ifp->if_ibytes += m->m_pkthdr.len; + netif->ifp->if_ipackets++; + + DDPRINTF("RECV %d bytes from %s (cflags=%x)\n", + m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags); + DPRINTF_MBUF_LEN(m, 128); + + (*netif->ifp->if_input)(netif->ifp, m); + + mop++; + } +} + +/* Handle interrupt from a frontend */ +static void +netback_intr(void *arg) +{ + netif_t *netif = arg; + DDPRINTF("%s\n", IFNAME(netif)); + add_to_tx_schedule_list_tail(netif); + maybe_schedule_tx_action(); +} + +/* Removes netif from front of list and does not call netif_put() (caller must) */ +static netif_t * +remove_from_rx_schedule_list(void) +{ + netif_t *netif; + + mtx_lock(&rx_sched_list_lock); + + if ((netif = STAILQ_FIRST(&rx_sched_list))) { + STAILQ_REMOVE(&rx_sched_list, netif, netback_info, next_rx); + STAILQ_NEXT(netif, next_rx) = NULL; + netif->on_rx_sched_list = 0; + } + + mtx_unlock(&rx_sched_list_lock); + + return netif; +} + +/* Adds netif to end of list and calls netif_get() */ +static void +add_to_rx_schedule_list_tail(netif_t *netif) +{ + if (netif->on_rx_sched_list) + return; + + mtx_lock(&rx_sched_list_lock); + if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) { + netif_get(netif); + STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx); + netif->on_rx_sched_list = 1; + } + mtx_unlock(&rx_sched_list_lock); +} + +static int +make_rx_response(netif_t *netif, uint16_t id, int8_t st, + uint16_t offset, uint16_t size, uint16_t flags) +{ + RING_IDX i = netif->rx.rsp_prod_pvt; + netif_rx_response_t *resp; + int notify; + + resp = RING_GET_RESPONSE(&netif->rx, i); + resp->offset = offset; + resp->flags = flags; + resp->id = id; + resp->status = (int16_t)size; + if (st < 0) + resp->status = (int16_t)st; + + DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n", + i, resp->offset, resp->flags, resp->id, resp->status); + + 
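The checksum handling just above translates the frontend's hints into mbuf flags: NETTXF_data_validated means the payload checksum can be trusted as-is, while NETTXF_csum_blank means the TCP or UDP checksum was left unfilled and the host side must complete it. A small helper stating the same mapping in isolation; the numeric flag values here are placeholders, the real ones come from the Xen netif and FreeBSD mbuf headers:

    /* Illustrative flag values only; see the Xen netif and sys/mbuf.h headers. */
    #define NETTXF_csum_blank       0x01
    #define NETTXF_data_validated   0x02

    #define CSUM_TCP                0x0002
    #define CSUM_UDP                0x0004
    #define CSUM_IP_CHECKED         0x0100
    #define CSUM_IP_VALID           0x0200
    #define CSUM_DATA_VALID         0x0400
    #define CSUM_PSEUDO_HDR         0x0800

    /* Translate frontend checksum hints into host-side mbuf csum flags. */
    static int
    csum_flags_from_txreq(unsigned int txflags, int ip_proto)
    {
        int csum = 0;

        if (txflags & NETTXF_data_validated)            /* payload is trusted */
            csum |= CSUM_IP_CHECKED | CSUM_IP_VALID |
                CSUM_DATA_VALID | CSUM_PSEUDO_HDR;

        if (txflags & NETTXF_csum_blank) {              /* checksum still owed */
            if (ip_proto == 6)                          /* IPPROTO_TCP */
                csum |= CSUM_TCP;
            else if (ip_proto == 17)                    /* IPPROTO_UDP */
                csum |= CSUM_UDP;
        }
        return (csum);
    }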
netif->rx.rsp_prod_pvt = ++i; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify); + + return notify; +} + +static int +netif_rx(netif_t *netif) +{ + struct ifnet *ifp = netif->ifp; + struct mbuf *m; + multicall_entry_t *mcl; + mmu_update_t *mmu; + gnttab_transfer_t *gop; + unsigned long vdata, old_mfn, new_mfn; + struct mbuf *rxq = NULL, *rxq_last = NULL; + int ret, notify = 0, pkts_dequeued = 0; + + DDPRINTF("%s\n", IFNAME(netif)); + + mcl = rx_mcl; + mmu = rx_mmu; + gop = grant_rx_op; + + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + + /* Quit if the target domain has no receive buffers */ + if (netif->rx.req_cons == netif->rx.sring->req_prod) + break; + + IFQ_DRV_DEQUEUE(&ifp->if_snd, m); + if (m == NULL) + break; + + pkts_dequeued++; + + /* Check if we need to copy the data */ + if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) || + (*m->m_ext.ref_cnt > 1) || m->m_next != NULL) { + struct mbuf *n; + + DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n", + m->m_flags, + (m->m_flags & M_EXT) ? m->m_ext.ext_type : 0, + (m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0, + (unsigned int)m->m_next); + + /* Make copy */ + MGETHDR(n, M_DONTWAIT, MT_DATA); + if (!n) + goto drop; + + MCLGET(n, M_DONTWAIT); + if (!(n->m_flags & M_EXT)) { + m_freem(n); + goto drop; + } + + /* Leave space at front and keep current alignment */ + n->m_data += 16 + ((unsigned int)m->m_data & 0x3); + + if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) { + WPRINTF("pkt to big %d\n", m->m_pkthdr.len); + m_freem(n); + goto drop; + } + m_copydata(m, 0, m->m_pkthdr.len, n->m_data); + n->m_pkthdr.len = n->m_len = m->m_pkthdr.len; + n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA); + m_freem(m); + m = n; + } + + vdata = (unsigned long)m->m_data; + old_mfn = vtomach(vdata) >> PAGE_SHIFT; + + if ((new_mfn = alloc_mfn()) == 0) + goto drop; + +#ifdef XEN_NETBACK_FIXUP_CSUM + /* Check if we need to compute a checksum. This happens */ + /* when bridging from one domain to another. */ + if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) + fixup_checksum(m); +#endif + + xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn; + + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = vdata; + mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A; + mcl->args[2] = 0; + mcl->args[3] = 0; + mcl++; + + gop->mfn = old_mfn; + gop->domid = netif->domid; + gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref; + netif->rx.req_cons++; + gop++; + + mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; + mmu->val = vtophys(vdata) >> PAGE_SHIFT; + mmu++; + + if (rxq_last) + rxq_last->m_nextpkt = m; + else + rxq = m; + rxq_last = m; + + DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif)); + DPRINTF_MBUF_LEN(m, 128); + + /* Filled the batch queue? 
*/ + if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op)) + break; + + continue; + drop: + DDPRINTF("dropping pkt\n"); + ifp->if_oerrors++; + m_freem(m); + } + + if (mcl == rx_mcl) + return pkts_dequeued; + + mcl->op = __HYPERVISOR_mmu_update; + mcl->args[0] = (unsigned long)rx_mmu; + mcl->args[1] = mmu - rx_mmu; + mcl->args[2] = 0; + mcl->args[3] = DOMID_SELF; + mcl++; + + mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL; + ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl); + BUG_ON(ret != 0); + + ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op); + BUG_ON(ret != 0); + + mcl = rx_mcl; + gop = grant_rx_op; + + while ((m = rxq) != NULL) { + int8_t status; + uint16_t id, flags = 0; + + rxq = m->m_nextpkt; + m->m_nextpkt = NULL; + + /* Rederive the machine addresses. */ + new_mfn = mcl->args[1] >> PAGE_SHIFT; + old_mfn = gop->mfn; + + ifp->if_obytes += m->m_pkthdr.len; + ifp->if_opackets++; + + /* The update_va_mapping() must not fail. */ + BUG_ON(mcl->result != 0); + + /* Setup flags */ + if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA)) + flags |= NETRXF_csum_blank | NETRXF_data_validated; + else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID)) + flags |= NETRXF_data_validated; + + /* Check the reassignment error code. */ + status = NETIF_RSP_OKAY; + if (gop->status != 0) { + DPRINTF("Bad status %d from grant transfer to DOM%u\n", + gop->status, netif->domid); + /* + * Page no longer belongs to us unless GNTST_bad_page, + * but that should be a fatal error anyway. + */ + BUG_ON(gop->status == GNTST_bad_page); + status = NETIF_RSP_ERROR; + } + id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id; + notify |= make_rx_response(netif, id, status, + (unsigned long)m->m_data & PAGE_MASK, + m->m_pkthdr.len, flags); + + m_freem(m); + mcl++; + gop++; + } + + if (notify) + notify_remote_via_irq(netif->irq); + + return pkts_dequeued; +} + +static void +rx_task_timer(void *arg) +{ + DDPRINTF("\n"); + taskqueue_enqueue(taskqueue_swi, &net_rx_task); +} + +static void +net_rx_action(void *context, int pending) +{ + netif_t *netif, *last_zero_work = NULL; + + DDPRINTF("\n"); + + while ((netif = remove_from_rx_schedule_list())) { + struct ifnet *ifp = netif->ifp; + + if (netif == last_zero_work) { + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + add_to_rx_schedule_list_tail(netif); + netif_put(netif); + if (!STAILQ_EMPTY(&rx_sched_list)) + callout_reset(&rx_task_callout, 1, rx_task_timer, NULL); + break; + } + + if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) { + if (netif_rx(netif)) + last_zero_work = NULL; + else if (!last_zero_work) + last_zero_work = netif; + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + add_to_rx_schedule_list_tail(netif); + } + + netif_put(netif); + } +} + +static void +netback_start(struct ifnet *ifp) +{ + netif_t *netif = (netif_t *)ifp->if_softc; + + DDPRINTF("%s\n", IFNAME(netif)); + + add_to_rx_schedule_list_tail(netif); + taskqueue_enqueue(taskqueue_swi, &net_rx_task); +} + +/* Map a grant ref to a ring */ +static int +map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring) +{ + struct gnttab_map_grant_ref op; + + ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE); + if (ring->va == 0) + return ENOMEM; + + op.host_addr = ring->va; + op.flags = GNTMAP_host_map; + op.ref = ref; + op.dom = dom; + HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); + if (op.status) { + WPRINTF("grant table op err=%d\n", op.status); + kmem_free(kernel_map, ring->va, PAGE_SIZE); + ring->va = 0; + return EACCES; + } + + ring->handle = op.handle; + 
ring->bus_addr = op.dev_bus_addr; + + return 0; +} + +/* Unmap grant ref for a ring */ +static void +unmap_ring(struct ring_ref *ring) +{ + struct gnttab_unmap_grant_ref op; + + op.host_addr = ring->va; + op.dev_bus_addr = ring->bus_addr; + op.handle = ring->handle; + HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); + if (op.status) + WPRINTF("grant table op err=%d\n", op.status); + + kmem_free(kernel_map, ring->va, PAGE_SIZE); + ring->va = 0; +} + +static int +connect_rings(netif_t *netif) +{ + struct xenbus_device *xdev = netif->xdev; + netif_tx_sring_t *txs; + netif_rx_sring_t *rxs; + unsigned long tx_ring_ref, rx_ring_ref; + evtchn_port_t evtchn; + evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; + int err; + + // Grab FE data and map his memory + err = xenbus_gather(NULL, xdev->otherend, + "tx-ring-ref", "%lu", &tx_ring_ref, + "rx-ring-ref", "%lu", &rx_ring_ref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(xdev, err, + "reading %s/ring-ref and event-channel", + xdev->otherend); + return err; + } + + err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref); + if (err) { + xenbus_dev_fatal(xdev, err, "mapping tx ring"); + return err; + } + txs = (netif_tx_sring_t *)netif->tx_ring_ref.va; + BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE); + + err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref); + if (err) { + unmap_ring(&netif->tx_ring_ref); + xenbus_dev_fatal(xdev, err, "mapping rx ring"); + return err; + } + rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va; + BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE); + + op.u.bind_interdomain.remote_dom = netif->domid; + op.u.bind_interdomain.remote_port = evtchn; + err = HYPERVISOR_event_channel_op(&op); + if (err) { + unmap_ring(&netif->tx_ring_ref); + unmap_ring(&netif->rx_ring_ref); + xenbus_dev_fatal(xdev, err, "binding event channel"); + return err; + } + netif->evtchn = op.u.bind_interdomain.local_port; + + /* bind evtchn to irq handler */ + netif->irq = + bind_evtchn_to_irqhandler(netif->evtchn, "netback", + netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie); + + netif->rings_connected = 1; + + DPRINTF("%s connected! evtchn=%d irq=%d\n", + IFNAME(netif), netif->evtchn, netif->irq); + + return 0; +} + +static void +disconnect_rings(netif_t *netif) +{ + DPRINTF("\n"); + + if (netif->rings_connected) { + unbind_from_irqhandler(netif->irq, netif->irq_cookie); + netif->irq = 0; + unmap_ring(&netif->tx_ring_ref); + unmap_ring(&netif->rx_ring_ref); + netif->rings_connected = 0; + } +} + +static void +connect(netif_t *netif) +{ + if (!netif->xdev || + !netif->attached || + netif->frontend_state != XenbusStateConnected) { + return; + } + + if (!connect_rings(netif)) { + xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected); + + /* Turn on interface */ + netif->ifp->if_drv_flags |= IFF_DRV_RUNNING; + netif->ifp->if_flags |= IFF_UP; + } +} + +static int +netback_remove(struct xenbus_device *xdev) +{ + netif_t *netif = xdev->data; + device_t ndev; + + DPRINTF("remove %s\n", xdev->nodename); + + if ((ndev = netif->ndev)) { + netif->ndev = NULL; + mtx_lock(&Giant); + device_detach(ndev); + mtx_unlock(&Giant); + } + + xdev->data = NULL; + netif->xdev = NULL; + netif_put(netif); + + return 0; +} + +/** + * Entry point to this code when a new device is created. Allocate the basic + * structures and the ring buffers for communication with the frontend. + * Switch to Connected state. 
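connect_rings() above expects the frontend to have published three keys in its own xenstore directory before either side can move to Connected: the grant references of the transmit and receive shared rings and the interdomain event channel port. For a hypothetical frontend in domain 3 with vif handle 0, the subtree read by xenbus_gather() would look roughly like this (the path layout follows the usual /local/domain/<domid>/device/vif/<handle> convention; the values are illustrative):

    /local/domain/3/device/vif/0/tx-ring-ref    = "8"
    /local/domain/3/device/vif/0/rx-ring-ref    = "9"
    /local/domain/3/device/vif/0/event-channel  = "4"

The backend maps the two grant references with map_ring(), binds the remote event channel port to a local one via EVTCHNOP_bind_interdomain, and from then on either end can kick the other with a single event after pushing requests or responses.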
+ */ +static int +netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id) +{ + int err; + long handle; + char *bridge; + + DPRINTF("node=%s\n", xdev->nodename); + + /* Grab the handle */ + err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle); + if (err != 1) { + xenbus_dev_fatal(xdev, err, "reading handle"); + return err; + } + + /* Check for bridge */ + bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL); + if (IS_ERR(bridge)) + bridge = NULL; + + err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait); + if (err) { + xenbus_dev_fatal(xdev, err, "writing switch state"); + return err; + } + + err = netif_create(handle, xdev, bridge); + if (err) { + xenbus_dev_fatal(xdev, err, "creating netif"); + return err; + } + + err = vif_add_dev(xdev); + if (err) { + netif_put((netif_t *)xdev->data); + xenbus_dev_fatal(xdev, err, "adding vif device"); + return err; + } + + return 0; +} + +/** + * We are reconnecting to the backend, due to a suspend/resume, or a backend + * driver restart. We tear down our netif structure and recreate it, but + * leave the device-layer structures intact so that this is transparent to the + * rest of the kernel. + */ +static int netback_resume(struct xenbus_device *xdev) +{ + DPRINTF("node=%s\n", xdev->nodename); + return 0; +} + + +/** + * Callback received when the frontend's state changes. + */ +static void frontend_changed(struct xenbus_device *xdev, + XenbusState frontend_state) +{ + netif_t *netif = xdev->data; + + DPRINTF("state=%d\n", frontend_state); + + netif->frontend_state = frontend_state; + + switch (frontend_state) { + case XenbusStateInitialising: + case XenbusStateInitialised: + break; + case XenbusStateConnected: + connect(netif); + break; + case XenbusStateClosing: + xenbus_switch_state(xdev, NULL, XenbusStateClosing); + break; + case XenbusStateClosed: + xenbus_remove_device(xdev); + break; + case XenbusStateUnknown: + case XenbusStateInitWait: + xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + +/* ** Driver registration ** */ + +static struct xenbus_device_id netback_ids[] = { + { "vif" }, + { "" } +}; + +static struct xenbus_driver netback = { + .name = "netback", + .ids = netback_ids, + .probe = netback_probe, + .remove = netback_remove, + .resume= netback_resume, + .otherend_changed = frontend_changed, +}; + +static void +netback_init(void *unused) +{ + callout_init(&rx_task_callout, CALLOUT_MPSAFE); + + mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS); + BUG_ON(!mmap_vstart); + + pending_cons = 0; + for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++) + pending_ring[pending_prod] = pending_prod; + + TASK_INIT(&net_tx_task, 0, net_tx_action, NULL); + TASK_INIT(&net_rx_task, 0, net_rx_action, NULL); + mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF); + mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF); + + DPRINTF("registering %s\n", netback.name); + + xenbus_register_backend(&netback); +} + +SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL) + +static int +vif_add_dev(struct xenbus_device *xdev) +{ + netif_t *netif = xdev->data; + device_t nexus, ndev; + devclass_t dc; + int err = 0; + + mtx_lock(&Giant); + + /* We will add a vif device as a child of nexus0 (for now) */ + if (!(dc = devclass_find("nexus")) || + !(nexus = devclass_get_device(dc, 0))) { + WPRINTF("could not find nexus0!\n"); + err = ENOENT; + goto done; + } + + + /* Create a newbus 
device representing the vif */ + ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit); + if (!ndev) { + WPRINTF("could not create newbus device %s!\n", IFNAME(netif)); + err = EFAULT; + goto done; + } + + netif_get(netif); + device_set_ivars(ndev, netif); + netif->ndev = ndev; + + device_probe_and_attach(ndev); + + done: + + mtx_unlock(&Giant); + + return err; +} + +enum { + VIF_SYSCTL_DOMID, + VIF_SYSCTL_HANDLE, + VIF_SYSCTL_TXRING, + VIF_SYSCTL_RXRING, +}; + +static char * +vif_sysctl_ring_info(netif_t *netif, int cmd) +{ + char *buf = malloc(256, M_DEVBUF, M_WAITOK); + if (buf) { + if (!netif->rings_connected) + sprintf(buf, "rings not connected\n"); + else if (cmd == VIF_SYSCTL_TXRING) { + netif_tx_back_ring_t *tx = &netif->tx; + sprintf(buf, "nr_ents=%x req_cons=%x" + " req_prod=%x req_event=%x" + " rsp_prod=%x rsp_event=%x", + tx->nr_ents, tx->req_cons, + tx->sring->req_prod, tx->sring->req_event, + tx->sring->rsp_prod, tx->sring->rsp_event); + } else { + netif_rx_back_ring_t *rx = &netif->rx; + sprintf(buf, "nr_ents=%x req_cons=%x" + " req_prod=%x req_event=%x" + " rsp_prod=%x rsp_event=%x", + rx->nr_ents, rx->req_cons, + rx->sring->req_prod, rx->sring->req_event, + rx->sring->rsp_prod, rx->sring->rsp_event); + } + } + return buf; +} + +static int +vif_sysctl_handler(SYSCTL_HANDLER_ARGS) +{ + device_t dev = (device_t)arg1; + netif_t *netif = (netif_t *)device_get_ivars(dev); + const char *value; + char *buf = NULL; + int err; + + switch (arg2) { + case VIF_SYSCTL_DOMID: + return sysctl_handle_int(oidp, NULL, netif->domid, req); + case VIF_SYSCTL_HANDLE: + return sysctl_handle_int(oidp, NULL, netif->handle, req); + case VIF_SYSCTL_TXRING: + case VIF_SYSCTL_RXRING: + value = buf = vif_sysctl_ring_info(netif, arg2); + break; + default: + return (EINVAL); + } + + err = SYSCTL_OUT(req, value, strlen(value)); + if (buf != NULL) + free(buf, M_DEVBUF); + + return err; +} + +/* Newbus vif device driver probe */ +static int +vif_probe(device_t dev) +{ + DDPRINTF("vif%d\n", device_get_unit(dev)); + return 0; +} + +/* Newbus vif device driver attach */ +static int +vif_attach(device_t dev) +{ + netif_t *netif = (netif_t *)device_get_ivars(dev); + uint8_t mac[ETHER_ADDR_LEN]; + + DDPRINTF("%s\n", IFNAME(netif)); + + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD, + dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I", + "domid of frontend"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD, + dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I", + "handle of frontend"); +#ifdef XEN_NETBACK_DEBUG + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "txring", CTLFLAG_RD, + dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A", + "tx ring info"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "rxring", CTLFLAG_RD, + dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A", + "rx ring info"); +#endif + + memset(mac, 0xff, sizeof(mac)); + mac[0] &= ~0x01; + + ether_ifattach(netif->ifp, mac); + netif->attached = 1; + + connect(netif); + + if (netif->bridge) { + DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge); + int err = add_to_bridge(netif->ifp, netif->bridge); + if (err) { + WPRINTF("Error adding %s to %s; err=%d\n", + IFNAME(netif), netif->bridge, err); + } + } + + return bus_generic_attach(dev); +} + +/* Newbus vif 
device driver detach */ +static int +vif_detach(device_t dev) +{ + netif_t *netif = (netif_t *)device_get_ivars(dev); + struct ifnet *ifp = netif->ifp; + + DDPRINTF("%s\n", IFNAME(netif)); + + /* Tell the stack that the interface is no longer active */ + ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); + + ether_ifdetach(ifp); + + bus_generic_detach(dev); + + netif->attached = 0; + + netif_put(netif); + + return 0; +} + +static device_method_t vif_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, vif_probe), + DEVMETHOD(device_attach, vif_attach), + DEVMETHOD(device_detach, vif_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + {0, 0} +}; + +static devclass_t vif_devclass; + +static driver_t vif_driver = { + "vif", + vif_methods, + 0, +}; + +DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0); + + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ Property changes on: dev/xen/netback/netback.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: dev/xen/blkback/blkback.c =================================================================== --- dev/xen/blkback/blkback.c (.../stable/6/sys) (revision 0) +++ dev/xen/blkback/blkback.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,1349 @@ +/* + * Copyright (c) 2006, Cisco Systems, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#if XEN_BLKBACK_DEBUG +#define DPRINTF(fmt, args...) \ + printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTF(fmt, args...) ((void)0) +#endif + +#define WPRINTF(fmt, args...) \ + printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) + +#define BLKBACK_INVALID_HANDLE (~0) + +struct ring_ref { + vm_offset_t va; + grant_handle_t handle; + uint64_t bus_addr; +}; + +typedef struct blkback_info { + + /* Schedule lists */ + STAILQ_ENTRY(blkback_info) next_req; + int on_req_sched_list; + + struct xenbus_device *xdev; + XenbusState frontend_state; + + domid_t domid; + + int state; + int ring_connected; + struct ring_ref rr; + blkif_back_ring_t ring; + evtchn_port_t evtchn; + int irq; + void *irq_cookie; + + int ref_cnt; + + int handle; + char *mode; + char *type; + char *dev_name; + + struct vnode *vn; + struct cdev *cdev; + struct cdevsw *csw; + u_int sector_size; + int sector_size_shift; + off_t media_size; + u_int media_num_sectors; + int major; + int minor; + int read_only; + + struct mtx blk_ring_lock; + + device_t ndev; + + /* Stats */ + int st_rd_req; + int st_wr_req; + int st_oo_req; + int st_err_req; +} blkif_t; + +/* + * These are rather arbitrary. They are fairly large because adjacent requests + * pulled from a communication ring are quite likely to end up being part of + * the same scatter/gather request at the disc. + * + * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** + * + * This will increase the chances of being able to write whole tracks. + * 64 should be enough to keep us competitive with Linux. + */ +static int blkif_reqs = 64; +TUNABLE_INT("xen.vbd.blkif_reqs", &blkif_reqs); + +static int mmap_pages; + +/* + * Each outstanding request that we've passed to the lower device layers has a + * 'pending_req' allocated to it. Each buffer_head that completes decrements + * the pendcnt towards zero. When it hits zero, the specified domain has a + * response queued for it, with the saved 'id' passed back. 
+ */ +typedef struct pending_req { + blkif_t *blkif; + uint64_t id; + int nr_pages; + int pendcnt; + unsigned short operation; + int status; + STAILQ_ENTRY(pending_req) free_list; +} pending_req_t; + +static pending_req_t *pending_reqs; +static STAILQ_HEAD(pending_reqs_list, pending_req) pending_free = + STAILQ_HEAD_INITIALIZER(pending_free); +static struct mtx pending_free_lock; + +static STAILQ_HEAD(blkback_req_sched_list, blkback_info) req_sched_list = + STAILQ_HEAD_INITIALIZER(req_sched_list); +static struct mtx req_sched_list_lock; + +static unsigned long mmap_vstart; +static unsigned long *pending_vaddrs; +static grant_handle_t *pending_grant_handles; + +static struct task blk_req_task; + +/* Protos */ +static void disconnect_ring(blkif_t *blkif); +static int vbd_add_dev(struct xenbus_device *xdev); + +static inline int vaddr_pagenr(pending_req_t *req, int seg) +{ + return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; +} + +static inline unsigned long vaddr(pending_req_t *req, int seg) +{ + return pending_vaddrs[vaddr_pagenr(req, seg)]; +} + +#define pending_handle(_req, _seg) \ + (pending_grant_handles[vaddr_pagenr(_req, _seg)]) + +static unsigned long +alloc_empty_page_range(unsigned long nr_pages) +{ + void *pages; + int i = 0, j = 0; + multicall_entry_t mcl[17]; + unsigned long mfn_list[16]; + struct xen_memory_reservation reservation = { + .extent_start = mfn_list, + .nr_extents = 0, + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF + }; + + pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (pages == NULL) + return 0; + + memset(mcl, 0, sizeof(mcl)); + + while (i < nr_pages) { + unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE); + + mcl[j].op = __HYPERVISOR_update_va_mapping; + mcl[j].args[0] = va; + + mfn_list[j++] = vtomach(va) >> PAGE_SHIFT; + + xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY; + + if (j == 16 || i == nr_pages) { + mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL; + + reservation.nr_extents = j; + + mcl[j].op = __HYPERVISOR_memory_op; + mcl[j].args[0] = XENMEM_decrease_reservation; + mcl[j].args[1] = (unsigned long)&reservation; + + (void)HYPERVISOR_multicall(mcl, j+1); + + mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0; + j = 0; + } + } + + return (unsigned long)pages; +} + +static pending_req_t * +alloc_req(void) +{ + pending_req_t *req; + mtx_lock(&pending_free_lock); + if ((req = STAILQ_FIRST(&pending_free))) { + STAILQ_REMOVE(&pending_free, req, pending_req, free_list); + STAILQ_NEXT(req, free_list) = NULL; + } + mtx_unlock(&pending_free_lock); + return req; +} + +static void +free_req(pending_req_t *req) +{ + int was_empty; + + mtx_lock(&pending_free_lock); + was_empty = STAILQ_EMPTY(&pending_free); + STAILQ_INSERT_TAIL(&pending_free, req, free_list); + mtx_unlock(&pending_free_lock); + if (was_empty) + taskqueue_enqueue(taskqueue_swi, &blk_req_task); +} + +static void +fast_flush_area(pending_req_t *req) +{ + struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int i, invcount = 0; + grant_handle_t handle; + int ret; + + for (i = 0; i < req->nr_pages; i++) { + handle = pending_handle(req, i); + if (handle == BLKBACK_INVALID_HANDLE) + continue; + unmap[invcount].host_addr = vaddr(req, i); + unmap[invcount].dev_bus_addr = 0; + unmap[invcount].handle = handle; + pending_handle(req, i) = BLKBACK_INVALID_HANDLE; + invcount++; + } + + ret = HYPERVISOR_grant_table_op( + GNTTABOP_unmap_grant_ref, unmap, invcount); + PANIC_IF(ret); +} + +static void 
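Each pending_req owns BLKIF_MAX_SEGMENTS_PER_REQUEST consecutive pages of the backend's mmap area, so vaddr_pagenr() above is plain row-major indexing: the request's slot number times segments-per-request, plus the segment number. In isolation, and assuming the usual blkif value of 11 segments per request:

    #include <assert.h>

    #define BLKIF_MAX_SEGMENTS_PER_REQUEST  11
    #define PAGE_SIZE                       4096

    /* Page number inside the mmap area for request slot r, segment s. */
    static unsigned int
    pagenr(unsigned int req_slot, unsigned int seg)
    {
        return (req_slot * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg);
    }

    int
    main(void)
    {
        unsigned long mmap_vstart = 0x10000000UL;   /* illustrative base */

        /* Request slot 2, segment 3 lands 25 pages into the area. */
        assert(pagenr(2, 3) == 25);
        assert(mmap_vstart + pagenr(2, 3) * PAGE_SIZE == 0x10019000UL);
        return (0);
    }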
+blkif_get(blkif_t *blkif) +{ + atomic_add_int(&blkif->ref_cnt, 1); +} + +static void +blkif_put(blkif_t *blkif) +{ + if (atomic_fetchadd_int(&blkif->ref_cnt, -1) == 1) { + DPRINTF("Removing %x\n", (unsigned int)blkif); + disconnect_ring(blkif); + if (blkif->mode) + free(blkif->mode, M_DEVBUF); + if (blkif->type) + free(blkif->type, M_DEVBUF); + if (blkif->dev_name) + free(blkif->dev_name, M_DEVBUF); + free(blkif, M_DEVBUF); + } +} + +static int +blkif_create(struct xenbus_device *xdev, long handle, char *mode, char *type, char *params) +{ + blkif_t *blkif; + + blkif = (blkif_t *)malloc(sizeof(*blkif), M_DEVBUF, M_NOWAIT | M_ZERO); + if (!blkif) + return ENOMEM; + + DPRINTF("Created %x\n", (unsigned int)blkif); + + blkif->ref_cnt = 1; + blkif->domid = xdev->otherend_id; + blkif->handle = handle; + blkif->mode = mode; + blkif->type = type; + blkif->dev_name = params; + blkif->xdev = xdev; + xdev->data = blkif; + + mtx_init(&blkif->blk_ring_lock, "blk_ring_ock", "blkback ring lock", MTX_DEF); + + if (strcmp(mode, "w")) + blkif->read_only = 1; + + return 0; +} + +static void +add_to_req_schedule_list_tail(blkif_t *blkif) +{ + if (!blkif->on_req_sched_list) { + mtx_lock(&req_sched_list_lock); + if (!blkif->on_req_sched_list && (blkif->state == XenbusStateConnected)) { + blkif_get(blkif); + STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req); + blkif->on_req_sched_list = 1; + taskqueue_enqueue(taskqueue_swi, &blk_req_task); + } + mtx_unlock(&req_sched_list_lock); + } +} + +/* This routine does not call blkif_get(), does not schedule the blk_req_task to run, + and assumes that the state is connected */ +static void +add_to_req_schedule_list_tail2(blkif_t *blkif) +{ + mtx_lock(&req_sched_list_lock); + if (!blkif->on_req_sched_list) { + STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req); + blkif->on_req_sched_list = 1; + } + mtx_unlock(&req_sched_list_lock); +} + +/* Removes blkif from front of list and does not call blkif_put() (caller must) */ +static blkif_t * +remove_from_req_schedule_list(void) +{ + blkif_t *blkif; + + mtx_lock(&req_sched_list_lock); + + if ((blkif = STAILQ_FIRST(&req_sched_list))) { + STAILQ_REMOVE(&req_sched_list, blkif, blkback_info, next_req); + STAILQ_NEXT(blkif, next_req) = NULL; + blkif->on_req_sched_list = 0; + } + + mtx_unlock(&req_sched_list_lock); + + return blkif; +} + +static void +make_response(blkif_t *blkif, uint64_t id, + unsigned short op, int st) +{ + blkif_response_t *resp; + blkif_back_ring_t *blk_ring = &blkif->ring; + int more_to_do = 0; + int notify; + + mtx_lock(&blkif->blk_ring_lock); + + + /* Place on the response ring for the relevant domain. */ + resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt); + resp->id = id; + resp->operation = op; + resp->status = st; + blk_ring->rsp_prod_pvt++; + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify); + + if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) { + /* + * Tail check for pending requests. Allows frontend to avoid + * notifications if requests are already in flight (lower + * overheads and promotes batching). 
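+ *
+ * Rough sketch of the interesting part of the macro used below
+ * (an editorial note, not from the original source): it re-arms
+ * req_event and then re-checks the ring, closing the race with a
+ * request that arrived while this response was being built:
+ *
+ *	blk_ring->sring->req_event = blk_ring->req_cons + 1;
+ *	mb();
+ *	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(blk_ring);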
+ */ + RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do); + + } else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) + more_to_do = 1; + + mtx_unlock(&blkif->blk_ring_lock); + + if (more_to_do) + add_to_req_schedule_list_tail(blkif); + + if (notify) + notify_remote_via_irq(blkif->irq); +} + +static void +end_block_io_op(struct bio *bio) +{ + pending_req_t *pending_req = bio->bio_caller2; + + if (bio->bio_error) { + DPRINTF("BIO returned error %d for operation on device %s\n", + bio->bio_error, pending_req->blkif->dev_name); + pending_req->status = BLKIF_RSP_ERROR; + pending_req->blkif->st_err_req++; + } + +#if 0 + printf("done: bio=%x error=%x completed=%llu resid=%lu flags=%x\n", + (unsigned int)bio, bio->bio_error, bio->bio_completed, bio->bio_resid, bio->bio_flags); +#endif + + if (atomic_fetchadd_int(&pending_req->pendcnt, -1) == 1) { + fast_flush_area(pending_req); + make_response(pending_req->blkif, pending_req->id, + pending_req->operation, pending_req->status); + blkif_put(pending_req->blkif); + free_req(pending_req); + } + + g_destroy_bio(bio); +} + +static void +dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, pending_req_t *pending_req) +{ + struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + struct { + unsigned long buf; unsigned int nsec; + } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + unsigned int nseg = req->nr_segments, nr_sects = 0; + struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + int operation, ret, i, nbio = 0; + + /* Check that number of segments is sane. */ + if (unlikely(nseg == 0) || + unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + DPRINTF("Bad number of segments in request (%d)\n", nseg); + goto fail_response; + } + + if (req->operation == BLKIF_OP_WRITE) { + if (blkif->read_only) { + DPRINTF("Attempt to write to read only device %s\n", blkif->dev_name); + goto fail_response; + } + operation = BIO_WRITE; + } else + operation = BIO_READ; + + pending_req->blkif = blkif; + pending_req->id = req->id; + pending_req->operation = req->operation; + pending_req->status = BLKIF_RSP_OKAY; + pending_req->nr_pages = nseg; + + for (i = 0; i < nseg; i++) { + seg[i].nsec = req->seg[i].last_sect - + req->seg[i].first_sect + 1; + + if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || + (seg[i].nsec <= 0)) + goto fail_response; + nr_sects += seg[i].nsec; + + map[i].host_addr = vaddr(pending_req, i); + map[i].dom = blkif->domid; + map[i].ref = req->seg[i].gref; + map[i].flags = GNTMAP_host_map; + if (operation == BIO_WRITE) + map[i].flags |= GNTMAP_readonly; + } + + /* Convert to the disk's sector size */ + nr_sects = (nr_sects << 9) >> blkif->sector_size_shift; + + ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); + PANIC_IF(ret); + + for (i = 0; i < nseg; i++) { + if (unlikely(map[i].status != 0)) { + DPRINTF("invalid buffer -- could not remap it\n"); + goto fail_flush; + } + + pending_handle(pending_req, i) = map[i].handle; +#if 0 + /* Can't do this in FreeBSD since vtophys() returns the pfn */ + /* of the remote domain who loaned us the machine page - DPT */ + xen_phys_machine[(vtophys(vaddr(pending_req, i)) >> PAGE_SHIFT)] = + map[i]dev_bus_addr >> PAGE_SHIFT; +#endif + seg[i].buf = map[i].dev_bus_addr | + (req->seg[i].first_sect << 9); + } + + if (req->sector_number + nr_sects > blkif->media_num_sectors) { + DPRINTF("%s of [%llu,%llu] extends past end of device %s\n", + operation == BIO_READ ? 
"read" : "write", + req->sector_number, + req->sector_number + nr_sects, blkif->dev_name); + goto fail_flush; + } + + for (i = 0; i < nseg; i++) { + struct bio *bio; + + if ((int)seg[i].nsec & ((blkif->sector_size >> 9) - 1)) { + DPRINTF("Misaligned I/O request from domain %d", blkif->domid); + goto fail_put_bio; + } + + bio = biolist[nbio++] = g_new_bio(); + if (unlikely(bio == NULL)) + goto fail_put_bio; + + bio->bio_cmd = operation; + bio->bio_offset = req->sector_number << blkif->sector_size_shift; + bio->bio_length = seg[i].nsec << 9; + bio->bio_bcount = bio->bio_length; + bio->bio_data = (caddr_t)(vaddr(pending_req, i) | (seg[i].buf & PAGE_MASK)); + bio->bio_done = end_block_io_op; + bio->bio_caller2 = pending_req; + bio->bio_dev = blkif->cdev; + + req->sector_number += (seg[i].nsec << 9) >> blkif->sector_size_shift; +#if 0 + printf("new: bio=%x cmd=%d sect=%llu nsect=%u iosize_max=%u @ %08lx\n", + (unsigned int)bio, req->operation, req->sector_number, seg[i].nsec, + blkif->cdev->si_iosize_max, seg[i].buf); +#endif + } + + pending_req->pendcnt = nbio; + blkif_get(blkif); + + for (i = 0; i < nbio; i++) + (*blkif->csw->d_strategy)(biolist[i]); + + return; + + fail_put_bio: + for (i = 0; i < (nbio-1); i++) + g_destroy_bio(biolist[i]); + fail_flush: + fast_flush_area(pending_req); + fail_response: + make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); + free_req(pending_req); +} + +static void +blk_req_action(void *context, int pending) +{ + blkif_t *blkif; + + DPRINTF("\n"); + + while (!STAILQ_EMPTY(&req_sched_list)) { + blkif_back_ring_t *blk_ring; + RING_IDX rc, rp; + + blkif = remove_from_req_schedule_list(); + + blk_ring = &blkif->ring; + rc = blk_ring->req_cons; + rp = blk_ring->sring->req_prod; + rmb(); /* Ensure we see queued requests up to 'rp'. 
*/ + + while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) { + blkif_request_t *req; + pending_req_t *pending_req; + + pending_req = alloc_req(); + if (pending_req == NULL) + goto out_of_preqs; + + req = RING_GET_REQUEST(blk_ring, rc); + blk_ring->req_cons = ++rc; /* before make_response() */ + + switch (req->operation) { + case BLKIF_OP_READ: + blkif->st_rd_req++; + dispatch_rw_block_io(blkif, req, pending_req); + break; + case BLKIF_OP_WRITE: + blkif->st_wr_req++; + dispatch_rw_block_io(blkif, req, pending_req); + break; + default: + blkif->st_err_req++; + DPRINTF("error: unknown block io operation [%d]\n", + req->operation); + make_response(blkif, req->id, req->operation, + BLKIF_RSP_ERROR); + free_req(pending_req); + break; + } + } + + blkif_put(blkif); + } + + return; + + out_of_preqs: + /* We ran out of pending req structs */ + /* Just requeue interface and wait to be rescheduled to run when one is freed */ + add_to_req_schedule_list_tail2(blkif); + blkif->st_oo_req++; +} + +/* Handle interrupt from a frontend */ +static void +blkback_intr(void *arg) +{ + blkif_t *blkif = arg; + DPRINTF("%x\n", (unsigned int)blkif); + add_to_req_schedule_list_tail(blkif); +} + +/* Map grant ref for ring */ +static int +map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring) +{ + struct gnttab_map_grant_ref op; + + ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE); + if (ring->va == 0) + return ENOMEM; + + op.host_addr = ring->va; + op.flags = GNTMAP_host_map; + op.ref = ref; + op.dom = dom; + HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); + if (op.status) { + WPRINTF("grant table op err=%d\n", op.status); + kmem_free(kernel_map, ring->va, PAGE_SIZE); + ring->va = 0; + return EACCES; + } + + ring->handle = op.handle; + ring->bus_addr = op.dev_bus_addr; + + return 0; +} + +/* Unmap grant ref for ring */ +static void +unmap_ring(struct ring_ref *ring) +{ + struct gnttab_unmap_grant_ref op; + + op.host_addr = ring->va; + op.dev_bus_addr = ring->bus_addr; + op.handle = ring->handle; + HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); + if (op.status) + WPRINTF("grant table op err=%d\n", op.status); + + kmem_free(kernel_map, ring->va, PAGE_SIZE); + ring->va = 0; +} + +static int +connect_ring(blkif_t *blkif) +{ + struct xenbus_device *xdev = blkif->xdev; + blkif_sring_t *ring; + unsigned long ring_ref; + evtchn_port_t evtchn; + evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; + int err; + + if (blkif->ring_connected) + return 0; + + // Grab FE data and map his memory + err = xenbus_gather(NULL, xdev->otherend, + "ring-ref", "%lu", &ring_ref, + "event-channel", "%u", &evtchn, NULL); + if (err) { + xenbus_dev_fatal(xdev, err, + "reading %s/ring-ref and event-channel", + xdev->otherend); + return err; + } + + err = map_ring(ring_ref, blkif->domid, &blkif->rr); + if (err) { + xenbus_dev_fatal(xdev, err, "mapping ring"); + return err; + } + ring = (blkif_sring_t *)blkif->rr.va; + BACK_RING_INIT(&blkif->ring, ring, PAGE_SIZE); + + op.u.bind_interdomain.remote_dom = blkif->domid; + op.u.bind_interdomain.remote_port = evtchn; + err = HYPERVISOR_event_channel_op(&op); + if (err) { + unmap_ring(&blkif->rr); + xenbus_dev_fatal(xdev, err, "binding event channel"); + return err; + } + blkif->evtchn = op.u.bind_interdomain.local_port; + + /* bind evtchn to irq handler */ + blkif->irq = + bind_evtchn_to_irqhandler(blkif->evtchn, "blkback", + blkback_intr, blkif, INTR_TYPE_NET|INTR_MPSAFE, &blkif->irq_cookie); + + blkif->ring_connected = 1; + + DPRINTF("%x rings connected! 
evtchn=%d irq=%d\n", + (unsigned int)blkif, blkif->evtchn, blkif->irq); + + return 0; +} + +static void +disconnect_ring(blkif_t *blkif) +{ + DPRINTF("\n"); + + if (blkif->ring_connected) { + unbind_from_irqhandler(blkif->irq, blkif->irq_cookie); + blkif->irq = 0; + unmap_ring(&blkif->rr); + blkif->ring_connected = 0; + } +} + +static void +connect(blkif_t *blkif) +{ + struct xenbus_transaction *xbt; + struct xenbus_device *xdev = blkif->xdev; + int err; + + if (!blkif->ring_connected || + blkif->vn == NULL || + blkif->state == XenbusStateConnected) + return; + + DPRINTF("%s\n", xdev->otherend); + + /* Supply the information about the device the frontend needs */ +again: + xbt = xenbus_transaction_start(); + if (IS_ERR(xbt)) { + xenbus_dev_fatal(xdev, PTR_ERR(xbt), + "Error writing configuration for backend " + "(start transaction)"); + return; + } + + err = xenbus_printf(xbt, xdev->nodename, "sectors", "%u", + blkif->media_num_sectors); + if (err) { + xenbus_dev_fatal(xdev, err, "writing %s/sectors", + xdev->nodename); + goto abort; + } + + err = xenbus_printf(xbt, xdev->nodename, "info", "%u", + blkif->read_only ? VDISK_READONLY : 0); + if (err) { + xenbus_dev_fatal(xdev, err, "writing %s/info", + xdev->nodename); + goto abort; + } + err = xenbus_printf(xbt, xdev->nodename, "sector-size", "%u", + blkif->sector_size); + if (err) { + xenbus_dev_fatal(xdev, err, "writing %s/sector-size", + xdev->nodename); + goto abort; + } + + err = xenbus_transaction_end(xbt, 0); + if (err == -EAGAIN) + goto again; + if (err) + xenbus_dev_fatal(xdev, err, "ending transaction"); + + err = xenbus_switch_state(xdev, NULL, XenbusStateConnected); + if (err) + xenbus_dev_fatal(xdev, err, "switching to Connected state", + xdev->nodename); + + blkif->state = XenbusStateConnected; + + return; + + abort: + xenbus_transaction_end(xbt, 1); +} + +static int +blkback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id) +{ + int err; + char *p, *mode = NULL, *type = NULL, *params = NULL; + long handle; + + DPRINTF("node=%s\n", xdev->nodename); + + p = strrchr(xdev->otherend, '/') + 1; + handle = strtoul(p, NULL, 0); + + mode = xenbus_read(NULL, xdev->nodename, "mode", NULL); + if (IS_ERR(mode)) { + xenbus_dev_fatal(xdev, PTR_ERR(mode), "reading mode"); + err = PTR_ERR(mode); + goto error; + } + + type = xenbus_read(NULL, xdev->nodename, "type", NULL); + if (IS_ERR(type)) { + xenbus_dev_fatal(xdev, PTR_ERR(type), "reading type"); + err = PTR_ERR(type); + goto error; + } + + params = xenbus_read(NULL, xdev->nodename, "params", NULL); + if (IS_ERR(type)) { + xenbus_dev_fatal(xdev, PTR_ERR(params), "reading params"); + err = PTR_ERR(params); + goto error; + } + + err = blkif_create(xdev, handle, mode, type, params); + if (err) { + xenbus_dev_fatal(xdev, err, "creating blkif"); + goto error; + } + + err = vbd_add_dev(xdev); + if (err) { + blkif_put((blkif_t *)xdev->data); + xenbus_dev_fatal(xdev, err, "adding vbd device"); + } + + return err; + + error: + if (mode) + free(mode, M_DEVBUF); + if (type) + free(type, M_DEVBUF); + if (params) + free(params, M_DEVBUF); + return err; +} + +static int +blkback_remove(struct xenbus_device *xdev) +{ + blkif_t *blkif = xdev->data; + device_t ndev; + + DPRINTF("node=%s\n", xdev->nodename); + + blkif->state = XenbusStateClosing; + + if ((ndev = blkif->ndev)) { + blkif->ndev = NULL; + mtx_lock(&Giant); + device_detach(ndev); + mtx_unlock(&Giant); + } + + xdev->data = NULL; + blkif->xdev = NULL; + blkif_put(blkif); + + return 0; +} + +static int +blkback_resume(struct 
xenbus_device *xdev) +{ + DPRINTF("node=%s\n", xdev->nodename); + return 0; +} + +static void +frontend_changed(struct xenbus_device *xdev, + XenbusState frontend_state) +{ + blkif_t *blkif = xdev->data; + + DPRINTF("state=%d\n", frontend_state); + + blkif->frontend_state = frontend_state; + + switch (frontend_state) { + case XenbusStateInitialising: + break; + case XenbusStateInitialised: + case XenbusStateConnected: + connect_ring(blkif); + connect(blkif); + break; + case XenbusStateClosing: + xenbus_switch_state(xdev, NULL, XenbusStateClosing); + break; + case XenbusStateClosed: + xenbus_remove_device(xdev); + break; + case XenbusStateUnknown: + case XenbusStateInitWait: + xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend", + frontend_state); + break; + } +} + +/* ** Driver registration ** */ + +static struct xenbus_device_id blkback_ids[] = { + { "vbd" }, + { "" } +}; + +static struct xenbus_driver blkback = { + .name = "blkback", + .ids = blkback_ids, + .probe = blkback_probe, + .remove = blkback_remove, + .resume = blkback_resume, + .otherend_changed = frontend_changed, +}; + +static void +blkback_init(void *unused) +{ + int i; + + TASK_INIT(&blk_req_task, 0, blk_req_action, NULL); + mtx_init(&req_sched_list_lock, "blk_req_sched_lock", "blkback req sched lock", MTX_DEF); + + mtx_init(&pending_free_lock, "blk_pending_req_ock", "blkback pending request lock", MTX_DEF); + + mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST; + pending_reqs = malloc(sizeof(pending_reqs[0]) * + blkif_reqs, M_DEVBUF, M_ZERO|M_NOWAIT); + pending_grant_handles = malloc(sizeof(pending_grant_handles[0]) * + mmap_pages, M_DEVBUF, M_NOWAIT); + pending_vaddrs = malloc(sizeof(pending_vaddrs[0]) * + mmap_pages, M_DEVBUF, M_NOWAIT); + mmap_vstart = alloc_empty_page_range(mmap_pages); + if (!pending_reqs || !pending_grant_handles || !pending_vaddrs || !mmap_vstart) { + if (pending_reqs) + free(pending_reqs, M_DEVBUF); + if (pending_grant_handles) + free(pending_grant_handles, M_DEVBUF); + if (pending_vaddrs) + free(pending_vaddrs, M_DEVBUF); + WPRINTF("out of memory\n"); + return; + } + + for (i = 0; i < mmap_pages; i++) { + pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT); + pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; + } + + for (i = 0; i < blkif_reqs; i++) { + STAILQ_INSERT_TAIL(&pending_free, &pending_reqs[i], free_list); + } + + DPRINTF("registering %s\n", blkback.name); + xenbus_register_backend(&blkback); +} + +SYSINIT(xbbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, blkback_init, NULL) + +static void +close_device(blkif_t *blkif) +{ + DPRINTF("closing dev=%s\n", blkif->dev_name); + if (blkif->vn) { + int flags = FREAD; + + if (!blkif->read_only) + flags |= FWRITE; + + if (blkif->csw) { + dev_relthread(blkif->cdev); + blkif->csw = NULL; + } + + (void)vn_close(blkif->vn, flags, NOCRED, curthread); + blkif->vn = NULL; + } +} + +static int +open_device(blkif_t *blkif) +{ + struct nameidata nd; + struct vattr vattr; + struct cdev *dev; + struct cdevsw *devsw; + int flags = FREAD, err = 0; + + DPRINTF("opening dev=%s\n", blkif->dev_name); + + if (!blkif->read_only) + flags |= FWRITE; + + if (!curthread->td_proc->p_fd->fd_cdir) { + curthread->td_proc->p_fd->fd_cdir = rootvnode; + VREF(rootvnode); + } + if (!curthread->td_proc->p_fd->fd_rdir) { + curthread->td_proc->p_fd->fd_rdir = rootvnode; + VREF(rootvnode); + } + if (!curthread->td_proc->p_fd->fd_jdir) { + curthread->td_proc->p_fd->fd_jdir = rootvnode; + VREF(rootvnode); + } + + again: + NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, blkif->dev_name, 
curthread); + err = vn_open(&nd, &flags, 0, -1); + if (err) { + if (blkif->dev_name[0] != '/') { + char *dev_path = "/dev/"; + char *dev_name; + + /* Try adding device path at beginning of name */ + dev_name = malloc(strlen(blkif->dev_name) + strlen(dev_path) + 1, M_DEVBUF, M_NOWAIT); + if (dev_name) { + sprintf(dev_name, "%s%s", dev_path, blkif->dev_name); + free(blkif->dev_name, M_DEVBUF); + blkif->dev_name = dev_name; + goto again; + } + } + xenbus_dev_fatal(blkif->xdev, err, "error opening device %s", blkif->dev_name); + return err; + } + NDFREE(&nd, NDF_ONLY_PNBUF); + + blkif->vn = nd.ni_vp; + + /* We only support disks for now */ + if (!vn_isdisk(blkif->vn, &err)) { + xenbus_dev_fatal(blkif->xdev, err, "device %s is not a disk", blkif->dev_name); + VOP_UNLOCK(blkif->vn, 0, curthread); + goto error; + } + + blkif->cdev = blkif->vn->v_rdev; + blkif->csw = dev_refthread(blkif->cdev); + PANIC_IF(blkif->csw == NULL); + + err = VOP_GETATTR(blkif->vn, &vattr, NOCRED); + if (err) { + xenbus_dev_fatal(blkif->xdev, err, + "error getting vnode attributes for device %s", blkif->dev_name); + VOP_UNLOCK(blkif->vn, 0, curthread); + goto error; + } + + VOP_UNLOCK(blkif->vn, 0, curthread); + + dev = blkif->vn->v_rdev; + devsw = dev->si_devsw; + if (!devsw->d_ioctl) { + err = ENODEV; + xenbus_dev_fatal(blkif->xdev, err, + "no d_ioctl for device %s!", blkif->dev_name); + goto error; + } + + err = (*devsw->d_ioctl)(dev, DIOCGSECTORSIZE, (caddr_t)&blkif->sector_size, FREAD, curthread); + if (err) { + xenbus_dev_fatal(blkif->xdev, err, + "error calling ioctl DIOCGSECTORSIZE for device %s", blkif->dev_name); + goto error; + } + blkif->sector_size_shift = fls(blkif->sector_size) - 1; + + err = (*devsw->d_ioctl)(dev, DIOCGMEDIASIZE, (caddr_t)&blkif->media_size, FREAD, curthread); + if (err) { + xenbus_dev_fatal(blkif->xdev, err, + "error calling ioctl DIOCGMEDIASIZE for device %s", blkif->dev_name); + goto error; + } + blkif->media_num_sectors = blkif->media_size >> blkif->sector_size_shift; + + blkif->major = umajor(vattr.va_rdev); + blkif->minor = uminor(vattr.va_rdev); + + DPRINTF("opened dev=%s major=%d minor=%d sector_size=%u media_size=%lld\n", + blkif->dev_name, blkif->major, blkif->minor, blkif->sector_size, blkif->media_size); + + return 0; + + error: + close_device(blkif); + return err; +} + +static int +vbd_add_dev(struct xenbus_device *xdev) +{ + blkif_t *blkif = xdev->data; + device_t nexus, ndev; + devclass_t dc; + int err = 0; + + mtx_lock(&Giant); + + /* We will add a vbd device as a child of nexus0 (for now) */ + if (!(dc = devclass_find("nexus")) || + !(nexus = devclass_get_device(dc, 0))) { + WPRINTF("could not find nexus0!\n"); + err = ENOENT; + goto done; + } + + + /* Create a newbus device representing the vbd */ + ndev = BUS_ADD_CHILD(nexus, 0, "vbd", blkif->handle); + if (!ndev) { + WPRINTF("could not create newbus device vbd%d!\n", blkif->handle); + err = EFAULT; + goto done; + } + + blkif_get(blkif); + device_set_ivars(ndev, blkif); + blkif->ndev = ndev; + + device_probe_and_attach(ndev); + + done: + + mtx_unlock(&Giant); + + return err; +} + +enum { + VBD_SYSCTL_DOMID, + VBD_SYSCTL_ST_RD_REQ, + VBD_SYSCTL_ST_WR_REQ, + VBD_SYSCTL_ST_OO_REQ, + VBD_SYSCTL_ST_ERR_REQ, + VBD_SYSCTL_RING, +}; + +static char * +vbd_sysctl_ring_info(blkif_t *blkif, int cmd) +{ + char *buf = malloc(256, M_DEVBUF, M_WAITOK); + if (buf) { + if (!blkif->ring_connected) + sprintf(buf, "ring not connected\n"); + else { + blkif_back_ring_t *ring = &blkif->ring; + sprintf(buf, "nr_ents=%x req_cons=%x" + " 
req_prod=%x req_event=%x" + " rsp_prod=%x rsp_event=%x", + ring->nr_ents, ring->req_cons, + ring->sring->req_prod, ring->sring->req_event, + ring->sring->rsp_prod, ring->sring->rsp_event); + } + } + return buf; +} + +static int +vbd_sysctl_handler(SYSCTL_HANDLER_ARGS) +{ + device_t dev = (device_t)arg1; + blkif_t *blkif = (blkif_t *)device_get_ivars(dev); + const char *value; + char *buf = NULL; + int err; + + switch (arg2) { + case VBD_SYSCTL_DOMID: + return sysctl_handle_int(oidp, NULL, blkif->domid, req); + case VBD_SYSCTL_ST_RD_REQ: + return sysctl_handle_int(oidp, NULL, blkif->st_rd_req, req); + case VBD_SYSCTL_ST_WR_REQ: + return sysctl_handle_int(oidp, NULL, blkif->st_wr_req, req); + case VBD_SYSCTL_ST_OO_REQ: + return sysctl_handle_int(oidp, NULL, blkif->st_oo_req, req); + case VBD_SYSCTL_ST_ERR_REQ: + return sysctl_handle_int(oidp, NULL, blkif->st_err_req, req); + case VBD_SYSCTL_RING: + value = buf = vbd_sysctl_ring_info(blkif, arg2); + break; + default: + return (EINVAL); + } + + err = SYSCTL_OUT(req, value, strlen(value)); + if (buf != NULL) + free(buf, M_DEVBUF); + + return err; +} + +/* Newbus vbd device driver probe */ +static int +vbd_probe(device_t dev) +{ + DPRINTF("vbd%d\n", device_get_unit(dev)); + return 0; +} + +/* Newbus vbd device driver attach */ +static int +vbd_attach(device_t dev) +{ + blkif_t *blkif = (blkif_t *)device_get_ivars(dev); + + DPRINTF("%s\n", blkif->dev_name); + + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD, + dev, VBD_SYSCTL_DOMID, vbd_sysctl_handler, "I", + "domid of frontend"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "rd_reqs", CTLTYPE_INT|CTLFLAG_RD, + dev, VBD_SYSCTL_ST_RD_REQ, vbd_sysctl_handler, "I", + "number of read reqs"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "wr_reqs", CTLTYPE_INT|CTLFLAG_RD, + dev, VBD_SYSCTL_ST_WR_REQ, vbd_sysctl_handler, "I", + "number of write reqs"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "oo_reqs", CTLTYPE_INT|CTLFLAG_RD, + dev, VBD_SYSCTL_ST_OO_REQ, vbd_sysctl_handler, "I", + "number of deferred reqs"); + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "err_reqs", CTLTYPE_INT|CTLFLAG_RD, + dev, VBD_SYSCTL_ST_ERR_REQ, vbd_sysctl_handler, "I", + "number of reqs that returned error"); +#if XEN_BLKBACK_DEBUG + SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), + OID_AUTO, "ring", CTLFLAG_RD, + dev, VBD_SYSCTL_RING, vbd_sysctl_handler, "A", + "req ring info"); +#endif + + if (!open_device(blkif)) + connect(blkif); + + return bus_generic_attach(dev); +} + +/* Newbus vbd device driver detach */ +static int +vbd_detach(device_t dev) +{ + blkif_t *blkif = (blkif_t *)device_get_ivars(dev); + + DPRINTF("%s\n", blkif->dev_name); + + close_device(blkif); + + bus_generic_detach(dev); + + blkif_put(blkif); + + return 0; +} + +static device_method_t vbd_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, vbd_probe), + DEVMETHOD(device_attach, vbd_attach), + DEVMETHOD(device_detach, vbd_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + {0, 0} +}; + +static devclass_t vbd_devclass; + +static driver_t vbd_driver = { + "vbd", + vbd_methods, 
+ 0, +}; + +DRIVER_MODULE(vbd, nexus, vbd_driver, vbd_devclass, 0, 0); + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: t + * End: + */ Property changes on: dev/xen/blkback/blkback.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: libkern/strcspn.c =================================================================== --- libkern/strcspn.c (.../stable/6/sys) (revision 0) +++ libkern/strcspn.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,72 @@ +/*- + * Copyright (c) 2005 David Schultz + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#define IDX(c) ((u_char)(c) / LONG_BIT) +#define BIT(c) ((u_long)1 << ((u_char)(c) % LONG_BIT)) + +size_t +strcspn(const char *s, const char *charset) +{ + /* + * NB: idx and bit are temporaries whose use causes gcc 3.4.2 to + * generate better code. Without them, gcc gets a little confused. 
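+ *
+ * Added usage note (not in the original file): strcspn() returns the
+ * length of the initial segment of s that contains no character from
+ * charset, e.g.
+ *
+ *	strcspn("ring-ref=8", "=")   -> 8
+ *	strcspn("abc", "xyz")        -> 3   (no match, so strlen(s))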
+ */ + const char *s1; + u_long bit; + u_long tbl[(UCHAR_MAX + 1) / LONG_BIT]; + int idx; + + if(*s == '\0') + return (0); + +#if LONG_BIT == 64 /* always better to unroll on 64-bit architectures */ + tbl[0] = 1; + tbl[3] = tbl[2] = tbl[1] = 0; +#else + for (tbl[0] = idx = 1; idx < sizeof(tbl) / sizeof(tbl[0]); idx++) + tbl[idx] = 0; +#endif + for (; *charset != '\0'; charset++) { + idx = IDX(*charset); + bit = BIT(*charset); + tbl[idx] |= bit; + } + + for(s1 = s; ; s1++) { + idx = IDX(*s1); + bit = BIT(*s1); + if ((tbl[idx] & bit) != 0) + break; + } + return (s1 - s); +} Property changes on: libkern/strcspn.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/include/smp.h =================================================================== --- i386/include/smp.h (.../stable/6/sys) (revision 184012) +++ i386/include/smp.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -68,7 +68,9 @@ void ipi_all(u_int ipi); void ipi_all_but_self(u_int ipi); void ipi_self(u_int ipi); +#ifndef XEN void ipi_bitmap_handler(struct clockframe frame); +#endif u_int mp_bootaddress(u_int); int mp_grab_cpu_hlt(void); void mp_topology(void); @@ -85,7 +87,14 @@ int ipi_nmi_handler(void); void ipi_nmi_selected(u_int32_t cpus); #endif +#ifdef XEN +void ipi_to_irq_init(void); +#define RESCHEDULE_VECTOR 0 +#define CALL_FUNCTION_VECTOR 1 +#define NR_IPIS 2 + +#endif #endif /* !LOCORE */ #endif /* SMP */ Index: i386/include/param.h =================================================================== --- i386/include/param.h (.../stable/6/sys) (revision 184012) +++ i386/include/param.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -86,9 +86,11 @@ #ifdef PAE #define NPGPTD 4 #define PDRSHIFT 21 /* LOG2(NBPDR) */ +#define NPGPTD_SHIFT 9 #else #define NPGPTD 1 #define PDRSHIFT 22 /* LOG2(NBPDR) */ +#define NPGPTD_SHIFT 10 #endif #define NBPTD (NPGPTD< +#include +#include +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +#define PG_KERNEL (PG_V | PG_A | PG_RW | PG_M) + +#define MACH_TO_VM_PAGE(ma) PHYS_TO_VM_PAGE(xpmap_mtop((ma))) +#define VM_PAGE_TO_MACH(m) xpmap_ptom(VM_PAGE_TO_PHYS((m))) + +static __inline vm_paddr_t +pmap_kextract_ma(vm_offset_t va) +{ + vm_paddr_t ma; + if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) { + ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1)); + } else { + ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK); + } + return ma; +} + +static __inline vm_paddr_t +pmap_kextract(vm_offset_t va) +{ + return xpmap_mtop(pmap_kextract_ma(va)); +} +#define vtomach(va) pmap_kextract_ma(((vm_offset_t) (va))) + +vm_paddr_t pmap_extract_ma(struct pmap *pmap, vm_offset_t va); + +void pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa); +void pmap_map_readonly(struct pmap *pmap, vm_offset_t va, int len); +void pmap_map_readwrite(struct pmap *pmap, vm_offset_t va, int len); + +static __inline pt_entry_t +pte_load_store(pt_entry_t *ptep, pt_entry_t v) +{ + pt_entry_t r; + + v = xpmap_ptom(v); + r = *ptep; + PT_SET_VA(ptep, v, TRUE); + return (r); +} + +static __inline pt_entry_t +pte_load_store_ma(pt_entry_t *ptep, pt_entry_t v) +{ + pt_entry_t r; + + r = *ptep; + PT_SET_VA_MA(ptep, v, TRUE); + return (r); +} + +#define pte_load_clear(ptep) pte_load_store((ptep), (pt_entry_t)0ULL) + +#define pte_store(ptep, pte) pte_load_store((ptep), (pt_entry_t)pte) +#define pte_store_ma(ptep, pte) pte_load_store_ma((ptep), (pt_entry_t)pte) +#define pde_store_ma(ptep, pte) 
pte_load_store_ma((ptep), (pt_entry_t)pte) + +#elif !defined(XEN) + /* * Routine: pmap_kextract * Function: @@ -195,11 +273,10 @@ } return pa; } +#endif -#define vtophys(va) pmap_kextract(((vm_offset_t) (va))) +#if defined(PAE) && !defined(XEN) -#ifdef PAE - static __inline pt_entry_t pte_load(pt_entry_t *ptep) { @@ -231,7 +308,7 @@ #define pte_store(ptep, pte) pte_load_store((ptep), (pt_entry_t)pte) -#else /* PAE */ +#elif !defined (PAE) && !defined(XEN) static __inline pt_entry_t pte_load(pt_entry_t *ptep) Index: i386/include/vmparam.h =================================================================== --- i386/include/vmparam.h (.../stable/6/sys) (revision 184012) +++ i386/include/vmparam.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -83,8 +83,12 @@ * Kernel physical load address. */ #ifndef KERNLOAD +#if defined(XEN) && !defined(XEN_PRIVILEGED_GUEST) +#define KERNLOAD 0 +#else #define KERNLOAD (1 << PDRSHIFT) #endif +#endif /* * Virtual addresses of things. Derived from the page directory and @@ -93,7 +97,11 @@ * messy at times, but hey, we'll do anything to save a page :-) */ +#ifdef XEN +#define VM_MAX_KERNEL_ADDRESS HYPERVISOR_VIRT_START +#else #define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1) +#endif #define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI) #define KERNBASE VADDR(KPTDI, 0) Index: i386/include/xen/xen-os.h =================================================================== --- i386/include/xen/xen-os.h (.../stable/6/sys) (revision 0) +++ i386/include/xen/xen-os.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,291 @@ +/****************************************************************************** + * os.h + * + * random collection of macros and definition + */ + +#ifndef _XEN_OS_H_ +#define _XEN_OS_H_ + +#ifdef PAE +#define CONFIG_X86_PAE +#endif + +#if defined(XEN) && !defined(__XEN_INTERFACE_VERSION__) +/* + * Can update to a more recent version when we implement + * the hypercall page + */ +#define __XEN_INTERFACE_VERSION__ 0x00030204 +#endif + +#include + +/* Force a proper event-channel callback from Xen. */ +void force_evtchn_callback(void); + +extern int gdtset; + +extern shared_info_t *HYPERVISOR_shared_info; + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__ ( "rep;nop" : : : "memory" ); +} +#define cpu_relax() rep_nop() + +/* crude memory allocator for memory allocation early in + * boot + */ +void *bootmem_alloc(unsigned int size); +void bootmem_free(void *ptr, unsigned int size); + + +/* Everything below this point is not included by assembler (.S) files. */ +#ifndef __ASSEMBLY__ + +/* some function prototypes */ +void trap_init(void); + +/* + * STI/CLI equivalents. These basically set and clear the virtual + * event_enable flag in teh shared_info structure. Note that when + * the enable bit is set, there may be pending events to be handled. + * We may therefore call into do_hypervisor_callback() directly. 
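+ *
+ * Hedged usage sketch (an editorial addition, not in the original
+ * header): callers treat these like the usual save/restore pair,
+ *
+ *	unsigned long flags;
+ *
+ *	local_irq_save(flags);
+ *	... touch state shared with the event-channel upcall ...
+ *	local_irq_restore(flags);
+ *
+ * but only the per-VCPU evtchn_upcall_mask in shared_info is toggled,
+ * never the real EFLAGS interrupt flag.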
+ */ +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + + + +#define __cli() \ +do { \ + vcpu_info_t *_vcpu; \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ + _vcpu->evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + +#define __sti() \ +do { \ + vcpu_info_t *_vcpu; \ + barrier(); \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ + _vcpu->evtchn_upcall_mask = 0; \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ +} while (0) + +#define __restore_flags(x) \ +do { \ + vcpu_info_t *_vcpu; \ + barrier(); \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ + if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ + } \ +} while (0) + +/* + * Add critical_{enter, exit}? + * + */ +#define __save_and_cli(x) \ +do { \ + vcpu_info_t *_vcpu; \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ + (x) = _vcpu->evtchn_upcall_mask; \ + _vcpu->evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + + +#define cli() __cli() +#define sti() __sti() +#define save_flags(x) __save_flags(x) +#define restore_flags(x) __restore_flags(x) +#define save_and_cli(x) __save_and_cli(x) + +#define local_irq_save(x) __save_and_cli(x) +#define local_irq_restore(x) __restore_flags(x) +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() + +#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));} +#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); } +#define spin_lock_irqsave mtx_lock_irqsave +#define spin_unlock_irqrestore mtx_unlock_irqrestore + + +#ifndef mb +#define mb() __asm__ __volatile__("lock; addl $0, 0(%%esp)": : :"memory") +#endif +#ifndef rmb +#define rmb() mb() +#endif +#ifndef wmb +#define wmb() barrier() +#endif +#ifdef SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while(0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + + +/* This is a barrier for the compiler only, NOT the processor! */ +#define barrier() __asm__ __volatile__("": : :"memory") + +#define LOCK_PREFIX "" +#define LOCK "" +#define ADDR (*(volatile long *) addr) +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. 
+ */ +typedef struct { volatile int counter; } atomic_t; + + + +#define xen_xchg(ptr,v) \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((volatile struct __xchg_dummy *)(x)) +static __inline unsigned long __xchg(unsigned long x, volatile void * ptr, + int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline int test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +static __inline int constant_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline int variable_test_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"Ir" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static __inline__ void clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btrl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. 
+ */ +static __inline__ void atomic_inc(atomic_t *v) +{ + __asm__ __volatile__( + LOCK "incl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + +#endif /* !__ASSEMBLY__ */ + +#endif /* _OS_H_ */ Property changes on: i386/include/xen/xen-os.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/include/xen/hypercall.h =================================================================== --- i386/include/xen/hypercall.h (.../stable/6/sys) (revision 0) +++ i386/include/xen/hypercall.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,402 @@ +/****************************************************************************** + * hypercall.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __MACHINE_XEN_HYPERCALL_H__ +#define __MACHINE_XEN_HYPERCALL_H__ + +#define __STR(x) #x +#define STR(x) __STR(x) +#define ENOXENSYS 38 +#define CONFIG_XEN_COMPAT 0x030002 + + +#if defined(XEN) +#define HYPERCALL_STR(name) \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)" +#else +#define HYPERCALL_STR(name) \ + "mov hypercall_stubs,%%eax; " \ + "add $("STR(__HYPERVISOR_##name)" * 32),%%eax; " \ + "call *%%eax" +#endif + +#define _hypercall0(type, name) \ +({ \ + long __res; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res) \ + : \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + long __res, __ign1; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=b" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + long __res, __ign1, __ign2; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + long __res, __ign1, __ign2, __ign3; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)) \ + : "memory" ); \ + (type)__res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + long __res, __ign1, __ign2, __ign3, __ign4, __ign5; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ + "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)), "4" ((long)(a4)), \ + "5" ((long)(a5)) \ + : "memory" ); \ + (type)__res; \ +}) + +static inline int +HYPERVISOR_set_trap_table( + trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int +HYPERVISOR_mmu_update( + mmu_update_t *req, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int +HYPERVISOR_mmuext_op( + mmuext_op_t *op, int count, int *success_count, domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int +HYPERVISOR_set_gdt( + unsigned long *frame_list, int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_set_callbacks( + unsigned long event_selector, unsigned long event_address, + unsigned long failsafe_selector, unsigned long failsafe_address) +{ + return _hypercall4(int, set_callbacks, + event_selector, event_address, + failsafe_selector, failsafe_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int +HYPERVISOR_sched_op_compat( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op_compat, cmd, arg); +} + +static inline int +HYPERVISOR_sched_op( + 
int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline long +HYPERVISOR_set_timer_op( + uint64_t timeout) +{ + unsigned long timeout_hi = (unsigned long)(timeout>>32); + unsigned long timeout_lo = (unsigned long)timeout; + return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); +} +#if 0 +static inline int +HYPERVISOR_platform_op( + struct xen_platform_op *platform_op) +{ + platform_op->interface_version = XENPF_INTERFACE_VERSION; + return _hypercall1(int, platform_op, platform_op); +} +#endif +static inline int +HYPERVISOR_set_debugreg( + int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long +HYPERVISOR_get_debugreg( + int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int +HYPERVISOR_update_descriptor( + uint64_t ma, uint64_t desc) +{ + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); +} + +static inline int +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int +HYPERVISOR_multicall( + void *call_list, int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int +HYPERVISOR_update_va_mapping( + unsigned long va, uint64_t new_val, unsigned long flags) +{ + uint32_t hi, lo; + + lo = (uint32_t)(new_val & 0xffffffff); + hi = (uint32_t)(new_val >> 32); + + return _hypercall4(int, update_va_mapping, va, + lo, hi, flags); +} + +static inline int +HYPERVISOR_event_channel_op( + int cmd, void *arg) +{ + int rc = _hypercall2(int, event_channel_op, cmd, arg); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (__predict_false(rc == -ENOXENSYS)) { + struct evtchn_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, event_channel_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } +#endif + return (rc); +} + +static inline int +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int +HYPERVISOR_console_io( + int cmd, int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int +HYPERVISOR_physdev_op( + int cmd, void *arg) +{ + int rc = _hypercall2(int, physdev_op, cmd, arg); +#if CONFIG_XEN_COMPAT <= 0x030002 + if (__predict_false(rc == -ENOXENSYS)) { + struct physdev_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, physdev_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } +#endif + return (rc); +} + +static inline int +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid) +{ + uint32_t hi, lo; + + lo = (uint32_t)(new_val & 0xffffffff); + hi = (uint32_t)(new_val >> 32); + + return _hypercall5(int, update_va_mapping_otherdomain, va, + lo, hi, flags, domid); +} + +static inline int +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int +HYPERVISOR_vcpu_op( + int cmd, int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int +HYPERVISOR_suspend( + unsigned long srec) +{ + struct sched_shutdown sched_shutdown = { + .reason = SHUTDOWN_suspend + }; + int rc = _hypercall3(int, sched_op, 
SCHEDOP_shutdown, + &sched_shutdown, srec); +#if CONFIG_XEN_COMPAT <= 0x030002 + if (rc == -ENOXENSYS) + rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +#endif + return (rc); +} + +#if CONFIG_XEN_COMPAT <= 0x030002 +static inline int +HYPERVISOR_nmi_op( + unsigned long op, void *arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} +#endif + +static inline int +HYPERVISOR_callback_op( + int cmd, void *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +#ifndef CONFIG_XEN +static inline unsigned long +HYPERVISOR_hvm_op( + int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} +#endif + +static inline int +HYPERVISOR_xenoprof_op( + int op, void *arg) +{ + return _hypercall2(int, xenoprof_op, op, arg); +} + +static inline int +HYPERVISOR_kexec_op( + unsigned long op, void *args) +{ + return _hypercall2(int, kexec_op, op, args); +} + +#endif /* __MACHINE_XEN_HYPERCALL_H__ */ + +/* + * Local variables: + * c-file-style: "linux" + * indent-tabs-mode: t + * c-indent-level: 8 + * c-basic-offset: 8 + * tab-width: 8 + * End: + */ Property changes on: i386/include/xen/hypercall.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/include/xen/xenvar.h =================================================================== --- i386/include/xen/xenvar.h (.../stable/6/sys) (revision 0) +++ i386/include/xen/xenvar.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2008 Kip Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * $FreeBSD$ + */ +#ifndef XENVAR_H_ +#define XENVAR_H_ +#define XBOOTUP 0x1 +#define XPMAP 0x2 +extern int xendebug_flags; +#ifndef NOXENDEBUG +#define XENPRINTF printk +#else +#define XENPRINTF printf +#endif + +extern xen_pfn_t *xen_phys_machine; +extern xen_pfn_t *xen_pfn_to_mfn_frame_list[16]; +extern xen_pfn_t *xen_pfn_to_mfn_frame_list_list; + +#if 0 +#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__) +#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__) +#define TRACE_DEBUG(argflags, _f, _a...) 
\ +if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a); +#else +#define TRACE_ENTER +#define TRACE_EXIT +#define TRACE_DEBUG(argflags, _f, _a...) +#endif + +extern xen_pfn_t *xen_machine_phys; +/* Xen starts physical pages after the 4MB ISA hole - + * FreeBSD doesn't + */ + + +#undef ADD_ISA_HOLE /* XXX */ + +#ifdef ADD_ISA_HOLE +#define ISA_INDEX_OFFSET 1024 +#define ISA_PDR_OFFSET 1 +#else +#define ISA_INDEX_OFFSET 0 +#define ISA_PDR_OFFSET 0 +#endif + + +#define PFNTOMFN(i) (xen_phys_machine[(i)]) +#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)]) + +#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE) +#define PTOV(x) (((uintptr_t)(x)) + KERNBASE) + +#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT) +#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT) + +#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT) +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) + +#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT) +#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT) + + +void xpq_init(void); + +int xen_create_contiguous_region(vm_page_t pages, int npages); + +void xen_destroy_contiguous_region(void * addr, int npages); + +#endif Property changes on: i386/include/xen/xenvar.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/include/xen/synch_bitops.h =================================================================== --- i386/include/xen/synch_bitops.h (.../stable/6/sys) (revision 0) +++ i386/include/xen/synch_bitops.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,139 @@ +#ifndef __XEN_SYNCH_BITOPS_H__ +#define __XEN_SYNCH_BITOPS_H__ + +/* + * Copyright 1992, Linus Torvalds. + * Heavily modified to provide guaranteed strong synchronisation + * when communicating with Xen or other guest OSes running on other CPUs. 
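+ *
+ * Illustrative use only (an editorial sketch, not from the original
+ * file): these locked bit operations target memory shared with the
+ * hypervisor or another guest, e.g. a crude bit-spinlock on a
+ * hypothetical shared word:
+ *
+ *	while (synch_test_and_set_bit(0, &shared->lockword))
+ *		cpu_relax();
+ *	... critical section ...
+ *	synch_clear_bit(0, &shared->lockword);
+ *
+ * Here shared->lockword is a made-up field on a shared page and
+ * cpu_relax() is the PAUSE wrapper defined in xen-os.h.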
+ */ + + +#define ADDR (*(volatile long *) addr) + +static __inline__ void synch_set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btsl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ void synch_clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btrl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ void synch_change_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btcl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btsl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btrl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__ ( + "lock btcl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +struct __synch_xchg_dummy { unsigned long a[100]; }; +#define __synch_xg(x) ((volatile struct __synch_xchg_dummy *)(x)) + +#define synch_cmpxchg(ptr, old, new) \ +((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\ + (unsigned long)(old), \ + (unsigned long)(new), \ + sizeof(*(ptr)))) + +static inline unsigned long __synch_cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("lock; cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("lock; cmpxchgw %w1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#ifdef CONFIG_X86_64 + case 4: + __asm__ __volatile__("lock; cmpxchgl %k1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + case 8: + __asm__ __volatile__("lock; cmpxchgq %1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#else + case 4: + __asm__ __volatile__("lock; cmpxchgl %1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; +#endif + } + return old; +} + +static __inline__ int synch_const_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & + (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int synch_var_test_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "btl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) ); + return oldbit; +} + +#define synch_test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? 
\ + synch_const_test_bit((nr),(addr)) : \ + synch_var_test_bit((nr),(addr))) + +#endif /* __XEN_SYNCH_BITOPS_H__ */ Property changes on: i386/include/xen/synch_bitops.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/include/xen/xenfunc.h =================================================================== --- i386/include/xen/xenfunc.h (.../stable/6/sys) (revision 0) +++ i386/include/xen/xenfunc.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,79 @@ +/* + * + * Copyright (c) 2004,2005 Kip Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef _XEN_XENFUNC_H_ +#define _XEN_XENFUNC_H_ + +#include +#include +#include +#include +#include +#define BKPT __asm__("int3"); +#define XPQ_CALL_DEPTH 5 +#define XPQ_CALL_COUNT 2 +#define PG_PRIV PG_AVAIL3 +typedef struct { + unsigned long pt_ref; + unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH]; +} pteinfo_t; + +extern pteinfo_t *pteinfo_list; +#ifdef XENDEBUG_LOW +#define __PRINTK(x) printk x +#else +#define __PRINTK(x) +#endif + +char *xen_setbootenv(char *cmd_line); + +int xen_boothowto(char *envp); + +void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line); + +#ifdef INVARIANTS +#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), __FILE__, __LINE__) +#else +#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0) +#endif + +void xen_update_descriptor(union descriptor *, union descriptor *); + +extern struct mtx balloon_lock; +#if 0 +#define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags) +#define balloon_unlock(__flags) mtx_unlock_irqrestore(&balloon_lock, __flags) +#else +#define balloon_lock(__flags) __flags = 1 +#define balloon_unlock(__flags) __flags = 0 +#endif + + + +#endif /* _XEN_XENFUNC_H_ */ Property changes on: i386/include/xen/xenfunc.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/include/xen/xenpmap.h =================================================================== --- i386/include/xen/xenpmap.h (.../stable/6/sys) (revision 0) +++ i386/include/xen/xenpmap.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,230 @@ +/* + * + * Copyright (c) 2004 Christian Limpach. + * Copyright (c) 2004,2005 Kip Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Christian Limpach. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + + +#ifndef _XEN_XENPMAP_H_ +#define _XEN_XENPMAP_H_ + +#include + +void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int); +void xen_pt_switch(vm_paddr_t); +void xen_set_ldt(vm_paddr_t, unsigned long); +void xen_pgdpt_pin(vm_paddr_t); +void xen_pgd_pin(vm_paddr_t); +void xen_pgd_unpin(vm_paddr_t); +void xen_pt_pin(vm_paddr_t); +void xen_pt_unpin(vm_paddr_t); +void xen_flush_queue(void); +void xen_check_queue(void); +#if 0 +void pmap_ref(pt_entry_t *pte, vm_paddr_t ma); +#endif +void pmap_suspend(void); +void pmap_resume(void); + +#ifdef INVARIANTS +#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__) +#else +#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0) +#endif + +#ifdef PMAP_DEBUG +#define PMAP_REF pmap_ref +#define PMAP_DEC_REF_PAGE pmap_dec_ref_page +#define PMAP_MARK_PRIV pmap_mark_privileged +#define PMAP_MARK_UNPRIV pmap_mark_unprivileged +#else +#define PMAP_MARK_PRIV(a) +#define PMAP_MARK_UNPRIV(a) +#define PMAP_REF(a, b) +#define PMAP_DEC_REF_PAGE(a) +#endif + +#define ALWAYS_SYNC 0 + +#ifdef PT_DEBUG +#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__) +#else +#define PT_LOG() +#endif + +#define INVALID_P2M_ENTRY (~0UL) + +#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ + +#define SH_PD_SET_VA 1 +#define SH_PD_SET_VA_MA 2 +#define SH_PD_SET_VA_CLEAR 3 + +struct pmap; +void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type); +#ifdef notyet +static vm_paddr_t +vptetomachpte(vm_paddr_t *pte) +{ + vm_offset_t offset, ppte; + vm_paddr_t pgoffset, retval, *pdir_shadow_ptr; + int pgindex; + + ppte = (vm_offset_t)pte; + pgoffset = (ppte & PAGE_MASK); + offset = ppte - (vm_offset_t)PTmap; + pgindex = ppte >> PDRSHIFT; + + pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow); + retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset; + return (retval); +} +#endif +#define PT_GET(_ptp) \ + (pmap_valid_entry(*(_ptp)) ? 
xpmap_mtop(*(_ptp)) : (0)) + +#ifdef WRITABLE_PAGETABLES + +#define PT_SET_VA(_ptp,_npte,sync) do { \ + PMAP_REF((_ptp), xpmap_ptom(_npte)); \ + PT_LOG(); \ + *(_ptp) = xpmap_ptom((_npte)); \ +} while (/*CONSTCOND*/0) +#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ + PMAP_REF((_ptp), (_npte)); \ + PT_LOG(); \ + *(_ptp) = (_npte); \ +} while (/*CONSTCOND*/0) +#define PT_CLEAR_VA(_ptp, sync) do { \ + PMAP_REF((pt_entry_t *)(_ptp), 0); \ + PT_LOG(); \ + *(_ptp) = 0; \ +} while (/*CONSTCOND*/0) + +#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \ + PMAP_REF((_ptp), xpmap_ptom(_npte)); \ + pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \ + PMAP_REF((_ptp), (_npte)); \ + pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \ + PMAP_REF((pt_entry_t *)(_ptp), 0); \ + pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) + +#else /* !WRITABLE_PAGETABLES */ + +#define PT_SET_VA(_ptp,_npte,sync) do { \ + PMAP_REF((_ptp), xpmap_ptom(_npte)); \ + xen_queue_pt_update(vtomach(_ptp), \ + xpmap_ptom(_npte)); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ + PMAP_REF((_ptp), (_npte)); \ + xen_queue_pt_update(vtomach(_ptp), _npte); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PT_CLEAR_VA(_ptp, sync) do { \ + PMAP_REF((pt_entry_t *)(_ptp), 0); \ + xen_queue_pt_update(vtomach(_ptp), 0); \ + if (sync || ALWAYS_SYNC) \ + xen_flush_queue(); \ +} while (/*CONSTCOND*/0) + +#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do { \ + PMAP_REF((_ptp), xpmap_ptom(_npte)); \ + pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do { \ + PMAP_REF((_ptp), (_npte)); \ + pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do { \ + PMAP_REF((pt_entry_t *)(_ptp), 0); \ + pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) + +#endif + +#define PT_SET_MA(_va, _ma) \ +do { \ + int err; \ + err = HYPERVISOR_update_va_mapping(((unsigned long)(_va)), \ + (_ma), UVMF_INVLPG| UVMF_ALL); \ + KASSERT(err >= 0, ("unexpected result from update_va_mapping")); \ +} while (/*CONSTCOND*/0) + +#define PT_UPDATES_FLUSH() do { \ + xen_flush_queue(); \ +} while (/*CONSTCOND*/0) + +static __inline vm_paddr_t +xpmap_mtop(vm_paddr_t mpa) +{ + vm_paddr_t tmp = (mpa & PG_FRAME); + + return machtophys(tmp) | (mpa & ~PG_FRAME); +} + +static __inline vm_paddr_t +xpmap_ptom(vm_paddr_t ppa) +{ + vm_paddr_t tmp = (ppa & PG_FRAME); + + return phystomach(tmp) | (ppa & ~PG_FRAME); +} + +static __inline void +set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ +#ifdef notyet + PANIC_IF(max_mapnr && pfn >= max_mapnr); +#endif + if (xen_feature(XENFEAT_auto_translated_physmap)) { +#ifdef notyet + PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY)); +#endif + return; + } + xen_phys_machine[pfn] = mfn; +} + + + + +#endif /* _XEN_XENPMAP_H_ */ Property changes on: i386/include/xen/xenpmap.h 
___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/conf/DEFAULTS =================================================================== --- i386/conf/DEFAULTS (.../stable/6/sys) (revision 184012) +++ i386/conf/DEFAULTS (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -15,3 +15,5 @@ # Pseudo devices. device mem # Memory and kernel memory devices device io # I/O device + +options NATIVE Index: i386/conf/XEN =================================================================== --- i386/conf/XEN (.../stable/6/sys) (revision 0) +++ i386/conf/XEN (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,156 @@ +# +# GENERIC -- Generic kernel configuration file for FreeBSD/i386 +# +# For more information on this file, please read the handbook section on +# Kernel Configuration Files: +# +# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html +# +# The handbook is also available locally in /usr/share/doc/handbook +# if you've installed the doc distribution, otherwise always see the +# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the +# latest information. +# +# An exhaustive list of options and more detailed explanations of the +# device lines is also present in the ../../conf/NOTES and NOTES files. +# If you are in doubt as to the purpose or necessity of a line, check first +# in NOTES. +# +# $FreeBSD$ + +machine i386 +cpu I686_CPU +ident XEN + +# To statically compile in device wiring instead of /boot/device.hints +#hints "GENERIC.hints" # Default places to look for devices. + +makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols +makeoptions MODULES_OVERRIDE="" + +#options SCHED_ULE # ULE scheduler +#options PREEMPTION # Enable kernel thread preemption +options SCHED_4BSD +options INET # InterNETworking +options INET6 # IPv6 communications protocols +options FFS # Berkeley Fast Filesystem +options SOFTUPDATES # Enable FFS soft updates support +options UFS_ACL # Support for access control lists +options UFS_DIRHASH # Improve performance on big directories +options MD_ROOT # MD is a potential root device +options NFSCLIENT # Network Filesystem Client +options NFSSERVER # Network Filesystem Server +options NFS_ROOT # NFS usable as /, requires NFSCLIENT +options MSDOSFS # MSDOS Filesystem +options CD9660 # ISO 9660 Filesystem +options PROCFS # Process filesystem (requires PSEUDOFS) +options PSEUDOFS # Pseudo-filesystem framework +options GEOM_LABEL # Provides labelization +options COMPAT_FREEBSD4 # Compatible with FreeBSD4 +options COMPAT_FREEBSD5 # Compatible with FreeBSD5 +options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI +options KTRACE # ktrace(1) support +options SYSVSHM # SYSV-style shared memory +options SYSVMSG # SYSV-style message queues +options SYSVSEM # SYSV-style semaphores +options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions +options KBD_INSTALL_CDEV # install a CDEV entry in /dev +options AUDIT # Security event auditing + +# Debugging for use in -current +options KDB # Enable kernel debugger support. +options DDB # Support DDB. +options GDB # Support remote GDB. 
+#options INVARIANTS # Enable calls of extra sanity checking +#options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS +#options WITNESS # Enable checks to detect deadlocks and cycles +#options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed + +# To make an SMP kernel, the next two lines are needed +#options SMP # Symmetric MultiProcessor Kernel +#device apic # I/O APIC +options PAE + + +# CPU frequency control +#device cpufreq # native only + +# Bus support. +#device pci + +# SCSI peripherals +device scbus # SCSI bus (required for SCSI) +device ch # SCSI media changers +device da # Direct Access (disks) +device sa # Sequential Access (tape etc) +device cd # CD +device pass # Passthrough device (direct SCSI access) +device ses # SCSI Environmental Services (and SAF-TE) + +# atkbdc0 controls both the keyboard and the PS/2 mouse +device atkbdc # AT keyboard controller +device atkbd # AT keyboard +device psm # PS/2 mouse +device kbdmux # keyboard multiplexer +#device vga # VGA video card driver +device splash # Splash screen and screen saver support + +# syscons is the default console driver, resembling an SCO console + +#device agp # support several AGP chipsets + +# Power management support (see NOTES for more options) +#device apm +# Add suspend/resume support for the i8254. +#device pmtimer # native + +device pci + +# Serial (COM) ports +device uart # Generic UART driver + +# If you've got a "dumb" serial or parallel PCI card that is +# supported by the puc(4) glue driver, uncomment the following +# line to enable it (connects to sio, uart and/or ppc drivers): +#device puc + +# PCI Ethernet NICs. +device em # Intel PRO/1000 adapter Gigabit Ethernet Card + +# PCI Ethernet NICs that use the common MII bus controller code. +# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! +device miibus # MII bus support + +# Pseudo devices. +device loop # Network loopback +device random # Entropy device +device ether # Ethernet support +device sl # Kernel SLIP +device ppp # Kernel PPP +device tun # Packet tunnel. +device pty # Pseudo-ttys (telnet etc) +device md # Memory "disks" +device gif # IPv6 and IPv4 tunneling +device faith # IPv6-to-IPv4 relaying (translation) +device firmware # firmware assist module + +# The `bpf' device enables the Berkeley Packet Filter. +# Be aware of the administrative consequences of enabling this! +# Note that 'bpf' is required for DHCP. +device bpf # Berkeley packet filter + + +options XEN +nooption NATIVE +nodevice atpic +options MCLSHIFT=12 + +nodevice isa +nooption ISAPNP + +options KTR +options KTR_COMPILE=(KTR_PMAP) +options KTR_CPUMASK=0xff +options KTR_ENTRIES=65536 +options KTR_MASK=(KTR_PMAP) +options KVA_PAGES=1600 Index: i386/i386/vm_machdep.c =================================================================== --- i386/i386/vm_machdep.c (.../stable/6/sys) (revision 184012) +++ i386/i386/vm_machdep.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -89,6 +89,9 @@ #include #include +#ifdef XEN +#include +#endif #ifdef PC98 #include #else @@ -264,7 +267,7 @@ /* Setup to release sched_lock in fork_exit(). */ td2->td_md.md_spinlock_count = 1; - td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I; + td2->td_md.md_saved_flags = PSL_USER; /* * Now, cpu_switch() can schedule the new process. @@ -436,7 +439,7 @@ /* Setup to release sched_lock in fork_exit(). 
*/ td->td_md.md_spinlock_count = 1; - td->td_md.md_saved_flags = PSL_KERNEL | PSL_I; + td->td_md.md_saved_flags = PSL_USER; } /* @@ -593,6 +596,9 @@ int b; #endif +#ifdef XEN + HYPERVISOR_shutdown(SHUTDOWN_poweroff); +#endif disable_intr(); #ifdef CPU_ELAN if (elan_mmcr != NULL) @@ -762,8 +768,11 @@ */ ptep = vtopte(sf->kva); opte = *ptep; +#ifdef XEN + PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag | PG_RW | PG_V); +#else *ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V; - +#endif /* * Avoid unnecessary TLB invalidations: If the sf_buf's old * virtual-to-physical mapping was not used, then any processor @@ -812,6 +821,14 @@ if (sf->ref_count == 0) { TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry); nsfbufsused--; +#ifdef XEN + /* + * Xen doesn't like having dangling R/W mappings + */ + pmap_qremove(sf->kva, 1); + sf->m = NULL; + LIST_REMOVE(sf, list_entry); +#endif if (sf_buf_alloc_want > 0) wakeup_one(&sf_buf_freelist); } Index: i386/i386/swtch.s =================================================================== --- i386/i386/swtch.s (.../stable/6/sys) (revision 184012) +++ i386/i386/swtch.s (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -71,7 +71,7 @@ movl 8(%esp),%ecx /* New thread */ movl TD_PCB(%ecx),%edx movl PCB_CR3(%edx),%eax - movl %eax,%cr3 /* new address space */ + LOAD_CR3(%eax) /* new address space */ /* set bit in new pm_active */ movl TD_PROC(%ecx),%eax movl P_VMSPACE(%eax), %ebx @@ -114,11 +114,13 @@ movl %gs,PCB_GS(%edx) pushfl /* PSL */ popl PCB_PSL(%edx) +#ifndef XEN /* Check to see if we need to call a switchout function. */ movl PCB_SWITCHOUT(%edx),%eax cmpl $0, %eax je 1f call *%eax +#endif 1: /* Test if debug registers should be saved. */ testl $PCB_DBREGS,PCB_FLAGS(%edx) @@ -171,7 +173,7 @@ movl %cr3,%ebx /* The same address space? */ cmpl %ebx,%eax je sw1 - movl %eax,%cr3 /* new address space */ + LOAD_CR3(%eax) /* new address space */ /* Release bit from old pmap->pm_active */ movl PCPU(CURPMAP), %ebx @@ -191,7 +193,19 @@ btsl %esi, PM_ACTIVE(%ebx) /* set new */ sw1: +#ifdef XEN + pushl %eax + pushl %ecx + pushl %edx + call xen_handle_thread_switch + popl %edx + popl %ecx + popl %eax /* + * XXX set IOPL + */ +#else + /* * At this point, we've switched address spaces and are ready * to load up the rest of the next context. */ @@ -238,7 +252,7 @@ movl 12(%esi), %ebx movl %eax, 8(%edi) movl %ebx, 12(%edi) - +#endif /* Restore context. */ movl PCB_EBX(%edx),%ebx movl PCB_ESP(%edx),%esp @@ -263,7 +277,7 @@ movl _default_ldt,%eax cmpl PCPU(CURRENTLDT),%eax je 2f - lldt _default_ldt + LLDT(_default_ldt) movl %eax,PCPU(CURRENTLDT) jmp 2f 1: @@ -366,7 +380,7 @@ * parent's npx state for forks by forgetting to reload. */ pushfl - cli + CLI movl PCPU(FPCURTHREAD),%eax testl %eax,%eax je 1f Index: i386/i386/apic_vector.s =================================================================== --- i386/i386/apic_vector.s (.../stable/6/sys) (revision 184012) +++ i386/i386/apic_vector.s (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -299,6 +299,7 @@ /* * Handler for IPIs sent via the per-cpu IPI bitmap. 
*/ +#ifndef XEN .text SUPERALIGN_TEXT IDTVEC(ipi_intr_bitmap_handler) @@ -320,7 +321,7 @@ addl $4, %esp /* XXX convert clockframe to trapframe */ MEXITCOUNT jmp doreti - +#endif /* * Executed by a CPU when it receives an Xcpustop IPI from another CPU, * Index: i386/i386/genassym.c =================================================================== --- i386/i386/genassym.c (.../stable/6/sys) (revision 184012) +++ i386/i386/genassym.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -227,3 +227,9 @@ ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base)); ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat)); #endif + +#ifdef XEN +#include +ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3)); +ASSYM(HYPERVISOR_VIRT_START, __HYPERVISOR_VIRT_START); +#endif Index: i386/i386/support.s =================================================================== --- i386/i386/support.s (.../stable/6/sys) (revision 184012) +++ i386/i386/support.s (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -1426,10 +1426,11 @@ */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) +#ifndef XEN /* reload the descriptor table */ movl 4(%esp),%eax lgdt (%eax) - +#endif /* flush the prefetch q */ jmp 1f nop Index: i386/i386/busdma_machdep.c =================================================================== --- i386/i386/busdma_machdep.c (.../stable/6/sys) (revision 184012) +++ i386/i386/busdma_machdep.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -140,6 +140,11 @@ static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); +#ifdef XEN +#undef pmap_kextract +#define pmap_kextract pmap_kextract_ma +#endif + /* * Return true if a match is made. * Index: i386/i386/sys_machdep.c =================================================================== --- i386/i386/sys_machdep.c (.../stable/6/sys) (revision 184012) +++ i386/i386/sys_machdep.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -58,6 +58,27 @@ #include /* for kernel_map */ +#ifdef XEN +#include +#include +#include + +void i386_reset_ldt(struct proc_ldt *pldt); + +void +i386_reset_ldt(struct proc_ldt *pldt) +{ + xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len); +} +#define SEG_VIRT_END (HYPERVISOR_VIRT_START >> 12) & 0xffff +#define SET_DESCRIPTOR(index, sd) \ + HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[index]), *(uint64_t *)&(sd)); +#else +#define i386_reset_ldt(x) +#define SEG_VIRT_END 0xffff +#define SET_DESCRIPTOR(index, sd) PCPU_GET(fsgs_gdt)[index] = (sd); +#endif + #define MAX_LD 8192 #define LD_PER_PAGE 512 #define NEW_MAX_LD(num) ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1)) @@ -163,7 +184,7 @@ */ sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; - sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ + sd.sd_lolimit = SEG_VIRT_END; /* 4GB limit, wraps */ sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; @@ -173,7 +194,7 @@ sd.sd_gran = 1; critical_enter(); td->td_pcb->pcb_fsd = sd; - PCPU_GET(fsgs_gdt)[0] = sd; + SET_DESCRIPTOR(0, sd); critical_exit(); td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL); } @@ -193,7 +214,7 @@ */ sd.sd_lobase = base & 0xffffff; sd.sd_hibase = (base >> 24) & 0xff; - sd.sd_lolimit = 0xffff; /* 4GB limit, wraps around */ + sd.sd_lolimit = SEG_VIRT_END; /* 4GB limit, wraps */ sd.sd_hilimit = 0xf; sd.sd_type = SDT_MEMRWA; sd.sd_dpl = SEL_UPL; @@ -203,7 +224,7 @@ sd.sd_gran = 1; critical_enter(); td->td_pcb->pcb_gsd = sd; - PCPU_GET(fsgs_gdt)[1] = sd; + 
SET_DESCRIPTOR(1, sd); critical_exit(); load_gs(GSEL(GUGS_SEL, SEL_UPL)); } @@ -364,6 +385,10 @@ struct proc_ldt *pldt; pldt = mdp->md_ldt; +#ifdef XEN + i386_reset_ldt(pldt); + PCPU_SET(currentldt, (int)pldt); +#else #ifdef SMP gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd; #else @@ -371,6 +396,7 @@ #endif lldt(GSEL(GUSERLDT_SEL, SEL_KPL)); PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL)); +#endif /* !XEN */ } #ifdef SMP @@ -385,6 +411,39 @@ } #endif +#ifdef XEN + +struct proc_ldt * +user_ldt_alloc(struct mdproc *mdp, int len) +{ + struct proc_ldt *pldt, *new_ldt; + + MALLOC(new_ldt, struct proc_ldt *, sizeof(struct proc_ldt), + M_SUBPROC, M_WAITOK); + + new_ldt->ldt_len = len = NEW_MAX_LD(len); + new_ldt->ldt_base = (caddr_t)kmem_alloc(kernel_map, + round_page(len * sizeof(union descriptor))); + if (new_ldt->ldt_base == NULL) { + FREE(new_ldt, M_SUBPROC); + return NULL; + } + new_ldt->ldt_refcnt = 1; + new_ldt->ldt_active = 0; + + if ((pldt = mdp->md_ldt)) { + if (len > pldt->ldt_len) + len = pldt->ldt_len; + bcopy(pldt->ldt_base, new_ldt->ldt_base, + len * sizeof(union descriptor)); + } else { + bcopy(ldt, new_ldt->ldt_base, PAGE_SIZE); + } + pmap_map_readonly(kernel_pmap, (vm_offset_t)new_ldt->ldt_base, + new_ldt->ldt_len*sizeof(union descriptor)); + return new_ldt; +} +#else /* * Must be called with either sched_lock free or held but not recursed. * If it does not return NULL, it will return with it owned. @@ -425,6 +484,7 @@ } return new_ldt; } +#endif /* * Must be called either with sched_lock free or held but not recursed. @@ -443,8 +503,11 @@ mtx_lock_spin(&sched_lock); mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED); if (td == PCPU_GET(curthread)) { +#ifndef XEN lldt(_default_ldt); +#endif PCPU_SET(currentldt, _default_ldt); + i386_reset_ldt((struct proc_ldt *)_default_ldt); } mdp->md_ldt = NULL; @@ -549,6 +612,9 @@ } if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) { +#ifdef XEN + load_gs(0); /* XXX check if we really still need this */ +#endif /* complain a for a while if using old methods */ if (ldt_warnings++ < NUM_LDT_WARNINGS) { printf("Warning: pid %d used static ldt allocation.\n", @@ -671,6 +737,23 @@ return (error); } +#ifdef XEN +static int +i386_set_ldt_data(struct thread *td, int start, int num, + union descriptor *descs) +{ + struct mdproc *mdp = &td->td_proc->p_md; + struct proc_ldt *pldt = mdp->md_ldt; + int i, error; + + for (i = 0; i < num; i++) { + error = HYPERVISOR_update_descriptor(vtomach(&((union descriptor *)(pldt->ldt_base))[start + i]), *(uint64_t *)(descs + i)); + if (error) + panic("failed to update ldt: %d", error); + } + return (0); +} +#else static int i386_set_ldt_data(struct thread *td, int start, int num, union descriptor *descs) @@ -686,6 +769,7 @@ num * sizeof(union descriptor)); return (0); } +#endif static int i386_ldt_grow(struct thread *td, int len) Index: i386/i386/machdep.c =================================================================== --- i386/i386/machdep.c (.../stable/6/sys) (revision 184012) +++ i386/i386/machdep.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -141,6 +141,24 @@ uint32_t arch_i386_xbox_memsize = 0; #endif +#ifdef XEN +/* XEN includes */ +#include +#include +#include +#include +#include + +void Xhypervisor_callback(void); +void failsafe_callback(void); + +extern trap_info_t trap_table[]; +struct proc_ldt default_proc_ldt; +extern int init_first; +int running_xen = 1; +extern unsigned long physfree; +#endif /* XEN */ + /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, 
pc_curthread) == 0); @@ -282,8 +300,9 @@ */ bufinit(); vm_pager_bufferinit(); - +#ifndef XEN cpu_setregs(); +#endif } /* @@ -1108,6 +1127,25 @@ return (0); } +static int cpu_idle_hlt = 1; +SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, + &cpu_idle_hlt, 0, "Idle loop HLT enable"); +#ifdef XEN + +void +cpu_halt(void) +{ + HYPERVISOR_shutdown(SHUTDOWN_poweroff); +} + +static void +cpu_idle_default(void) +{ + idle_block(); +} + +#else + /* * Shutdown the CPU as much as possible */ @@ -1133,9 +1171,6 @@ * XXX I'm turning it on for SMP as well by default for now. It seems to * help lock contention somewhat, and this is critical for HTT. -Peter */ -static int cpu_idle_hlt = 1; -SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, - &cpu_idle_hlt, 0, "Idle loop HLT enable"); static void cpu_idle_default(void) @@ -1147,6 +1182,7 @@ */ __asm __volatile("sti; hlt"); } +#endif /* !XEN */ /* * Note that we have to be careful here to avoid a race between checking @@ -1158,7 +1194,7 @@ cpu_idle(void) { -#ifdef SMP +#if defined(SMP) && !defined(XEN) if (mp_grab_cpu_hlt()) return; #endif @@ -1317,10 +1353,16 @@ */ int _default_ldt; + +#ifdef XEN +union descriptor *gdt; +union descriptor *ldt; +#else union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ +union descriptor ldt[NLDT]; /* local descriptor table */ +#endif static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ -union descriptor ldt[NLDT]; /* local descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ int private_tss; /* flag indicating private tss */ @@ -1355,7 +1397,7 @@ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ + SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ @@ -1382,7 +1424,7 @@ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ - 0, /* segment descriptor priority level */ + SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ @@ -1391,7 +1433,7 @@ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ + SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ @@ -1418,11 +1460,12 @@ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ - 0, /* segment descriptor priority level */ + SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, +#ifndef XEN /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ @@ -1514,6 +1557,7 @@ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, +#endif /* !XEN */ }; static struct soft_segment_descriptor ldt_segs[] = { @@ -1735,7 +1779,17 @@ goto physmap_done; } #endif - +#ifdef XEN + has_smap = 0; + Maxmem = xen_start_info->nr_pages - init_first; + physmem = Maxmem; + basemem = 0; + physmap[0] = init_first << PAGE_SHIFT; + physmap[1] = ptoa(Maxmem) - round_page(MSGBUF_SIZE); + physmap_idx = 0; + goto physmap_done; +#endif + hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); bzero(&vmf, 
sizeof(vmf)); @@ -1898,7 +1952,7 @@ vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; -#else +#elif !defined(XEN) /* * Prefer the RTC value for extended memory. */ @@ -1988,7 +2042,7 @@ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; - +#ifndef XEN /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. @@ -2106,7 +2160,10 @@ } *pte = 0; invltlb(); - +#else + phys_avail[0] = physfree; + phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE; +#endif /* * XXX * The last chunk must contain at least one page plus the message @@ -2128,7 +2185,261 @@ avail_end = phys_avail[pa_indx]; } +#ifdef XEN + +#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) void +init386(int first) +{ + int error, gsel_tss, metadata_missing, x; + unsigned long off, gdtmachpfn; + struct pcpu *pc; + struct callback_register event = { + .type = CALLBACKTYPE_event, + .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback }, + }; + struct callback_register failsafe = { + .type = CALLBACKTYPE_failsafe, + .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback }, + }; + + thread0.td_kstack = proc0kstack; + thread0.td_pcb = (struct pcb *) + (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; + + /* + * This may be done better later if it gets more high level + * components in it. If so just link td->td_proc here. + */ + proc_linkup(&proc0, &ksegrp0, &thread0); + + metadata_missing = 0; + if (xen_start_info->mod_start) { + preload_metadata = (caddr_t)xen_start_info->mod_start; + preload_bootstrap_relocate(KERNBASE); + } else { + metadata_missing = 1; + } + if (envmode == 1) + kern_envp = static_env; + else if ((caddr_t)xen_start_info->cmd_line) + kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line); + + boothowto |= xen_boothowto(kern_envp); + + /* Init basic tunables, hz etc */ + init_param1(); + + /* + * XEN occupies a portion of the upper virtual address space + * At its base it manages an array mapping machine page frames + * to physical page frames - hence we need to be able to + * access 4GB - (64MB - 4MB + 64k) + */ + gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); + + pc = &__pcpu[0]; + gdt_segs[GPRIV_SEL].ssd_base = (int) pc; + gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; + + PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW); + bzero(gdt, PAGE_SIZE); + for (x = 0; x < NGDT; x++) + ssdtosd(&gdt_segs[x], &gdt[x].sd); + + + if (bootverbose) { + printf("gdt=%p\n", gdt); + printf("PTmap=%p\n", PTmap); + printf("addr=%#jx\n", (uintmax_t)*vtopte((unsigned long)gdt) & ~PG_RW); + } + + gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; + PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~(PG_RW|PG_M|PG_A)); + error = HYPERVISOR_set_gdt(&gdtmachpfn, 512); + KASSERT(error == 0, ("unexpected result from set_gdt")); + lgdt(&r_gdt /* unused */); + gdtset = 1; + + if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) 
{ + panic("set_trap_table failed - error %d\n", error); + } + + error = HYPERVISOR_callback_op(CALLBACKOP_register, &event); + if (error == 0) + error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); +#if CONFIG_XEN_COMPAT <= 0x030002 + if (error == -ENOXENSYS) + HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), + (unsigned long)Xhypervisor_callback, + GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); +#endif + pcpu_init(pc, 0, sizeof(struct pcpu)); + PCPU_SET(prvspace, pc); + PCPU_SET(curthread, &thread0); + PCPU_SET(curpcb, thread0.td_pcb); + PCPU_SET(pdir, (unsigned long)IdlePTD); + + /* + * Initialize mutexes. + * + * icu_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be + * under witness. + */ + mutex_init(); + mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); + + /* make ldt memory segments */ + PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW); + bzero(ldt, PAGE_SIZE); + ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); + ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); + for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) + ssdtosd(&ldt_segs[x], &ldt[x].sd); + + default_proc_ldt.ldt_base = (caddr_t)ldt; + default_proc_ldt.ldt_len = 6; + _default_ldt = (int)&default_proc_ldt; + PCPU_SET(currentldt, _default_ldt); + PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW); + xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0])); + +#ifdef XBOX + /* + * The following code queries the PCI ID of 0:0:0. For the XBOX, + * This should be 0x10de / 0x02a5. + * + * This is exactly what Linux does. + */ + outl(0xcf8, 0x80000000); + if (inl(0xcfc) == 0x02a510de) { + arch_i386_is_xbox = 1; + pic16l_setled(XBOX_LED_GREEN); + + /* + * We are an XBOX, but we may have either 64MB or 128MB of + * memory. The PCI host bridge should be programmed for this, + * so we just query it. + */ + outl(0xcf8, 0x80000084); + arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; + } +#endif /* XBOX */ +#if defined (XEN_PRIVILEGED) + /* + * Initialize the i8254 before the console so that console + * initialization can use DELAY(). + */ + i8254_init(); +#endif + /* + * Initialize the console before we print anything out. + */ + cninit(); + + if (metadata_missing) + printf("WARNING: loader(8) metadata is missing!\n"); + +#ifdef DEV_ISA + if (xen_start_info->flags & SIF_PRIVILEGED) { + elcr_probe(); +#ifdef DEV_ATPIC + atpic_startup(); +#endif + } +#endif + +#ifdef DDB + ksym_start = bootinfo.bi_symtab; + ksym_end = bootinfo.bi_esymtab; +#endif + + kdb_init(); + +#ifdef KDB + if (boothowto & RB_KDB) + kdb_enter("Boot flags requested debugger"); +#endif + + finishidentcpu(); /* Final stage of CPU initialization */ + setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + initializecpu(); /* Initialize CPU registers */ + + /* make an initial tss so cpu can get interrupt stack on syscall!
*/ + /* Note: -16 is so we can grow the trapframe if we came from vm86 */ + PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); + PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); + gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); + HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), + PCPU_GET(common_tss.tss_esp0)); + + + /* pointer to selector slot for %fs/%gs */ + PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); + + dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = + dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; + dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = + dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); +#ifdef PAE + dblfault_tss.tss_cr3 = (int)IdlePDPT; +#else + dblfault_tss.tss_cr3 = (int)IdlePTD; +#endif + dblfault_tss.tss_eip = (int)dblfault_handler; + dblfault_tss.tss_eflags = PSL_KERNEL; + dblfault_tss.tss_ds = dblfault_tss.tss_es = + dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); + dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); + dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); + dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); + + vm86_initialize(); + getmemsize(first); + init_param2(physmem); + + + /* Map the message buffer. */ + for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) + pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off); + + /* now running on new page tables, configured,and u/iom is accessible */ + + msgbufinit(msgbufp, MSGBUF_SIZE); + + /* transfer to user mode */ + + _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); + _udatasel = GSEL(GUDATA_SEL, SEL_UPL); + + /* setup proc 0's pcb */ + thread0.td_pcb->pcb_flags = 0; +#ifdef PAE + thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; +#else + thread0.td_pcb->pcb_cr3 = (int)IdlePTD; +#endif + thread0.td_pcb->pcb_ext = 0; + thread0.td_frame = &proc0_tf; + thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0]; + thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1]; +} + +#else +void init386(first) int first; { @@ -2389,6 +2700,7 @@ thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } +#endif /* !XEN */ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) Index: i386/i386/trap.c =================================================================== --- i386/i386/trap.c (.../stable/6/sys) (revision 184012) +++ i386/i386/trap.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -215,6 +215,7 @@ goto out; #endif +#ifndef XEN if ((frame.tf_eflags & PSL_I) == 0) { /* * Buggy application or kernel code has disabled @@ -245,6 +246,7 @@ enable_intr(); } } +#endif eva = 0; code = frame.tf_err; Index: i386/i386/intr_machdep.c =================================================================== --- i386/i386/intr_machdep.c (.../stable/6/sys) (revision 184012) +++ i386/i386/intr_machdep.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -284,7 +284,12 @@ /* Schedule the ithread if needed. */ if (thread) { error = intr_event_schedule_thread(ie); +#ifndef XEN KASSERT(error == 0, ("bad stray interrupt")); +#else + if (error != 0) + log(LOG_CRIT, "bad stray interrupt %d", vector); +#endif } critical_exit(); td->td_intr_nesting_level--; Index: i386/xen/exception.s =================================================================== --- i386/xen/exception.s (.../stable/6/sys) (revision 0) +++ i386/xen/exception.s (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,489 @@ +/*- + * Copyright (c) 1989, 1990 William F. Jolitz. + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include "opt_apic.h" +#include "opt_npx.h" + +#include +#include +#include + +#include "assym.s" + +#define SEL_RPL_MASK 0x0002 +#define __HYPERVISOR_iret 23 + +/* Offsets into shared_info_t. */ + +#define evtchn_upcall_pending /* 0 */ +#define evtchn_upcall_mask 1 + +#define sizeof_vcpu_shift 6 + + +#ifdef SMP +#define GET_VCPU_INFO(reg) movl PCPU(CPUID),reg ; \ + shl $sizeof_vcpu_shift,reg ; \ + addl HYPERVISOR_shared_info,reg +#else +#define GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg +#endif + +#define __DISABLE_INTERRUPTS(reg) movb $1,evtchn_upcall_mask(reg) +#define __ENABLE_INTERRUPTS(reg) movb $0,evtchn_upcall_mask(reg) +#define DISABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \ + __DISABLE_INTERRUPTS(reg) +#define ENABLE_INTERRUPTS(reg) GET_VCPU_INFO(reg) ; \ + __ENABLE_INTERRUPTS(reg) +#define __TEST_PENDING(reg) testb $0xFF,evtchn_upcall_pending(reg) + +#define POPA \ + popl %edi; \ + popl %esi; \ + popl %ebp; \ + popl %ebx; \ + popl %ebx; \ + popl %edx; \ + popl %ecx; \ + popl %eax; + + .text + +/*****************************************************************************/ +/* Trap handling */ +/*****************************************************************************/ +/* + * Trap and fault vector routines. + * + * Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on + * the stack that mostly looks like an interrupt, but does not disable + * interrupts. A few of the traps we use are interrupt gates, + * SDT_SYS386IGT, which are nearly the same thing except interrupts are + * disabled on entry. + * + * The cpu will push a certain amount of state onto the kernel stack for + * the current process. The amount of state depends on the type of trap + * and whether the trap crossed rings or not. See i386/include/frame.h. + * At the very least the current EFLAGS (status register, which includes + * the interrupt disable state prior to the trap), the code segment register, + * and the return instruction pointer are pushed by the cpu.
The cpu + * will also push an 'error' code for certain traps. We push a dummy + * error code for those traps where the cpu doesn't in order to maintain + * a consistent frame. We also push a contrived 'trap number'. + * + * The cpu does not push the general registers, we must do that, and we + * must restore them prior to calling 'iret'. The cpu adjusts the %cs and + * %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we + * must load them with appropriate values for supervisor mode operation. + */ + +MCOUNT_LABEL(user) +MCOUNT_LABEL(btrap) + +IDTVEC(div) + pushl $0; TRAP(T_DIVIDE) +IDTVEC(dbg) + pushl $0; TRAP(T_TRCTRAP) +IDTVEC(nmi) + pushl $0; TRAP(T_NMI) +IDTVEC(bpt) + pushl $0; TRAP(T_BPTFLT) +IDTVEC(ofl) + pushl $0; TRAP(T_OFLOW) +IDTVEC(bnd) + pushl $0; TRAP(T_BOUND) +IDTVEC(ill) + pushl $0; TRAP(T_PRIVINFLT) +IDTVEC(dna) + pushl $0; TRAP(T_DNA) +IDTVEC(fpusegm) + pushl $0; TRAP(T_FPOPFLT) +IDTVEC(tss) + TRAP(T_TSSFLT) +IDTVEC(missing) + TRAP(T_SEGNPFLT) +IDTVEC(stk) + TRAP(T_STKFLT) +IDTVEC(prot) + TRAP(T_PROTFLT) +IDTVEC(page) + TRAP(T_PAGEFLT) +IDTVEC(mchk) + pushl $0; TRAP(T_MCHK) +IDTVEC(rsvd) + pushl $0; TRAP(T_RESERVED) +IDTVEC(fpu) + pushl $0; TRAP(T_ARITHTRAP) +IDTVEC(align) + TRAP(T_ALIGNFLT) +IDTVEC(xmm) + pushl $0; TRAP(T_XMMFLT) + +IDTVEC(hypervisor_callback) + pushl $0; + pushl $0; + pushal + pushl %ds + pushl %es + pushl %fs +upcall_with_regs_pushed: + SET_KERNEL_SREGS + FAKE_MCOUNT(TF_EIP(%esp)) +call_evtchn_upcall: + movl TF_EIP(%esp),%eax + cmpl $scrit,%eax + jb 10f + cmpl $ecrit,%eax + jb critical_region_fixup + +10: pushl %esp + call evtchn_do_upcall + addl $4,%esp + + /* + * Return via doreti to handle ASTs. + */ + MEXITCOUNT + jmp doreti + + +hypervisor_callback_pending: + DISABLE_INTERRUPTS(%esi) /* cli */ + jmp 10b + + /* + * alltraps entry point. Interrupts are enabled if this was a trap + * gate (TGT), else disabled if this was an interrupt gate (IGT). + * Note that int0x80_syscall is a trap gate. Only page faults + * use an interrupt gate. + */ + + SUPERALIGN_TEXT + .globl alltraps + .type alltraps,@function +alltraps: + pushal + pushl %ds + pushl %es + pushl %fs +alltraps_with_regs_pushed: + SET_KERNEL_SREGS + FAKE_MCOUNT(TF_EIP(%esp)) +calltrap: + call trap + + /* + * Return via doreti to handle ASTs. + */ + MEXITCOUNT + jmp doreti + +/* + * SYSCALL CALL GATE (old entry point for a.out binaries) + * + * The intersegment call has been set up to specify one dummy parameter. + * + * This leaves a place to put eflags so that the call frame can be + * converted to a trap frame. Note that the eflags is (semi-)bogusly + * pushed into (what will be) tf_err and then copied later into the + * final spot. It has to be done this way because esp can't be just + * temporarily altered for the pushfl - an interrupt might come in + * and clobber the saved cs/eip. + */ + SUPERALIGN_TEXT +IDTVEC(lcall_syscall) + pushfl /* save eflags */ + popl 8(%esp) /* shuffle into tf_eflags */ + pushl $7 /* sizeof "lcall 7,0" */ + subl $4,%esp /* skip over tf_trapno */ + pushal + pushl %ds + pushl %es + pushl %fs + SET_KERNEL_SREGS + FAKE_MCOUNT(TF_EIP(%esp)) + call syscall + MEXITCOUNT + jmp doreti + +/* + * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80) + * + * Even though the name says 'int0x80', this is actually a TGT (trap gate) + * rather than an IGT (interrupt gate). Thus interrupts are enabled on + * entry just as they are for a normal syscall.
+ */ + SUPERALIGN_TEXT +IDTVEC(int0x80_syscall) + pushl $2 /* sizeof "int 0x80" */ + subl $4,%esp /* skip over tf_trapno */ + pushal + pushl %ds + pushl %es + pushl %fs + SET_KERNEL_SREGS + FAKE_MCOUNT(TF_EIP(%esp)) + call syscall + MEXITCOUNT + jmp doreti + +ENTRY(fork_trampoline) + pushl %esp /* trapframe pointer */ + pushl %ebx /* arg1 */ + pushl %esi /* function */ + call fork_exit + addl $12,%esp + /* cut from syscall */ + + /* + * Return via doreti to handle ASTs. + */ + MEXITCOUNT + jmp doreti + + +/* + * To efficiently implement classification of trap and interrupt handlers + * for profiling, there must be only trap handlers between the labels btrap + * and bintr, and only interrupt handlers between the labels bintr and + * eintr. This is implemented (partly) by including files that contain + * some of the handlers. Before including the files, set up a normal asm + * environment so that the included files don't need to know that they are + * included. + */ + + .data + .p2align 4 + .text + SUPERALIGN_TEXT +MCOUNT_LABEL(bintr) + +#ifdef DEV_ATPIC +#include +#endif +#ifdef DEV_APIC + .data + .p2align 4 + .text + SUPERALIGN_TEXT + +#include +#endif + + .data + .p2align 4 + .text + SUPERALIGN_TEXT +#include + + .text +MCOUNT_LABEL(eintr) + +/* + * void doreti(struct trapframe) + * + * Handle return from interrupts, traps and syscalls. + */ + .text + SUPERALIGN_TEXT + .type doreti,@function +doreti: + FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */ +doreti_next: +#ifdef notyet + /* + * Check if ASTs can be handled now. PSL_VM must be checked first + * since segment registers only have an RPL in non-VM86 mode. + */ + testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */ + jz doreti_notvm86 + movl PCPU(CURPCB),%ecx + testl $PCB_VM86CALL,PCB_FLAGS(%ecx) /* are we in a vm86 call? */ + jz doreti_ast /* can handle ASTS now if not */ + jmp doreti_exit + +doreti_notvm86: +#endif + testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */ + jz doreti_exit /* can't handle ASTs now if not */ + +doreti_ast: + /* + * Check for ASTs atomically with returning. Disabling CPU + * interrupts provides sufficient locking even in the SMP case, + * since we will be informed of any new ASTs by an IPI. + */ + DISABLE_INTERRUPTS(%esi) /* cli */ + movl PCPU(CURTHREAD),%eax + testl $TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax) + je doreti_exit + ENABLE_INTERRUPTS(%esi) /* sti */ + pushl %esp /* pass a pointer to the trapframe */ + call ast + add $4,%esp + jmp doreti_ast + + /* + * doreti_exit: pop registers, iret. + * + * The segment register pop is a special case, since it may + * fault if (for example) a sigreturn specifies bad segment + * registers. The fault is handled in trap.c. + */ +doreti_exit: + ENABLE_INTERRUPTS(%esi) # reenable event callbacks (sti) + + .globl scrit +scrit: + __TEST_PENDING(%esi) + jnz hypervisor_callback_pending /* More to go */ + + MEXITCOUNT + + .globl doreti_popl_fs +doreti_popl_fs: + popl %fs + .globl doreti_popl_es +doreti_popl_es: + popl %es + .globl doreti_popl_ds +doreti_popl_ds: + popl %ds + + /* + * This is important: as nothing is atomic over here (we can get + * interrupted any time), we use the critical_region_fixup() in + * order to figure out where our stack is. Therefore, do NOT use + * 'popal' here without fixing up the table! + */ + POPA + addl $8,%esp + .globl doreti_iret +doreti_iret: + jmp hypercall_page + (__HYPERVISOR_iret * 32) + .globl ecrit +ecrit: + /* + * doreti_iret_fault and friends.
Alternative return code for + * the case where we get a fault in the doreti_exit code + * above. trap() (i386/i386/trap.c) catches this specific + * case, sends the process a signal and continues in the + * corresponding place in the code below. + */ + ALIGN_TEXT + .globl doreti_iret_fault +doreti_iret_fault: + subl $8,%esp + pushal + pushl %ds + .globl doreti_popl_ds_fault +doreti_popl_ds_fault: + pushl %es + .globl doreti_popl_es_fault +doreti_popl_es_fault: + pushl %fs + .globl doreti_popl_fs_fault +doreti_popl_fs_fault: + movl $0,TF_ERR(%esp) /* XXX should be the error code */ + movl $T_PROTFLT,TF_TRAPNO(%esp) + jmp alltraps_with_regs_pushed + + /* +# [How we do the fixup]. We want to merge the current stack frame with the +# just-interrupted frame. How we do this depends on where in the critical +# region the interrupted handler was executing, and so how many saved +# registers are in each frame. We do this quickly using the lookup table +# 'critical_fixup_table'. For each byte offset in the critical region, it +# provides the number of bytes which have already been popped from the +# interrupted stack frame. +*/ + +.globl critical_region_fixup +critical_region_fixup: + addl $critical_fixup_table-scrit,%eax + movzbl (%eax),%eax # %eax contains num bytes popped + movl %esp,%esi + add %eax,%esi # %esi points at end of src region + movl %esp,%edi + add $0x40,%edi # %edi points at end of dst region + movl %eax,%ecx + shr $2,%ecx # convert bytes to words + je 16f # skip loop if nothing to copy +15: subl $4,%esi # pre-decrementing copy loop + subl $4,%edi + movl (%esi),%eax + movl %eax,(%edi) + loop 15b +16: movl %edi,%esp # final %edi is top of merged stack + jmp hypervisor_callback_pending + + +critical_fixup_table: +.byte 0x0,0x0,0x0 #testb $0x1,(%esi) +.byte 0x0,0x0,0x0,0x0,0x0,0x0 #jne ea +.byte 0x0,0x0 #pop %fs +.byte 0x04 #pop %es +.byte 0x08 #pop %ds +.byte 0x0c #pop %edi +.byte 0x10 #pop %esi +.byte 0x14 #pop %ebp +.byte 0x18 #pop %ebx +.byte 0x1c #pop %ebx +.byte 0x20 #pop %edx +.byte 0x24 #pop %ecx +.byte 0x28 #pop %eax +.byte 0x2c,0x2c,0x2c #add $0x8,%esp +#if 0 +.byte 0x34 #iret +#endif +.byte 0x34,0x34,0x34,0x34,0x34 #HYPERVISOR_iret + + +/* # Hypervisor uses this for application faults while it executes.*/ +ENTRY(failsafe_callback) + pushal + call xen_failsafe_handler +/*# call install_safe_pf_handler */ + movl 28(%esp),%ebx +1: movl %ebx,%ds + movl 32(%esp),%ebx +2: movl %ebx,%es + movl 36(%esp),%ebx +3: movl %ebx,%fs + movl 40(%esp),%ebx +4: movl %ebx,%gs +/*# call install_normal_pf_handler */ + popal + addl $12,%esp + iret + + Property changes on: i386/xen/exception.s ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/xen/locore.s =================================================================== --- i386/xen/locore.s (.../stable/6/sys) (revision 0) +++ i386/xen/locore.s (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,373 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)locore.s 7.3 (Berkeley) 5/13/91 + * $FreeBSD$ + * + * originally from: locore.s, by William F. Jolitz + * + * Substantially rewritten by David Greenman, Rod Grimes, + * Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp + * and many others. + */ + +#include "opt_bootp.h" +#include "opt_compat.h" +#include "opt_nfsroot.h" +#include "opt_global.h" +#include "opt_pmap.h" + +#include +#include + +#include +#include +#include +#include +#include + +#define __ASSEMBLY__ +#include + +/* The defines below have been lifted out of */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define KERNEL_CS FLAT_RING1_CS +#define KERNEL_DS FLAT_RING1_DS + +#include "assym.s" + +.section __xen_guest + .ascii "LOADER=generic,GUEST_OS=freebsd,GUEST_VER=7.0,XEN_VER=xen-3.0,BSD_SYMTAB,VIRT_BASE=0xc0000000" + .byte 0 + + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "FreeBSD") + ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "HEAD") + ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") + ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long, KERNBASE) + ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .long, KERNBASE) + ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long, btext) + ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long, hypercall_page) + ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long, HYPERVISOR_VIRT_START) +#if 0 + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel") +#endif + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_page_tables|supervisor_mode_kernel|writable_descriptor_tables") + +#ifdef PAE + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes") + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V) +#else + ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "no") + ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PG_V, PG_V) +#endif + ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic") + ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 1) + + + +/* + * XXX + * + * Note: This version greatly munged to avoid various assembler errors + * that may be fixed in newer versions of gas. Perhaps newer versions + * will have more pleasant appearance. + */ + +/* + * PTmap is recursive pagemap at top of virtual address space. + * Within PTmap, the page directory can be found (third indirection). 
+ */ + .globl PTmap,PTD,PTDpde + .set PTmap,(PTDPTDI << PDRSHIFT) + .set PTD,PTmap + (PTDPTDI * PAGE_SIZE) + .set PTDpde,PTD + (PTDPTDI * PDESIZE) + +/* + * Compiled KERNBASE location and the kernel load address + */ + .globl kernbase + .set kernbase,KERNBASE + .globl kernload + .set kernload,KERNLOAD + +/* + * Globals + */ + .data + ALIGN_DATA /* just to be sure */ + + .space 0x2000 /* space for tmpstk - temporary stack */ +tmpstk: + + .globl bootinfo +bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */ + + .globl KERNend +KERNend: .long 0 /* phys addr end of kernel (just after bss) */ + .globl physfree +physfree: .long 0 /* phys addr of next free page */ + +#ifdef SMP + .globl cpu0prvpage +cpu0pp: .long 0 /* phys addr cpu0 private pg */ +cpu0prvpage: .long 0 /* relocated version */ + + .globl SMPpt +SMPptpa: .long 0 /* phys addr SMP page table */ +SMPpt: .long 0 /* relocated version */ +#endif /* SMP */ + + .globl IdlePTD +IdlePTD: .long 0 /* phys addr of kernel PTD */ + +#ifdef PAE + .globl IdlePDPT +IdlePDPT: .long 0 /* phys addr of kernel PDPT */ +#endif + +#ifdef SMP + .globl KPTphys +#endif + .globl gdtset +KPTphys: .long 0 /* phys addr of kernel page tables */ +gdtset: .long 0 + + .globl proc0kstack +proc0uarea: .long 0 /* address of proc 0 uarea (unused)*/ +proc0kstack: .long 0 /* address of proc 0 kstack space */ +p0upa: .long 0 /* phys addr of proc0 UAREA (unused) */ +p0kpa: .long 0 /* phys addr of proc0's STACK */ + +vm86phystk: .long 0 /* PA of vm86/bios stack */ + + .globl vm86paddr, vm86pa +vm86paddr: .long 0 /* address of vm86 region */ +vm86pa: .long 0 /* phys addr of vm86 region */ + +#ifdef PC98 + .globl pc98_system_parameter +pc98_system_parameter: + .space 0x240 +#endif + + .globl avail_space +avail_space: .long 0 + +/********************************************************************** + * + * Some handy macros + * + */ + +/* + * We're already in protected mode, so no remapping is needed. + */ +#define R(foo) (foo) + +#define ALLOCPAGES(foo) \ + movl R(physfree), %esi ; \ + movl $((foo)*PAGE_SIZE), %eax ; \ + addl %esi, %eax ; \ + movl %eax, R(physfree) ; \ + movl %esi, %edi ; \ + movl $((foo)*PAGE_SIZE),%ecx ; \ + xorl %eax,%eax ; \ + cld ; \ + rep ; \ + stosb + +/* + * fillkpt + * eax = page frame address + * ebx = index into page table + * ecx = how many pages to map + * base = base address of page dir/table + * prot = protection bits + */ +#define fillkpt(base, prot) \ + shll $PTESHIFT,%ebx ; \ + addl base,%ebx ; \ + orl $PG_V,%eax ; \ + orl prot,%eax ; \ +1: movl %eax,(%ebx) ; \ + addl $PAGE_SIZE,%eax ; /* increment physical address */ \ + addl $PTESIZE,%ebx ; /* next pte */ \ + loop 1b + +/* + * fillkptphys(prot) + * eax = physical address + * ecx = how many pages to map + * prot = protection bits + */ +#define fillkptphys(prot) \ + movl %eax, %ebx ; \ + shrl $PAGE_SHIFT, %ebx ; \ + fillkpt(R(KPTphys), prot) + +/* Temporary stack */ +.space 8192 +tmpstack: + .long tmpstack, KERNEL_DS + + .text + +.p2align 12, 0x90 + +#define HYPERCALL_PAGE_OFFSET 0x1000 +.org HYPERCALL_PAGE_OFFSET +ENTRY(hypercall_page) + .cfi_startproc + .skip 0x1000 + .cfi_endproc + +/********************************************************************** + * + * This is where the bootblocks start us, set the ball rolling... 
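[Editor's note: for readers less at home in the assembler macros above, this is a rough C rendering of fillkpt() for the non-PAE case. A sketch only, not part of the patch; PG_V and the page size are the usual i386 values and the function name is invented.]

#include <stdint.h>

#define PG_V		0x001		/* PTE valid bit */
#define PAGE_SIZE_4K	4096

/* Write 'count' PTEs into 'table' starting at 'idx', mapping consecutive
 * physical pages beginning at 'pa' with the given protection bits. */
static void
fillkpt_c(uint32_t *table, uint32_t idx, uint32_t pa, uint32_t count,
    uint32_t prot)
{
	while (count-- > 0) {
		table[idx++] = pa | PG_V | prot;	/* one PTE per page */
		pa += PAGE_SIZE_4K;			/* next physical page */
	}
}

[fillkptphys() is the same loop with the index taken from the physical address (pa >> PAGE_SHIFT) and KPTphys as the table.]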
+ * + */ +NON_GPROF_ENTRY(btext) + /* At the end of our stack, we shall have free space - so store it */ + movl %esp,%ebx + movl %ebx,R(avail_space) + + lss tmpstack,%esp + + pushl %esi + call initvalues + popl %esi + + /* Store the CPUID information */ + xorl %eax,%eax + cpuid # cpuid 0 + movl %eax,R(cpu_high) # highest capability + movl %ebx,R(cpu_vendor) # store vendor string + movl %edx,R(cpu_vendor+4) + movl %ecx,R(cpu_vendor+8) + movb $0,R(cpu_vendor+12) + + movl $1,%eax + cpuid # cpuid 1 + movl %eax,R(cpu_id) # store cpu_id + movl %ebx,R(cpu_procinfo) # store cpu_procinfo + movl %edx,R(cpu_feature) # store cpu_feature + movl %ecx,R(cpu_feature2) # store cpu_feature2 + rorl $8,%eax # extract family type + andl $15,%eax + cmpl $5,%eax + movl $CPU_686,R(cpu) + + movl proc0kstack,%eax + leal (KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp + xorl %ebp,%ebp /* mark end of frames */ +#ifdef PAE + movl IdlePDPT,%esi +#else + movl IdlePTD,%esi +#endif + movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) + pushl physfree + call init386 + addl $4, %esp + call mi_startup + /* NOTREACHED */ + int $3 + +/* + * Signal trampoline, copied to top of user stack + */ +NON_GPROF_ENTRY(sigcode) + calll *SIGF_HANDLER(%esp) + leal SIGF_UC(%esp),%eax /* get ucontext */ + pushl %eax + testl $PSL_VM,UC_EFLAGS(%eax) + jne 1f + mov UC_GS(%eax), %gs /* restore %gs */ +1: + movl $SYS_sigreturn,%eax + pushl %eax /* junk to fake return addr. */ + int $0x80 /* enter kernel with args */ + /* on stack */ +1: + jmp 1b + +#ifdef COMPAT_FREEBSD4 + ALIGN_TEXT +freebsd4_sigcode: + calll *SIGF_HANDLER(%esp) + leal SIGF_UC4(%esp),%eax /* get ucontext */ + pushl %eax + testl $PSL_VM,UC4_EFLAGS(%eax) + jne 1f + mov UC4_GS(%eax),%gs /* restore %gs */ +1: + movl $344,%eax /* 4.x SYS_sigreturn */ + pushl %eax /* junk to fake return addr. */ + int $0x80 /* enter kernel with args */ + /* on stack */ +1: + jmp 1b +#endif + +#ifdef COMPAT_43 + ALIGN_TEXT +osigcode: + call *SIGF_HANDLER(%esp) /* call signal handler */ + lea SIGF_SC(%esp),%eax /* get sigcontext */ + pushl %eax + testl $PSL_VM,SC_PS(%eax) + jne 9f + movl SC_GS(%eax),%gs /* restore %gs */ +9: + movl $103,%eax /* 3.x SYS_sigreturn */ + pushl %eax /* junk to fake return addr. 
*/ + int $0x80 /* enter kernel with args */ +0: jmp 0b +#endif /* COMPAT_43 */ + + ALIGN_TEXT +esigcode: + + .data + .globl szsigcode +szsigcode: + .long esigcode-sigcode +#ifdef COMPAT_FREEBSD4 + .globl szfreebsd4_sigcode +szfreebsd4_sigcode: + .long esigcode-freebsd4_sigcode +#endif +#ifdef COMPAT_43 + .globl szosigcode +szosigcode: + .long esigcode-osigcode +#endif Property changes on: i386/xen/locore.s ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/xen/xen_bus.c =================================================================== --- i386/xen/xen_bus.c (.../stable/6/sys) (revision 0) +++ i386/xen/xen_bus.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,238 @@ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +static MALLOC_DEFINE(M_XENDEV, "xenintrdrv", "xen system device"); + +struct xenbus_device { + struct resource_list xen_resources; +}; + +#define DEVTOXEN(dev) ((struct xenbus_device *)device_get_ivars(dev)) + +static void xenbus_identify(driver_t *, device_t); +static int xenbus_probe(device_t); +static int xenbus_attach(device_t); +static int xenbus_print_child(device_t, device_t); +static device_t xenbus_add_child(device_t bus, int order, const char *name, + int unit); +static struct resource *xenbus_alloc_resource(device_t, device_t, int, int *, + u_long, u_long, u_long, u_int); +static int xenbus_release_resource(device_t, device_t, int, int, + struct resource *); +static int xenbus_set_resource(device_t, device_t, int, int, u_long, u_long); +static int xenbus_get_resource(device_t, device_t, int, int, u_long *, u_long *); +static void xenbus_delete_resource(device_t, device_t, int, int); + + +static device_method_t xenbus_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, xenbus_identify), + DEVMETHOD(device_probe, xenbus_probe), + DEVMETHOD(device_attach, xenbus_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, bus_generic_suspend), + DEVMETHOD(device_resume, bus_generic_resume), + + /* Bus interface */ + DEVMETHOD(bus_print_child, xenbus_print_child), + DEVMETHOD(bus_add_child, xenbus_add_child), + DEVMETHOD(bus_read_ivar, bus_generic_read_ivar), + DEVMETHOD(bus_write_ivar, bus_generic_write_ivar), + DEVMETHOD(bus_set_resource, xenbus_set_resource), + DEVMETHOD(bus_get_resource, xenbus_get_resource), + DEVMETHOD(bus_alloc_resource, xenbus_alloc_resource), + DEVMETHOD(bus_release_resource, xenbus_release_resource), + DEVMETHOD(bus_delete_resource, xenbus_delete_resource), + DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), + DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), + DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), + DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), + + { 0, 0 } +}; + + +static driver_t xenbus_driver = { + "xenbus", + xenbus_methods, + 1, /* no softc */ +}; +static devclass_t xenbus_devclass; +static device_t xenbus_dev; +static boolean_t xenbus_probe_delay = TRUE; /* delay child probes */ + +DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0); + +static void +xenbus_identify(driver_t *driver, device_t parent) +{ + + /* + * Add child device with order of 0 so it gets probed + * first + */ + xenbus_dev = BUS_ADD_CHILD(parent, 0, "xenbus", 
0); + if (xenbus_dev == NULL) + panic("xenbus: could not attach"); +} + +static int +xenbus_probe(device_t dev) +{ + device_set_desc(dev, "xen system"); + device_quiet(dev); + return (0); +} + +static int +xenbus_attach(device_t dev) +{ + /* + * First, let our child driver's identify any child devices that + * they can find. Once that is done attach any devices that we + * found. + */ + if (!xenbus_probe_delay) { + bus_generic_probe(dev); + bus_generic_attach(dev); + } + + return 0; +} + + +static int +xenbus_print_all_resources(device_t dev) +{ + struct xenbus_device *xdev = device_get_ivars(dev); + struct resource_list *rl = &xdev->xen_resources; + int retval = 0; + + if (STAILQ_FIRST(rl)) + retval += printf(" at"); + + retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx"); + retval += resource_list_print_type(rl, "iomem", SYS_RES_MEMORY, "%#lx"); + retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld"); + + return retval; +} + + +static int +xenbus_print_child(device_t bus, device_t child) +{ + int retval = 0; + + retval += bus_print_child_header(bus, child); + retval += xenbus_print_all_resources(child); + retval += printf(" on motherboard\n"); /* XXX "motherboard", ick */ + + return (retval); +} + +static device_t +xenbus_add_child(device_t bus, int order, const char *name, int unit) +{ + device_t child; + struct xenbus_device *xendev; + + xendev = malloc(sizeof(struct xenbus_device), M_XENDEV, + M_NOWAIT | M_ZERO); + if (!xendev) + return(0); + resource_list_init(&xendev->xen_resources); + + child = device_add_child_ordered(bus, order, name, unit); + + /* should we free this in xenbus_child_detached? */ + device_set_ivars(child, xendev); + + return(child); +} + +static struct resource * +xenbus_alloc_resource(device_t bus, device_t child, int type, int *rid, + u_long start, u_long end, u_long count, u_int flags) +{ + struct xenbus_device *xendev = DEVTOXEN(child); + struct resource_list *rl = &xendev->xen_resources; + + return (resource_list_alloc(rl, bus, child, type, rid, start, end, + count, flags)); +} + + +static int +xenbus_release_resource(device_t bus, device_t child, int type, int rid, + struct resource *r) +{ + struct xenbus_device *xendev = DEVTOXEN(child); + struct resource_list *rl = &xendev->xen_resources; + + return (resource_list_release(rl, bus, child, type, rid, r)); +} + +static int +xenbus_set_resource(device_t dev, device_t child, int type, int rid, + u_long start, u_long count) +{ + struct xenbus_device *xendev = DEVTOXEN(child); + struct resource_list *rl = &xendev->xen_resources; + + resource_list_add(rl, type, rid, start, start + count - 1, count); + return(0); +} + +static int +xenbus_get_resource(device_t dev, device_t child, int type, int rid, + u_long *startp, u_long *countp) +{ + struct xenbus_device *xendev = DEVTOXEN(child); + struct resource_list *rl = &xendev->xen_resources; + struct resource_list_entry *rle; + + rle = resource_list_find(rl, type, rid); + if (!rle) + return(ENOENT); + if (startp) + *startp = rle->start; + if (countp) + *countp = rle->count; + return(0); +} + +static void +xenbus_delete_resource(device_t dev, device_t child, int type, int rid) +{ + struct xenbus_device *xendev = DEVTOXEN(child); + struct resource_list *rl = &xendev->xen_resources; + + resource_list_delete(rl, type, rid); +} + +static void +xenbus_init(void *unused) +{ + xenbus_probe_delay = FALSE; + xenbus_attach(xenbus_dev); +} +SYSINIT(xenbusdev, SI_SUB_PSEUDO, SI_ORDER_FIRST, xenbus_init, NULL); Property changes on: i386/xen/xen_bus.c 
___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/xen/mptable.c =================================================================== --- i386/xen/mptable.c (.../stable/6/sys) (revision 0) +++ i386/xen/mptable.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,130 @@ +/*- + * Copyright (c) 2003 John Baldwin + * Copyright (c) 1996, by Steve Passe + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +static int mptable_probe(void); +static int mptable_probe_cpus(void); +static void mptable_register(void *dummy); +static int mptable_setup_local(void); +static int mptable_setup_io(void); + +static struct apic_enumerator mptable_enumerator = { + "MPTable", + mptable_probe, + mptable_probe_cpus, + mptable_setup_local, + mptable_setup_io +}; + +static int +mptable_probe(void) +{ + + return (-100); +} + +static int +mptable_probe_cpus(void) +{ + int i, rc; + + for (i = 0; i < MAXCPU; i++) { + rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); + if (rc >= 0) + cpu_add(i, (i == 0)); + } + + return (0); +} + +/* + * Initialize the local APIC on the BSP. 
+ */ +static int +mptable_setup_local(void) +{ + + return (0); +} + +static int +mptable_setup_io(void) +{ + + return (0); +} + +static void +mptable_register(void *dummy __unused) +{ + + apic_register_enumerator(&mptable_enumerator); +} +SYSINIT(mptable_register, SI_SUB_CPU - 1, SI_ORDER_FIRST, mptable_register, + NULL); + + + +int +mptable_pci_probe_table(int bus) +{ + + return (0); +} + +int +mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin) +{ + + return (0); +} + Property changes on: i386/xen/mptable.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/xen/clock.c =================================================================== --- i386/xen/clock.c (.../stable/6/sys) (revision 0) +++ i386/xen/clock.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,963 @@ +/*- + * Copyright (c) 1990 The Regents of the University of California. + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * William Jolitz and Don Ahn. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)clock.c 7.2 (Berkeley) 5/12/91 + */ + +#include +__FBSDID("$FreeBSD$"); + +/* #define DELAYDEBUG */ +/* + * Routines to handle clock hardware. 
+ */ + +#include "opt_ddb.h" +#include "opt_clock.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#if defined(SMP) +#include +#endif +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we + * can use a simple formula for leap years. + */ +#define LEAPYEAR(y) (!((y) % 4)) +#define DAYSPERYEAR (28+30*4+31*7) + +#ifndef TIMER_FREQ +#define TIMER_FREQ 1193182 +#endif + +#ifdef CYC2NS_SCALE_FACTOR +#undef CYC2NS_SCALE_FACTOR +#endif +#define CYC2NS_SCALE_FACTOR 10 + +/* Values for timerX_state: */ +#define RELEASED 0 +#define RELEASE_PENDING 1 +#define ACQUIRED 2 +#define ACQUIRE_PENDING 3 + +#define RTC_LOCK_INIT \ + mtx_init(&clock_lock, "clk", NULL, MTX_SPIN) +#define RTC_LOCK mtx_lock_spin(&clock_lock) +#define RTC_UNLOCK mtx_unlock_spin(&clock_lock) + +int adjkerntz; /* local offset from GMT in seconds */ +int clkintr_pending; +int pscnt = 1; +int psdiv = 1; +int statclock_disable; +int disable_rtc_set = 0; +int wall_cmos_clock; +u_int timer_freq = TIMER_FREQ; +static int independent_wallclock; +static int xen_disable_rtc_set; +static u_long cached_gtm; /* cached quotient for TSC -> microseconds */ +static u_long cyc2ns_scale; +static u_char timer2_state = RELEASED; +static struct timespec shadow_tv; +static uint32_t shadow_tv_version; /* XXX: lazy locking */ +static uint64_t processed_system_time; /* stime (ns) at last processing. */ +static unsigned int time_irq; + +#ifdef XEN_PRIVILEGED_GUEST +static struct mtx clock_lock; +static int rtc_reg; +#endif + +static const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31}; + +SYSCTL_INT(_machdep, OID_AUTO, independent_wallclock, + CTLFLAG_RW, &independent_wallclock, 0, ""); +SYSCTL_INT(_machdep, OID_AUTO, xen_disable_rtc_set, + CTLFLAG_RW, &xen_disable_rtc_set, 1, ""); + + +#define do_div(n,base) ({ \ + unsigned long __upper, __low, __high, __mod, __base; \ + __base = (base); \ + __asm("":"=a" (__low), "=d" (__high):"A" (n)); \ + __upper = __high; \ + if (__high) { \ + __upper = __high % (__base); \ + __high = __high / (__base); \ + } \ + __asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \ + __asm("":"=A" (n):"a" (__low),"d" (__high)); \ + __mod; \ +}) + + +#define NS_PER_TICK (1000000000ULL/hz) + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + + +/* convert from cycles(64bits) => nanoseconds (64bits) + * basic equation: + * ns = cycles / (freq / ns_per_sec) + * ns = cycles * (ns_per_sec / freq) + * ns = cycles * (10^9 / (cpu_mhz * 10^6)) + * ns = cycles * (10^3 / cpu_mhz) + * + * Then we use scaling math (suggested by george@mvista.com) to get: + * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * cyc2ns_scale / SC + * + * And since SC is a constant power of two, we can convert the div + * into a shift. + * -johnstul@us.ibm.com "math is hard, lets go shopping!" + */ +static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +{ + cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; +} + +static inline unsigned long long cycles_2_ns(unsigned long long cyc) +{ + return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR; +} + +/* + * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, + * yielding a 64-bit result. 
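[Editor's note: the inline-assembly body of scale_delta() that follows is hard to audit, so here is a portable sketch of the same computation, ((delta << shift) * mul_frac) >> 32, carried out as two 32x32 to 64-bit partial products. Illustrative only, not part of the patch.]

#include <stdint.h>

static uint64_t
scale_delta_portable(uint64_t delta, uint32_t mul_frac, int shift)
{
	uint64_t lo, hi;

	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

	lo = (delta & 0xffffffffULL) * mul_frac;	/* low 32 bits of delta */
	hi = (delta >> 32) * mul_frac;			/* high 32 bits of delta */

	/* The full product is (hi << 32) + lo; keep bits 32..95, i.e. >> 32. */
	return (hi + (lo >> 32));
}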
+ */ +static inline uint64_t +scale_delta(uint64_t delta, uint32_t mul_frac, int shift) +{ + uint64_t product; + uint32_t tmp1, tmp2; + + if ( shift < 0 ) + delta >>= -shift; + else + delta <<= shift; + + __asm__ ( + "mul %5 ; " + "mov %4,%%eax ; " + "mov %%edx,%4 ; " + "mul %5 ; " + "add %4,%%eax ; " + "xor %5,%5 ; " + "adc %5,%%edx ; " + : "=A" (product), "=r" (tmp1), "=r" (tmp2) + : "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), "2" (mul_frac) ); + + return product; +} + +static uint64_t get_nsec_offset(struct shadow_time_info *shadow) +{ + uint64_t now, delta; + rdtscll(now); + delta = now - shadow->tsc_timestamp; + return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); +} + +static void update_wallclock(void) +{ + shared_info_t *s = HYPERVISOR_shared_info; + + do { + shadow_tv_version = s->wc_version; + rmb(); + shadow_tv.tv_sec = s->wc_sec; + shadow_tv.tv_nsec = s->wc_nsec; + rmb(); + } + while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version)); + +} + +/* + * Reads a consistent set of time-base values from Xen, into a shadow data + * area. Must be called with the xtime_lock held for writing. + */ +static void __get_time_values_from_xen(void) +{ + shared_info_t *s = HYPERVISOR_shared_info; + struct vcpu_time_info *src; + struct shadow_time_info *dst; + + src = &s->vcpu_info[PCPU_GET(cpuid)].time; + dst = PCPU_PTR(shadow_time); + + do { + dst->version = src->version; + rmb(); + dst->tsc_timestamp = src->tsc_timestamp; + dst->system_timestamp = src->system_time; + dst->tsc_to_nsec_mul = src->tsc_to_system_mul; + dst->tsc_shift = src->tsc_shift; + rmb(); + } + while ((src->version & 1) | (dst->version ^ src->version)); + + dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000; +} + +static inline int time_values_up_to_date(void) +{ + struct vcpu_time_info *src; + struct shadow_time_info *dst; + + src = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)].time; + dst = PCPU_PTR(shadow_time); + + rmb(); + return (dst->version == src->version); +} + +static unsigned xen_get_timecount(struct timecounter *tc); + +static struct timecounter xen_timecounter = { + xen_get_timecount, /* get_timecount */ + 0, /* no poll_pps */ + ~0u, /* counter_mask */ + 0, /* frequency */ + "ixen", /* name */ + 0 /* quality */ +}; + +static void +clkintr(void *arg) +{ + int64_t delta_cpu, delta; + struct shadow_time_info *shadow = PCPU_PTR(shadow_time); + struct clockframe *frame = (struct clockframe *)arg; + + do { + __get_time_values_from_xen(); + + delta = delta_cpu = + shadow->system_timestamp + get_nsec_offset(shadow); + delta -= processed_system_time; + delta_cpu -= PCPU_GET(processed_system_time); + + } while (!time_values_up_to_date()); + + if (unlikely(delta < (int64_t)0) || unlikely(delta_cpu < (int64_t)0)) { + printf("Timer ISR: Time went backwards: %lld\n", delta); + return; + } + + /* Process elapsed ticks since last call. */ + if (delta >= NS_PER_TICK) { + processed_system_time += (delta / NS_PER_TICK) * NS_PER_TICK; + *PCPU_PTR(processed_system_time) += (delta_cpu / NS_PER_TICK) * NS_PER_TICK; + } + hardclock(frame); + + /* + * Take synchronised time from Xen once a minute if we're not + * synchronised ourselves, and we haven't chosen to keep an independent + * time base. 
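[Editor's note: update_wallclock() and __get_time_values_from_xen() above both follow Xen's version-counter convention: the hypervisor bumps the counter before and after writing, so an odd value, or a value that changed during the copy, means the snapshot must be retried. A generic sketch of that reader follows, with hypothetical names and GCC's __sync_synchronize() standing in for rmb(); illustrative only.]

#include <stdint.h>

struct sample {
	volatile uint32_t version;	/* odd while the producer is writing */
	volatile uint64_t a, b;		/* the data being published */
};

static void
snapshot(const struct sample *src, uint64_t *a, uint64_t *b)
{
	uint32_t v;

	do {
		v = src->version;
		__sync_synchronize();		/* rmb(): version before data */
		*a = src->a;
		*b = src->b;
		__sync_synchronize();		/* rmb(): data before recheck */
	} while ((v & 1) != 0 || v != src->version);
}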
+ */ + + if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) { + update_wallclock(); + tc_setclock(&shadow_tv); + } + + /* XXX TODO */ +} + +static uint32_t +getit(void) +{ + struct shadow_time_info *shadow; + shadow = PCPU_PTR(shadow_time); + __get_time_values_from_xen(); + return shadow->system_timestamp + get_nsec_offset(shadow); +} + + +/* + * Wait "n" microseconds. + * Relies on timer 1 counting down from (timer_freq / hz) + * Note: timer had better have been programmed before this is first used! + */ +void +DELAY(int n) +{ + int delta, ticks_left; + uint32_t tick, prev_tick; +#ifdef DELAYDEBUG + int getit_calls = 1; + int n1; + static int state = 0; + + if (state == 0) { + state = 1; + for (n1 = 1; n1 <= 10000000; n1 *= 10) + DELAY(n1); + state = 2; + } + if (state == 1) + printf("DELAY(%d)...", n); +#endif + /* + * Read the counter first, so that the rest of the setup overhead is + * counted. Guess the initial overhead is 20 usec (on most systems it + * takes about 1.5 usec for each of the i/o's in getit(). The loop + * takes about 6 usec on a 486/33 and 13 usec on a 386/20. The + * multiplications and divisions to scale the count take a while). + * + * However, if ddb is active then use a fake counter since reading + * the i8254 counter involves acquiring a lock. ddb must not go + * locking for many reasons, but it calls here for at least atkbd + * input. + */ + prev_tick = getit(); + + n -= 0; /* XXX actually guess no initial overhead */ + /* + * Calculate (n * (timer_freq / 1e6)) without using floating point + * and without any avoidable overflows. + */ + if (n <= 0) + ticks_left = 0; + else if (n < 256) + /* + * Use fixed point to avoid a slow division by 1000000. + * 39099 = 1193182 * 2^15 / 10^6 rounded to nearest. + * 2^15 is the first power of 2 that gives exact results + * for n between 0 and 256. + */ + ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15; + else + /* + * Don't bother using fixed point, although gcc-2.7.2 + * generates particularly poor code for the long long + * division, since even the slow way will complete long + * before the delay is up (unless we're interrupted). + */ + ticks_left = ((u_int)n * (long long)timer_freq + 999999) + / 1000000; + + while (ticks_left > 0) { + tick = getit(); +#ifdef DELAYDEBUG + ++getit_calls; +#endif + delta = tick - prev_tick; + prev_tick = tick; + if (delta < 0) { + /* + * Guard against timer0_max_count being wrong. + * This shouldn't happen in normal operation, + * but it may happen if set_timer_freq() is + * traced. + */ + /* delta += timer0_max_count; ??? */ + if (delta < 0) + delta = 0; + } + ticks_left -= delta; + } +#ifdef DELAYDEBUG + if (state == 1) + printf(" %d calls to getit() at %d usec each\n", + getit_calls, (n + 5) / getit_calls); +#endif +} + + +int +sysbeep(int pitch, int period) +{ + return (0); +} + +/* + * Restore all the timers non-atomically (XXX: should be atomically). + * + * This function is called from pmtimer_resume() to restore all the timers. + * This should not be necessary, but there are broken laptops that do not + * restore all the timers on resume. + */ +void +timer_restore(void) +{ + /* Get timebases for new environment. */ + __get_time_values_from_xen(); + + /* Reset our own concept of passage of system time. 
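[Editor's note: a concrete check of the scaled-integer conversion set up by set_cyc2ns_scale() and cycles_2_ns() earlier in this file. With CYC2NS_SCALE_FACTOR = 10 the divide by the CPU frequency is done once, and every later conversion is a multiply and a shift. Standalone example with made-up numbers, illustrative only.]

#include <stdio.h>
#include <stdint.h>

#define CYC2NS_SCALE_FACTOR	10

int
main(void)
{
	unsigned long cpu_mhz = 2400;	/* example frequency */
	unsigned long scale = (1000 << CYC2NS_SCALE_FACTOR) / cpu_mhz;	/* 426 */
	uint64_t cycles = 2400000000ULL;	/* one second worth of TSC */
	uint64_t ns = (cycles * scale) >> CYC2NS_SCALE_FACTOR;

	/* Prints 998437500: about 0.16% low, the cost of truncating
	 * 1024000 / 2400 = 426.67 down to 426. */
	printf("%llu cycles -> %llu ns\n",
	    (unsigned long long)cycles, (unsigned long long)ns);
	return (0);
}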
*/ + processed_system_time = pcpu_find(0)->pc_shadow_time.system_timestamp; + pcpu_find(0)->pc_processed_system_time = processed_system_time; +} + +void +startrtclock() +{ + unsigned long long alarm; + uint64_t __cpu_khz; + uint32_t cpu_khz; + struct vcpu_time_info *info; + + /* initialize xen values */ + __get_time_values_from_xen(); + processed_system_time = pcpu_find(0)->pc_shadow_time.system_timestamp; + pcpu_find(0)->pc_processed_system_time = processed_system_time; + + __cpu_khz = 1000000ULL << 32; + info = &HYPERVISOR_shared_info->vcpu_info[0].time; + + do_div(__cpu_khz, info->tsc_to_system_mul); + if ( info->tsc_shift < 0 ) + cpu_khz = __cpu_khz << -info->tsc_shift; + else + cpu_khz = __cpu_khz >> info->tsc_shift; + + printf("Xen reported: %u.%03u MHz processor.\n", + cpu_khz / 1000, cpu_khz % 1000); + + /* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz = + (2^32 * 1 / (clocks/us)) */ + { + unsigned long eax=0, edx=1000; + __asm__("divl %2" + :"=a" (cached_gtm), "=d" (edx) + :"r" (cpu_khz), + "0" (eax), "1" (edx)); + } + + set_cyc2ns_scale(cpu_khz/1000); + tsc_freq = cpu_khz * 1000; + + timer_freq = xen_timecounter.tc_frequency = 1000000000LL; + tc_init(&xen_timecounter); + + + rdtscll(alarm); +} + +#ifdef XEN_PRIVILEGED_GUEST +/* + * RTC support routines + */ + +int +rtcin(reg) + int reg; +{ + u_char val; + + RTC_LOCK; + outb(IO_RTC, reg); + inb(0x84); + val = inb(IO_RTC + 1); + inb(0x84); + RTC_UNLOCK; + return (val); +} + + +static __inline int +readrtc(int port) +{ + return(bcd2bin(rtcin(port))); +} + +void +writertc(int reg, u_char val) +{ + + RTC_LOCK; + if (rtc_reg != reg) { + inb(0x84); + outb(IO_RTC, reg); + rtc_reg = reg; + inb(0x84); + } + outb(IO_RTC + 1, val); + inb(0x84); + RTC_UNLOCK; +} + + +/* + * Initialize the time of day register, based on the time base which is, e.g. + * from a filesystem. + */ +static void +domu_inittodr(time_t base) +{ + unsigned long sec; + int s, y; + struct timespec ts; + + update_wallclock(); + + RTC_LOCK; + + if (base) { + ts.tv_sec = base; + ts.tv_nsec = 0; + tc_setclock(&ts); + } + + sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); + + y = time_second - shadow_tv.tv_sec; + if (y <= -2 || y >= 2) { + /* badly off, adjust it */ + tc_setclock(&shadow_tv); + } + RTC_UNLOCK; +} + +/* + * Write system time back to RTC. + */ +static void +domu_resettodr(void) +{ + unsigned long tm; + int s; + dom0_op_t op; + struct shadow_time_info *shadow; + + shadow = PCPU_PTR(shadow_time); + if (xen_disable_rtc_set) + return; + + s = splclock(); + tm = time_second; + splx(s); + + tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); + + if ((xen_start_info->flags & SIF_INITDOMAIN) && + !independent_wallclock) + { + op.cmd = DOM0_SETTIME; + op.u.settime.secs = tm; + op.u.settime.nsecs = 0; + op.u.settime.system_time = shadow->system_timestamp; + HYPERVISOR_dom0_op(&op); + update_wallclock(); + } else if (independent_wallclock) { + /* notyet */ + ; + } +} + +/* + * Initialize the time of day register, based on the time base which is, e.g. + * from a filesystem. 
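[Editor's note: startrtclock() above recovers the CPU frequency from Xen's tsc_to_system_mul/tsc_shift pair, which publishes nanoseconds-per-cycle as a 32.32 fixed-point fraction scaled by 2^tsc_shift. A portable sketch of the same do_div() arithmetic, illustrative only and not part of the patch.]

#include <stdint.h>

static uint32_t
xen_cpu_khz(uint32_t tsc_to_system_mul, int tsc_shift)
{
	/* ns per cycle = (tsc_to_system_mul / 2^32) * 2^tsc_shift, so
	 * kHz = 10^6 / (ns per cycle) = ((10^6 << 32) / mul), corrected
	 * by the shift. */
	uint64_t khz = (1000000ULL << 32) / tsc_to_system_mul;

	if (tsc_shift < 0)
		return ((uint32_t)(khz << -tsc_shift));
	return ((uint32_t)(khz >> tsc_shift));
}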
+ */ +void +inittodr(time_t base) +{ + unsigned long sec, days; + int year, month; + int y, m, s; + struct timespec ts; + + if (!(xen_start_info->flags & SIF_INITDOMAIN)) { + domu_inittodr(base); + return; + } + + if (base) { + s = splclock(); + ts.tv_sec = base; + ts.tv_nsec = 0; + tc_setclock(&ts); + splx(s); + } + + /* Look if we have a RTC present and the time is valid */ + if (!(rtcin(RTC_STATUSD) & RTCSD_PWR)) + goto wrong_time; + + /* wait for time update to complete */ + /* If RTCSA_TUP is zero, we have at least 244us before next update */ + s = splhigh(); + while (rtcin(RTC_STATUSA) & RTCSA_TUP) { + splx(s); + s = splhigh(); + } + + days = 0; +#ifdef USE_RTC_CENTURY + year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100; +#else + year = readrtc(RTC_YEAR) + 1900; + if (year < 1970) + year += 100; +#endif + if (year < 1970) { + splx(s); + goto wrong_time; + } + month = readrtc(RTC_MONTH); + for (m = 1; m < month; m++) + days += daysinmonth[m-1]; + if ((month > 2) && LEAPYEAR(year)) + days ++; + days += readrtc(RTC_DAY) - 1; + for (y = 1970; y < year; y++) + days += DAYSPERYEAR + LEAPYEAR(y); + sec = ((( days * 24 + + readrtc(RTC_HRS)) * 60 + + readrtc(RTC_MIN)) * 60 + + readrtc(RTC_SEC)); + /* sec now contains the number of seconds, since Jan 1 1970, + in the local time zone */ + + sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); + + y = time_second - sec; + if (y <= -2 || y >= 2) { + /* badly off, adjust it */ + ts.tv_sec = sec; + ts.tv_nsec = 0; + tc_setclock(&ts); + } + splx(s); + return; + + wrong_time: + printf("Invalid time in real time clock.\n"); + printf("Check and reset the date immediately!\n"); +} + + + +/* + * Write system time back to RTC + */ +void +resettodr() +{ + unsigned long tm; + int y, m, s; + + if (!(xen_start_info->flags & SIF_INITDOMAIN)) { + domu_resettodr(); + return; + } + + if (xen_disable_rtc_set) + return; + + s = splclock(); + tm = time_second; + splx(s); + + /* Disable RTC updates and interrupts. */ + writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR); + + /* Calculate local time to put in RTC */ + + tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0); + + writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */ + writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */ + writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */ + + /* We have now the days since 01-01-1970 in tm */ + writertc(RTC_WDAY, (tm + 4) % 7 + 1); /* Write back Weekday */ + for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y); + tm >= m; + y++, m = DAYSPERYEAR + LEAPYEAR(y)) + tm -= m; + + /* Now we have the years in y and the day-of-the-year in tm */ + writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */ +#ifdef USE_RTC_CENTURY + writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */ +#endif + for (m = 0; ; m++) { + int ml; + + ml = daysinmonth[m]; + if (m == 1 && LEAPYEAR(y)) + ml++; + if (tm < ml) + break; + tm -= ml; + } + + writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */ + writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */ + + /* Reenable RTC updates and interrupts. */ + writertc(RTC_STATUSB, RTCSB_24HR); + rtcin(RTC_INTR); +} +#else +/* + * Initialize the time of day register, based on the time base which is, e.g. + * from a filesystem. 
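[Editor's note: the privileged-guest inittodr() above accumulates seconds since 1970 from the BCD RTC fields by hand. The core calendar arithmetic, pulled out as a standalone sketch; timezone, adjkerntz and the two-digit-year handling are omitted, and the simple leap-year test is only valid for 1901-2099, as in the original.]

#define LEAPYEAR(y)	(!((y) % 4))	/* good enough for 1901..2099 */
#define DAYSPERYEAR	365

static const int daysinmonth[12] = {31,28,31,30,31,30,31,31,30,31,30,31};

/* Seconds since 1970-01-01 00:00:00 for a calendar date and time. */
static long
rtc_to_unix(int year, int month, int day, int hr, int min, int sec)
{
	long days = 0;
	int m, y;

	for (y = 1970; y < year; y++)
		days += DAYSPERYEAR + LEAPYEAR(y);
	for (m = 1; m < month; m++)
		days += daysinmonth[m - 1];
	if (month > 2 && LEAPYEAR(year))
		days++;				/* Feb 29 of the current year */
	days += day - 1;

	return ((((days * 24 + hr) * 60 + min) * 60) + sec);
}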
+ */ +void +inittodr(time_t base) +{ + int s, y; + struct timespec ts; + + s = splclock(); + if (base) { + ts.tv_sec = base; + ts.tv_nsec = 0; + tc_setclock(&ts); + } + + y = time_second - shadow_tv.tv_sec; + if (y <= -2 || y >= 2) { + /* badly off, adjust it */ + ts.tv_sec = shadow_tv.tv_sec; + ts.tv_nsec = shadow_tv.tv_nsec * 1000000000; /* :-/ */ + tc_setclock(&ts); + } + splx(s); +} + +/* + * Write system time back to RTC. Not supported for guest domains. + */ +void +resettodr() +{ +} +#endif + + +int +acquire_timer2(int mode) +{ + + if (timer2_state != RELEASED) + return (-1); + timer2_state = ACQUIRED; + + /* + * This access to the timer registers is as atomic as possible + * because it is a single instruction. We could do better if we + * knew the rate. Use of splclock() limits glitches to 10-100us, + * and this is probably good enough for timer2, so we aren't as + * careful with it as with timer0. + */ + outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f)); + + return (0); +} + +int +release_timer2() +{ + + if (timer2_state != ACQUIRED) + return (-1); + timer2_state = RELEASED; + outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT); + return (0); +} + +static struct vcpu_set_periodic_timer xen_set_periodic_tick; + +/* + * Start clocks running. + */ +void +cpu_initclocks(void) +{ + int error; + + xen_set_periodic_tick.period_ns = NS_PER_TICK; + + HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0, + &xen_set_periodic_tick); + + if (time_irq) + unbind_from_irqhandler(time_irq); + time_irq = 0; + + error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "clk", + clkintr, + INTR_TYPE_CLK | INTR_FAST, &time_irq); + if (error) + panic("failed to register clock interrupt\n"); + + /* should fast clock be enabled ? */ +} + +int +ap_cpu_initclocks(int cpu) +{ + unsigned int time_irq; + int error; + + xen_set_periodic_tick.period_ns = NS_PER_TICK; + + HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, cpu, + &xen_set_periodic_tick); + error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "clk", + clkintr, + INTR_TYPE_CLK | INTR_FAST, &time_irq); + if (error) + panic("failed to register clock interrupt\n"); + + + return (0); +} + +void +cpu_startprofclock(void) +{ + + printf("cpu_startprofclock: profiling clock is not supported\n"); +} + +void +cpu_stopprofclock(void) +{ + + printf("cpu_stopprofclock: profiling clock is not supported\n"); +} +#define NSEC_PER_USEC 1000 + +static uint32_t +xen_get_timecount(struct timecounter *tc) +{ + uint64_t clk; + struct shadow_time_info *shadow = PCPU_PTR(shadow_time); + + __get_time_values_from_xen(); + + clk = shadow->system_timestamp + get_nsec_offset(shadow); + + return (uint32_t)((clk / NS_PER_TICK) * NS_PER_TICK); + +} + +/* Return system time offset by ticks */ +uint64_t +get_system_time(int ticks) +{ + return processed_system_time + (ticks * NS_PER_TICK); +} + +/* + * Track behavior of cur_timer->get_offset() functionality in timer_tsc.c + */ + +#if 0 +static uint32_t +xen_get_offset(void) +{ + register unsigned long eax, edx; + + /* Read the Time Stamp Counter */ + + rdtsc(eax,edx); + + /* .. relative to previous jiffy (32 bits is enough) */ + eax -= shadow_tsc_stamp; + + /* + * Time offset = (tsc_low delta) * cached_gtm + * = (tsc_low delta) * (usecs_per_clock) + * = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy) + * + * Using a mull instead of a divl saves up to 31 clock cycles + * in the critical path. 
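[Editor's note: the disabled (#if 0) xen_get_offset() here keeps cached_gtm as a 0.32 fixed-point count of microseconds per TSC cycle (2^32 divided by cycles-per-microsecond), so the elapsed microseconds are the upper 32 bits of a single 32x32 multiply. Portable sketch of that one-multiply conversion, illustrative only.]

#include <stdint.h>

static uint32_t
tsc_delta_to_usec(uint32_t tsc_delta, uint32_t cached_gtm)
{
	/* cached_gtm = 2^32 / cycles_per_usec, so the product's high word
	 * is tsc_delta / cycles_per_usec, i.e. microseconds. */
	return ((uint32_t)(((uint64_t)tsc_delta * cached_gtm) >> 32));
}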
+ */ + + __asm__("mull %2" + :"=a" (eax), "=d" (edx) + :"rm" (cached_gtm), + "0" (eax)); + + /* our adjusted time offset in microseconds */ + return edx; +} +#endif +void +idle_block(void) +{ + int err; + + __get_time_values_from_xen(); + err = HYPERVISOR_set_timer_op(processed_system_time + NS_PER_TICK); + KASSERT(err == 0, ("set_timer_op failed")); + HYPERVISOR_sched_op(SCHEDOP_block, 0); +} Property changes on: i386/xen/clock.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/xen/xen_machdep.c =================================================================== --- i386/xen/xen_machdep.c (.../stable/6/sys) (revision 0) +++ i386/xen/xen_machdep.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,1280 @@ +/* + * + * Copyright (c) 2004 Christian Limpach. + * Copyright (c) 2004-2006,2008 Kip Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Christian Limpach. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include +#include +#include +#include +#include +#include +#include +#ifdef SMP +#include +#endif + + +#include + +#define IDTVEC(name) __CONCAT(X,name) + +extern inthand_t +IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), + IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), + IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), + IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), + IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); + + +int xendebug_flags; +start_info_t *xen_start_info; +shared_info_t *HYPERVISOR_shared_info; +xen_pfn_t *xen_machine_phys = machine_to_phys_mapping; +xen_pfn_t *xen_phys_machine; +xen_pfn_t *xen_pfn_to_mfn_frame_list[16]; +xen_pfn_t *xen_pfn_to_mfn_frame_list_list; +int preemptable, init_first; +extern unsigned int avail_space; + +static void printk(const char *fmt, ...); + +void ni_cli(void); +void ni_sti(void); + + +void +ni_cli(void) +{ + __asm__("pushl %edx;" + "pushl %eax;" + ); + __cli(); + __asm__("popl %eax;" + "popl %edx;" + ); +} + + +void +ni_sti(void) +{ + __asm__("pushl %edx;" + "pushl %esi;" + "pushl %eax;" + ); + __sti(); + __asm__("popl %eax;" + "popl %esi;" + "popl %edx;" + ); +} + +/* + * Modify the cmd_line by converting ',' to NULLs so that it is in a format + * suitable for the static env vars. + */ +char * +xen_setbootenv(char *cmd_line) +{ + char *cmd_line_next; + + /* Skip leading spaces */ + for (; *cmd_line == ' '; cmd_line++); + + printk("xen_setbootenv(): cmd_line='%s'\n", cmd_line); + + for (cmd_line_next = cmd_line; strsep(&cmd_line_next, ",") != NULL;); + return cmd_line; +} + +static struct +{ + const char *ev; + int mask; +} howto_names[] = { + {"boot_askname", RB_ASKNAME}, + {"boot_single", RB_SINGLE}, + {"boot_nosync", RB_NOSYNC}, + {"boot_halt", RB_ASKNAME}, + {"boot_serial", RB_SERIAL}, + {"boot_cdrom", RB_CDROM}, + {"boot_gdb", RB_GDB}, + {"boot_gdb_pause", RB_RESERVED1}, + {"boot_verbose", RB_VERBOSE}, + {"boot_multicons", RB_MULTIPLE}, + {NULL, 0} +}; + +int +xen_boothowto(char *envp) +{ + int i, howto = 0; + + /* get equivalents from the environment */ + for (i = 0; howto_names[i].ev != NULL; i++) + if (getenv(howto_names[i].ev) != NULL) + howto |= howto_names[i].mask; + return howto; +} + +#define PRINTK_BUFSIZE 1024 +static void +printk(const char *fmt, ...) +{ + __va_list ap; + int retval; + static char buf[PRINTK_BUFSIZE]; + + return; + + va_start(ap, fmt); + retval = vsnprintf(buf, PRINTK_BUFSIZE - 1, fmt, ap); + va_end(ap); + buf[retval] = 0; + (void)HYPERVISOR_console_write(buf, retval); +} + + +#define XPQUEUE_SIZE 128 + +struct mmu_log { + char *file; + int line; +}; + +#ifdef SMP +/* per-cpu queues and indices */ +#ifdef INVARIANTS +static struct mmu_log xpq_queue_log[MAX_VIRT_CPUS][XPQUEUE_SIZE]; +#endif + +static int xpq_idx[MAX_VIRT_CPUS]; +static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE]; + +#define XPQ_QUEUE xpq_queue[vcpu] +#define XPQ_IDX xpq_idx[vcpu] +#define SET_VCPU() int vcpu = gdtset ? 
PCPU_GET(cpuid) : 0 + +#define XPQ_QUEUE_LOG xpq_queue_log[vcpu] +#else + +static mmu_update_t xpq_queue[XPQUEUE_SIZE]; +static struct mmu_log xpq_queue_log[XPQUEUE_SIZE]; +static int xpq_idx = 0; + +#define XPQ_QUEUE_LOG xpq_queue_log +#define XPQ_QUEUE xpq_queue +#define XPQ_IDX xpq_idx +#define SET_VCPU() +#endif /* !SMP */ + +#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1); + +#if 0 +static void +xen_dump_queue(void) +{ + int _xpq_idx = XPQ_IDX; + int i; + + if (_xpq_idx <= 1) + return; + + printk("xen_dump_queue(): %u entries\n", _xpq_idx); + for (i = 0; i < _xpq_idx; i++) { + printk(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr); + } +} +#endif + + +static __inline void +_xen_flush_queue(void) +{ + SET_VCPU(); + int _xpq_idx = XPQ_IDX; + int error, i; + /* window of vulnerability here? */ + + if (__predict_true(gdtset)) + critical_enter(); + XPQ_IDX = 0; + /* Make sure index is cleared first to avoid double updates. */ + error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE, + _xpq_idx, NULL, DOMID_SELF); + +#if 0 + if (__predict_true(gdtset)) + for (i = _xpq_idx; i > 0;) { + if (i >= 3) { + CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx " + "ptr: %lx val: %lx ptr: %lx", + (XPQ_QUEUE[i-1].val & 0xffffffff), + (XPQ_QUEUE[i-1].ptr & 0xffffffff), + (XPQ_QUEUE[i-2].val & 0xffffffff), + (XPQ_QUEUE[i-2].ptr & 0xffffffff), + (XPQ_QUEUE[i-3].val & 0xffffffff), + (XPQ_QUEUE[i-3].ptr & 0xffffffff)); + i -= 3; + } else if (i == 2) { + CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx", + (XPQ_QUEUE[i-1].val & 0xffffffff), + (XPQ_QUEUE[i-1].ptr & 0xffffffff), + (XPQ_QUEUE[i-2].val & 0xffffffff), + (XPQ_QUEUE[i-2].ptr & 0xffffffff)); + i = 0; + } else { + CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx", + (XPQ_QUEUE[i-1].val & 0xffffffff), + (XPQ_QUEUE[i-1].ptr & 0xffffffff)); + i = 0; + } + } +#endif + if (__predict_true(gdtset)) + critical_exit(); + if (__predict_false(error < 0)) { + for (i = 0; i < _xpq_idx; i++) + printf("val: %llx ptr: %llx\n", + XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr); + panic("Failed to execute MMU updates: %d", error); + } + +} + +void +xen_flush_queue(void) +{ + SET_VCPU(); + if (XPQ_IDX != 0) _xen_flush_queue(); +} + +static __inline void +xen_increment_idx(void) +{ + SET_VCPU(); + + XPQ_IDX++; + if (__predict_false(XPQ_IDX == XPQUEUE_SIZE)) + xen_flush_queue(); +} + +void +xen_check_queue(void) +{ +#ifdef INVARIANTS + SET_VCPU(); + + KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX)); +#endif +} + +void +xen_invlpg(vm_offset_t va) +{ + struct mmuext_op op; + int err; + op.cmd = MMUEXT_INVLPG_ALL; + op.arg1.linear_addr = va & ~PAGE_MASK; + err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF); + KASSERT(err >= 0, ("mmuext_op failed")); +} + +void +xen_load_cr3(u_int val) +{ + struct mmuext_op op; + int err; +#ifdef INVARIANTS + SET_VCPU(); + + KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX)); +#endif + op.cmd = MMUEXT_NEW_BASEPTR; + op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT; + err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF); + KASSERT(err >= 0, ("mmuext_op failed")); +} + +void +xen_restore_flags(u_int eflags) +{ + + if (eflags > 1) + eflags = ((eflags & PSL_I) == 0); + + __restore_flags(eflags); +} + +int +xen_save_and_cli(void) +{ + int eflags; + + __save_and_cli(eflags); + return (eflags); +} + +void +xen_cli(void) +{ + __cli(); +} + +void +xen_sti(void) +{ + __sti(); +} + +u_int +xen_rcr2(void) +{ + + return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2); +} + +void +_xen_machphys_update(vm_paddr_t mfn, 
vm_paddr_t pfn, char *file, int line) +{ + SET_VCPU(); + + if (__predict_true(gdtset)) + critical_enter(); + XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; + XPQ_QUEUE[XPQ_IDX].val = pfn; +#ifdef INVARIANTS + XPQ_QUEUE_LOG[XPQ_IDX].file = file; + XPQ_QUEUE_LOG[XPQ_IDX].line = line; +#endif + xen_increment_idx(); + if (__predict_true(gdtset)) + critical_exit(); +} + +void +_xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line) +{ + SET_VCPU(); +#if 0 + if (__predict_true(gdtset)) + mtx_assert(&vm_page_queue_mtx, MA_OWNED); +#endif + + KASSERT((ptr & 7) == 0, ("misaligned update")); + + if (__predict_true(gdtset)) + critical_enter(); + + XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE; + XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val; +#ifdef INVARIANTS + XPQ_QUEUE_LOG[XPQ_IDX].file = file; + XPQ_QUEUE_LOG[XPQ_IDX].line = line; +#endif + xen_increment_idx(); + if (__predict_true(gdtset)) + critical_exit(); +} + +void +xen_pgdpt_pin(vm_paddr_t ma) +{ + struct mmuext_op op; + int err; + op.cmd = MMUEXT_PIN_L3_TABLE; + op.arg1.mfn = ma >> PAGE_SHIFT; + xen_flush_queue(); + err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF); + KASSERT(err >= 0, ("mmuext_op failed")); +} + +void +xen_pgd_pin(vm_paddr_t ma) +{ + struct mmuext_op op; + int err; + op.cmd = MMUEXT_PIN_L2_TABLE; + op.arg1.mfn = ma >> PAGE_SHIFT; + xen_flush_queue(); + err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF); + KASSERT(err >= 0, ("mmuext_op failed")); +} + +void +xen_pgd_unpin(vm_paddr_t ma) +{ + struct mmuext_op op; + int err; + op.cmd = MMUEXT_UNPIN_TABLE; + op.arg1.mfn = ma >> PAGE_SHIFT; + xen_flush_queue(); + err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF); + KASSERT(err >= 0, ("mmuext_op failed")); +} + +void +xen_pt_pin(vm_paddr_t ma) +{ + struct mmuext_op op; + int err; + op.cmd = MMUEXT_PIN_L1_TABLE; + op.arg1.mfn = ma >> PAGE_SHIFT; + printk("xen_pt_pin(): mfn=%x\n", op.arg1.mfn); + xen_flush_queue(); + err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF); + KASSERT(err >= 0, ("mmuext_op failed")); +} + +void +xen_pt_unpin(vm_paddr_t ma) +{ + struct mmuext_op op; + int err; + op.cmd = MMUEXT_UNPIN_TABLE; + op.arg1.mfn = ma >> PAGE_SHIFT; + xen_flush_queue(); + err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF); + KASSERT(err >= 0, ("mmuext_op failed")); +} + +void +xen_set_ldt(vm_paddr_t ptr, unsigned long len) +{ + struct mmuext_op op; + int err; + op.cmd = MMUEXT_SET_LDT; + op.arg1.linear_addr = ptr; + op.arg2.nr_ents = len; + xen_flush_queue(); + err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF); + KASSERT(err >= 0, ("mmuext_op failed")); +} + +void xen_tlb_flush(void) +{ + struct mmuext_op op; + int err; + op.cmd = MMUEXT_TLB_FLUSH_LOCAL; + xen_flush_queue(); + err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF); + KASSERT(err >= 0, ("mmuext_op failed")); +} + +void +xen_update_descriptor(union descriptor *table, union descriptor *entry) +{ + vm_paddr_t pa; + pt_entry_t *ptp; + + ptp = vtopte((vm_offset_t)table); + pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK); + if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry)) + panic("HYPERVISOR_update_descriptor failed\n"); +} + + +#if 0 +/* + * Bitmap is indexed by page number. If bit is set, the page is part of a + * xen_create_contiguous_region() area of memory. 
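[Editor's note: the contiguous_bitmap_set() body just below appears garbled in this copy of the diff (parts of it, including the shift expressions, have been lost), so here is a hedged reconstruction of the intent the comment above describes: set nr_pages consecutive bits, taking care when the range spans more than one long word. Illustrative only; the original code may differ in detail.]

#include <limits.h>

#define BITS_PER_LONG	(sizeof(unsigned long) * CHAR_BIT)

static void
bitmap_set_range(unsigned long *bm, unsigned long first, unsigned long n)
{
	unsigned long idx = first / BITS_PER_LONG;
	unsigned long off = first % BITS_PER_LONG;

	while (n > 0) {
		unsigned long chunk = BITS_PER_LONG - off;	/* bits left in this word */

		if (chunk > n)
			chunk = n;
		bm[idx] |= (chunk == BITS_PER_LONG) ?
		    ~0UL : (((1UL << chunk) - 1) << off);
		n -= chunk;
		off = 0;
		idx++;
	}
}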
+ */ +unsigned long *contiguous_bitmap; + +static void +contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages) +{ + unsigned long start_off, end_off, curr_idx, end_idx; + + curr_idx = first_page / BITS_PER_LONG; + start_off = first_page & (BITS_PER_LONG-1); + end_idx = (first_page + nr_pages) / BITS_PER_LONG; + end_off = (first_page + nr_pages) & (BITS_PER_LONG-1); + + if (curr_idx == end_idx) { + contiguous_bitmap[curr_idx] |= + ((1UL<> PAGE_SHIFT; + mfn = PFNTOMFN(pfn); + PFNTOMFN(pfn) = INVALID_P2M_ENTRY; + err = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + KASSERT(err == 1, ("memory_op failed")); + } + + + /* 2. Get a new contiguous memory extent. */ + reservation.extent_order = order; + /* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not + * running with a broxen driver XXXEN + */ + reservation.address_bits = 31; + if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1) + goto fail; + + /* 3. Map the new extent in place of old pages. */ + for (i = 0; i < (1 << order); i++) { + int pfn; + pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT; + xen_machphys_update(mfn+i, pfn); + PFNTOMFN(pfn) = mfn+i; + } + + xen_tlb_flush(); + +#if 0 + contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order); +#endif + + balloon_unlock(flags); + + return 0; + + fail: + reservation.extent_order = 0; + reservation.address_bits = 0; + + for (i = 0; i < (1 << order); i++) { + int pfn; + pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT; + err = HYPERVISOR_memory_op( + XENMEM_increase_reservation, &reservation); + KASSERT(err == 1, ("memory_op failed")); + xen_machphys_update(mfn, pfn); + PFNTOMFN(pfn) = mfn; + } + + xen_tlb_flush(); + + balloon_unlock(flags); + + return ENOMEM; +} + +void +xen_destroy_contiguous_region(void *addr, int npages) +{ + unsigned long mfn, i, flags, order, pfn0; + int err; + struct xen_memory_reservation reservation = { + .nr_extents = 1, + .extent_order = 0, + .domid = DOMID_SELF + }; + set_xen_guest_handle(reservation.extent_start, &mfn); + + pfn0 = vtophys(addr) >> PAGE_SHIFT; +#if 0 + scrub_pages(vstart, 1 << order); +#endif + /* can currently only handle power of two allocation */ + KASSERT(ffs(npages) == fls(npages), ("non-power of 2 page count")); + + /* 0. determine order */ + order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages); + + balloon_lock(flags); + +#if 0 + contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order); +#endif + + /* 1. Zap current PTEs, giving away the underlying pages. */ + for (i = 0; i < (1 << order); i++) { + int pfn; + uint64_t new_val = 0; + pfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT; + + err = HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0); + KASSERT(err == 0, ("update_va_mapping failed")); + PFNTOMFN(pfn) = INVALID_P2M_ENTRY; + err = HYPERVISOR_memory_op( + XENMEM_decrease_reservation, &reservation); + KASSERT(err == 1, ("memory_op failed")); + } + + /* 2. Map new pages in place of old pages. 
*/ + for (i = 0; i < (1 << order); i++) { + int pfn; + uint64_t new_val; + pfn = pfn0 + i; + err = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation); + KASSERT(err == 1, ("memory_op failed")); + + new_val = mfn << PAGE_SHIFT; + err = HYPERVISOR_update_va_mapping( + (vm_offset_t)addr + (i * PAGE_SIZE), + new_val, PG_KERNEL); + KASSERT(err == 0, ("update_va_mapping failed")); + xen_machphys_update(mfn, pfn); + PFNTOMFN(pfn) = mfn; + } + + xen_tlb_flush(); + + balloon_unlock(flags); +} + +extern unsigned long cpu0prvpage; +extern unsigned long *SMPpt; +extern struct user *proc0uarea; +extern vm_offset_t proc0kstack; +extern int vm86paddr, vm86phystk; +char *bootmem_start, *bootmem_current, *bootmem_end; + +pteinfo_t *pteinfo_list; +void initvalues(start_info_t *startinfo); + +struct ringbuf_head *xen_store; /* XXX move me */ +char *console_page; + +void * +bootmem_alloc(unsigned int size) +{ + char *retptr; + + retptr = bootmem_current; + KASSERT(retptr + size <= bootmem_end, ("bootmem_alloc failed")); + bootmem_current += size; + + return retptr; +} + +void +bootmem_free(void *ptr, unsigned int size) +{ + char *tptr; + + tptr = ptr; + KASSERT(tptr == bootmem_current - size && + bootmem_current - size >= bootmem_start, + ("bootmem_free failed")); + + bootmem_current -= size; +} + +#if 0 +static vm_paddr_t +xpmap_mtop2(vm_paddr_t mpa) +{ + return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT) + ) | (mpa & ~PG_FRAME); +} + +static pd_entry_t +xpmap_get_bootpde(vm_paddr_t va) +{ + + return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22]; +} + +static pd_entry_t +xpmap_get_vbootpde(vm_paddr_t va) +{ + pd_entry_t pde; + + pde = xpmap_get_bootpde(va); + if ((pde & PG_V) == 0) + return (pde & ~PG_FRAME); + return (pde & ~PG_FRAME) | + (xpmap_mtop2(pde & PG_FRAME) + KERNBASE); +} + +static pt_entry_t 8* +xpmap_get_bootptep(vm_paddr_t va) +{ + pd_entry_t pde; + + pde = xpmap_get_vbootpde(va); + if ((pde & PG_V) == 0) + return (void *)-1; +#define PT_MASK 0x003ff000 /* page table address bits */ + return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]); +} + +static pt_entry_t +xpmap_get_bootpte(vm_paddr_t va) +{ + + return xpmap_get_bootptep(va)[0]; +} +#endif + + +#ifdef ADD_ISA_HOLE +static void +shift_phys_machine(unsigned long *phys_machine, int nr_pages) +{ + + unsigned long *tmp_page, *current_page, *next_page; + int i; + + tmp_page = bootmem_alloc(PAGE_SIZE); + current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long)); + next_page = current_page - (PAGE_SIZE/sizeof(unsigned long)); + bcopy(phys_machine, tmp_page, PAGE_SIZE); + + while (current_page > phys_machine) { + /* save next page */ + bcopy(next_page, tmp_page, PAGE_SIZE); + /* shift down page */ + bcopy(current_page, next_page, PAGE_SIZE); + /* finish swap */ + bcopy(tmp_page, current_page, PAGE_SIZE); + + current_page -= (PAGE_SIZE/sizeof(unsigned long)); + next_page -= (PAGE_SIZE/sizeof(unsigned long)); + } + bootmem_free(tmp_page, PAGE_SIZE); + + for (i = 0; i < nr_pages; i++) { + xen_machphys_update(phys_machine[i], i); + } + memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE); + +} +#endif /* ADD_ISA_HOLE */ + +/* + * Build a directory of the pages that make up our Physical to Machine + * mapping table. The Xen suspend/restore code uses this to find our + * mapping table. 
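[Editor's note: init_frame_list_list() below packs the machine-frame numbers of the p2m pages into a two-level directory: each frame-list page holds FPP entries, and the top-level page holds the MFNs of the frame-list pages themselves. A small sketch of the index arithmetic, assuming 4 KB pages and 4-byte xen_pfn_t entries (so FPP = 1024), with hypothetical names; illustrative only.]

#include <stdint.h>

#define PAGE_BYTES	4096
#define FPP		(PAGE_BYTES / sizeof(uint32_t))	/* p2m entries per page */

/* For a given guest pfn, find which top-level slot (k) and which slot within
 * that frame-list page (j) point at the p2m page covering it. */
static void
p2m_slot(unsigned long pfn, unsigned long *k, unsigned long *j)
{
	unsigned long p2m_page = pfn / FPP;	/* which p2m page holds this pfn */

	*k = p2m_page / FPP;	/* frame-list page named in the top level */
	*j = p2m_page % FPP;	/* slot inside that frame-list page */
}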
+ */ +static void +init_frame_list_list(void *arg) +{ + unsigned long nr_pages = xen_start_info->nr_pages; +#define FPP (PAGE_SIZE/sizeof(xen_pfn_t)) + int i, j, k; + + xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); + for (i = 0, j = 0, k = -1; i < nr_pages; + i += FPP, j++) { + if ((j & (FPP - 1)) == 0) { + k++; + xen_pfn_to_mfn_frame_list[k] = + malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); + xen_pfn_to_mfn_frame_list_list[k] = + VTOMFN(xen_pfn_to_mfn_frame_list[k]); + j = 0; + } + xen_pfn_to_mfn_frame_list[k][j] = + VTOMFN(&xen_phys_machine[i]); + } + + HYPERVISOR_shared_info->arch.max_pfn = nr_pages; + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list + = VTOMFN(xen_pfn_to_mfn_frame_list_list); +} +SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL); + +extern unsigned long physfree; + +int pdir, curoffset; +extern int nkpt; + +void +initvalues(start_info_t *startinfo) +{ + int l3_pages, l2_pages, l1_pages, offset; + vm_offset_t cur_space, cur_space_pt; + struct physdev_set_iopl set_iopl; + + vm_paddr_t KPTphys, IdlePTDma; + vm_paddr_t console_page_ma, xen_store_ma; + vm_offset_t KPTphysoff, tmpva; + vm_paddr_t shinfo; +#ifdef PAE + vm_paddr_t IdlePDPTma, IdlePDPTnewma; + vm_paddr_t IdlePTDnewma[4]; + pd_entry_t *IdlePDPTnew, *IdlePTDnew; +#else + vm_paddr_t pdir_shadow_ma; +#endif + unsigned long i; + int ncpus, err; + + nkpt = min( + min( + max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt), + NPGPTD*NPDEPG - KPTDI), + (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT); + +#ifdef SMP + ncpus = MAXCPU; +#else + ncpus = 1; +#endif + + HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); +#ifdef notyet + /* + * need to install handler + */ + HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify); +#endif + xen_start_info = startinfo; + xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list; + + IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE); + l1_pages = 0; + +#ifdef PAE + l3_pages = 1; + l2_pages = 0; + IdlePDPT = (pd_entry_t *)startinfo->pt_base; + IdlePDPTma = xpmap_ptom(VTOP(startinfo->pt_base)); + for (i = (KERNBASE >> 30); + (i < 4) && (IdlePDPT[i] != 0); i++) + l2_pages++; + /* + * Note that only one page directory has been allocated at this point. + * Thus, if KERNBASE + */ +#if 0 + for (i = 0; i < l2_pages; i++) + IdlePTDma[i] = xpmap_ptom(VTOP(IdlePTD + i*PAGE_SIZE)); +#endif + + l2_pages = (l2_pages == 0) ? 
1 : l2_pages; +#else + l3_pages = 0; + l2_pages = 1; +#endif + for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT); + (i>PDRSHIFT)); i++) { + + if (IdlePTD[i] == 0) + break; + l1_pages++; + } + + /* number of pages allocated after the pts + 1*/; + cur_space = xen_start_info->pt_base + + ((xen_start_info->nr_pt_frames) + 3 )*PAGE_SIZE; + printk("initvalues(): wooh - availmem=%x,%x\n", avail_space, cur_space); + + printk("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n", + KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base), + xen_start_info->nr_pt_frames); + xendebug_flags = 0; /* 0xffffffff; */ + + /* allocate 4 pages for bootmem allocator */ + bootmem_start = bootmem_current = (char *)cur_space; + cur_space += (4 * PAGE_SIZE); + bootmem_end = (char *)cur_space; + + /* allocate page for gdt */ + gdt = (union descriptor *)cur_space; + cur_space += PAGE_SIZE*ncpus; + + /* allocate page for ldt */ + ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE; + cur_space += PAGE_SIZE; + + HYPERVISOR_shared_info = (shared_info_t *)cur_space; + cur_space += PAGE_SIZE; + + xen_store = (struct ringbuf_head *)cur_space; + cur_space += PAGE_SIZE; + + console_page = (char *)cur_space; + cur_space += PAGE_SIZE; + +#ifdef ADD_ISA_HOLE + shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages); +#endif + /* + * pre-zero unused mapped pages - mapped on 4MB boundary + */ +#ifdef PAE + IdlePDPT = (pd_entry_t *)startinfo->pt_base; + IdlePDPTma = xpmap_ptom(VTOP(startinfo->pt_base)); + /* + * Note that only one page directory has been allocated at this point. + * Thus, if KERNBASE + */ + IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE); + IdlePTDma = xpmap_ptom(VTOP(IdlePTD)); + l3_pages = 1; +#else + IdlePTD = (pd_entry_t *)startinfo->pt_base; + IdlePTDma = xpmap_ptom(VTOP(startinfo->pt_base)); + l3_pages = 0; +#endif + l2_pages = 1; + l1_pages = xen_start_info->nr_pt_frames - l2_pages - l3_pages; + + KPTphysoff = (l2_pages + l3_pages)*PAGE_SIZE; + + KPTphys = xpmap_ptom(VTOP(startinfo->pt_base + KPTphysoff)); + XENPRINTF("IdlePTD %p\n", IdlePTD); + XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx " + "mod_start: 0x%lx mod_len: 0x%lx\n", + xen_start_info->nr_pages, xen_start_info->shared_info, + xen_start_info->flags, xen_start_info->pt_base, + xen_start_info->mod_start, xen_start_info->mod_len); + /* Map proc0's KSTACK */ + + proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE); + printk("proc0kstack=%u\n", proc0kstack); + + /* vm86/bios stack */ + cur_space += PAGE_SIZE; + + /* Map space for the vm86 region */ + vm86paddr = (vm_offset_t)cur_space; + cur_space += (PAGE_SIZE * 3); + +#ifdef PAE + IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE; + bzero(IdlePDPTnew, PAGE_SIZE); + + IdlePDPTnewma = xpmap_ptom(VTOP(IdlePDPTnew)); + IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE; + bzero(IdlePTDnew, 4*PAGE_SIZE); + + for (i = 0; i < 4; i++) + IdlePTDnewma[i] = + xpmap_ptom(VTOP((uint8_t *)IdlePTDnew + i*PAGE_SIZE)); + /* + * L3 + * + * Copy the 4 machine addresses of the new PTDs in to the PDPT + * + */ + for (i = 0; i < 4; i++) + IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V; + + __asm__("nop;"); + /* + * + * re-map the new PDPT read-only + */ + PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V); + /* + * + * Unpin the current PDPT + */ + xen_pt_unpin(IdlePDPTma); + + for (i = 0; i < 20; i++) { + int startidx = ((KERNBASE >> 18) & PAGE_MASK) >> 3; + + if (IdlePTD[startidx + i] == 0) { + l1_pages = i; + 
break; + } + } + +#endif /* PAE */ + + /* unmap remaining pages from initial 4MB chunk + * + */ + for (tmpva = cur_space; (tmpva & ((1<<22)-1)) != 0; tmpva += PAGE_SIZE) { + bzero((char *)tmpva, PAGE_SIZE); + PT_SET_MA(tmpva, (vm_paddr_t)0); + } + + PT_UPDATES_FLUSH(); + + memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)), + ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK), + l1_pages*sizeof(pt_entry_t)); + + for (i = 0; i < 4; i++) { + PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE, + IdlePTDnewma[i] | PG_V); + } + xen_load_cr3(VTOP(IdlePDPTnew)); + xen_pgdpt_pin(xpmap_ptom(VTOP(IdlePDPTnew))); + + /* allocate remainder of nkpt pages */ + cur_space_pt = cur_space; + for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt; + i++, cur_space += PAGE_SIZE) { + pdir = (offset + i) / NPDEPG; + curoffset = ((offset + i) % NPDEPG); + if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS) + break; + + /* + * make sure that all the initial page table pages + * have been zeroed + */ + PT_SET_MA(cur_space_pt, + xpmap_ptom(VTOP(cur_space)) | PG_V | PG_RW); + bzero((char *)cur_space_pt, PAGE_SIZE); + PT_SET_MA(cur_space_pt, (vm_paddr_t)0); + xen_pt_pin(xpmap_ptom(VTOP(cur_space))); + xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] + + curoffset*sizeof(vm_paddr_t)), + xpmap_ptom(VTOP(cur_space)) | PG_KERNEL); + PT_UPDATES_FLUSH(); + } + + for (i = 0; i < 4; i++) { + pdir = (PTDPTDI + i) / NPDEPG; + curoffset = (PTDPTDI + i) % NPDEPG; + + xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] + + curoffset*sizeof(vm_paddr_t)), + IdlePTDnewma[i] | PG_V); + } + + PT_UPDATES_FLUSH(); + + IdlePTD = IdlePTDnew; + IdlePDPT = IdlePDPTnew; + IdlePDPTma = IdlePDPTnewma; + + /* + * shared_info is an unsigned long so this will randomly break if + * it is allocated above 4GB - I guess people are used to that + * sort of thing with Xen ... 
sigh + */ + shinfo = xen_start_info->shared_info; + PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL); + + printk("#4\n"); + + xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT); + PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL); + console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT); + PT_SET_MA(console_page, console_page_ma | PG_KERNEL); + + printk("#5\n"); + + set_iopl.iopl = 1; + err = HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl); + KASSERT(err == 0, ("physdev_op failed")); + printk("#6\n"); +#if 0 + /* add page table for KERNBASE */ + xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t), + xpmap_ptom(VTOP(cur_space) | PG_KERNEL)); + xen_flush_queue(); +#ifdef PAE + xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t), + xpmap_ptom(VTOP(cur_space) | PG_V | PG_A)); +#else + xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), + xpmap_ptom(VTOP(cur_space) | PG_V | PG_A)); +#endif + xen_flush_queue(); + cur_space += PAGE_SIZE; + printk("#6\n"); +#endif /* 0 */ +#ifdef notyet + if (xen_start_info->flags & SIF_INITDOMAIN) { + /* Map first megabyte */ + for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE) + PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD); + xen_flush_queue(); + } +#endif + /* + * re-map kernel text read-only + * + */ + for (i = (((vm_offset_t)&btext) & ~PAGE_MASK); + i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE) + PT_SET_MA(i, xpmap_ptom(VTOP(i)) | PG_V | PG_A); + + printk("#7\n"); + physfree = VTOP(cur_space); + init_first = physfree >> PAGE_SHIFT; + IdlePTD = (pd_entry_t *)VTOP(IdlePTD); + IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT); + setup_xen_features(); + printk("#8, proc0kstack=%u\n", proc0kstack); +} + + +trap_info_t trap_table[] = { + { 0, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)}, + { 1, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)}, + { 3, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)}, + { 4, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)}, + /* This is UPL on Linux and KPL on BSD */ + { 5, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)}, + { 6, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)}, + { 7, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)}, + /* + * { 8, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)}, + * no handler for double fault + */ + { 9, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)}, + {10, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)}, + {11, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)}, + {12, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)}, + {13, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)}, + {14, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)}, + {15, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)}, + {16, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)}, + {17, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)}, + {18, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)}, + {19, 0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)}, + {0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)}, + { 0, 0, 0, 0 } +}; + +/********** CODE WORTH KEEPING ABOVE HERE *****************/ + +void xen_failsafe_handler(void); + +void +xen_failsafe_handler(void) +{ + + panic("xen_failsafe_handler called!\n"); +} + +void xen_handle_thread_switch(struct pcb *pcb); + +/* This is called by cpu_switch() when switching 
threads. */ +/* The pcb arg refers to the process control block of the */ +/* next thread which is to run */ +void +xen_handle_thread_switch(struct pcb *pcb) +{ + uint32_t *a = (uint32_t *)&PCPU_GET(fsgs_gdt)[0]; + uint32_t *b = (uint32_t *)&pcb->pcb_fsd; + multicall_entry_t mcl[3]; + int i = 0; + + /* Notify Xen of task switch */ + mcl[i].op = __HYPERVISOR_stack_switch; + mcl[i].args[0] = GSEL(GDATA_SEL, SEL_KPL); + mcl[i++].args[1] = (unsigned long)pcb; + + /* Check for update of fsd */ + if (*a != *b || *(a+1) != *(b+1)) { + mcl[i].op = __HYPERVISOR_update_descriptor; + *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a); + *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b; + } + + a += 2; + b += 2; + + /* Check for update of gsd */ + if (*a != *b || *(a+1) != *(b+1)) { + mcl[i].op = __HYPERVISOR_update_descriptor; + *(uint64_t *)&mcl[i].args[0] = vtomach((vm_offset_t)a); + *(uint64_t *)&mcl[i++].args[2] = *(uint64_t *)b; + } + + (void)HYPERVISOR_multicall(mcl, i); +} Property changes on: i386/xen/xen_machdep.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/xen/mp_machdep.c =================================================================== --- i386/xen/mp_machdep.c (.../stable/6/sys) (revision 0) +++ i386/xen/mp_machdep.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,1407 @@ +/*- + * Copyright (c) 1996, by Steve Passe + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. The name of the developer may NOT be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_apic.h" +#include "opt_cpu.h" +#include "opt_kdb.h" +#include "opt_kstack_pages.h" +#include "opt_mp_watchdog.h" +#include "opt_sched.h" + +#if !defined(lint) +#if !defined(SMP) +#error How did you get here? +#endif + +#ifndef DEV_APIC +#error The apic device is required for SMP, add "device apic" to your config file. 
+#endif +#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT) +#error SMP not supported with CPU_DISABLE_CMPXCHG +#endif +#endif /* not lint */ + +#include +#include +#include +#include /* cngetc() */ +#ifdef GPROF +#include +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include /** COUNT_XINVLTLB_HITS */ +#include + +#include +#include +#include +#include +#include +#include + +#define WARMBOOT_TARGET 0 +#define WARMBOOT_OFF (KERNBASE + 0x0467) +#define WARMBOOT_SEG (KERNBASE + 0x0469) + +#define CMOS_REG (0x70) +#define CMOS_DATA (0x71) +#define BIOS_RESET (0x0f) +#define BIOS_WARM (0x0a) + +/* + * this code MUST be enabled here and in mpboot.s. + * it follows the very early stages of AP boot by placing values in CMOS ram. + * it NORMALLY will never be needed and thus the primitive method for enabling. + * +#define CHECK_POINTS + */ + +/* lock region used by kernel profiling */ +int mcount_lock; + +/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */ +int current_postcode; + +int mp_naps; /* # of Applications processors */ +int boot_cpu_id = -1; /* designated BSP */ +extern int nkpt; + +extern struct pcpu __pcpu[]; + +/* + * CPU topology map datastructures for HTT. + */ +static struct cpu_group mp_groups[MAXCPU]; +static struct cpu_top mp_top; + +/* AP uses this during bootstrap. Do not staticize. */ +char *bootSTK; +static int bootAP; +static union descriptor *bootAPgdt; + +static char resched_name[MAX_VIRT_CPUS][15]; +static char callfunc_name[MAX_VIRT_CPUS][15]; + +/* Free these after use */ +void *bootstacks[MAXCPU]; + +/* Hotwire a 0->4MB V==P mapping */ +extern pt_entry_t *KPTphys; + +struct pcb stoppcbs[MAXCPU]; + +/* Variables needed for SMP tlb shootdown. */ +vm_offset_t smp_tlb_addr1; +vm_offset_t smp_tlb_addr2; +volatile int smp_tlb_wait; + +typedef void call_data_func_t(uintptr_t , uintptr_t); + + +#ifdef COUNT_IPIS +/* Interrupt counts. */ +#ifdef IPI_PREEMPTION +static u_long *ipi_preempt_counts[MAXCPU]; +#endif +static u_long *ipi_ast_counts[MAXCPU]; +u_long *ipi_invltlb_counts[MAXCPU]; +u_long *ipi_invlrng_counts[MAXCPU]; +u_long *ipi_invlpg_counts[MAXCPU]; +u_long *ipi_invlcache_counts[MAXCPU]; +u_long *ipi_rendezvous_counts[MAXCPU]; +u_long *ipi_lazypmap_counts[MAXCPU]; +#endif + +/* + * Local data and functions. + */ + +static u_int logical_cpus; + +/* used to hold the AP's until we are ready to release them */ +static struct mtx ap_boot_mtx; + +/* Set to 1 once we're ready to let the APs out of the pen. */ +static volatile int aps_ready = 0; + +/* + * Store data from cpu_add() until later in the boot when we actually setup + * the APs. 
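+ * Entries are indexed by local APIC ID rather than by logical CPU ID.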
+ */ +struct cpu_info { + int cpu_present:1; + int cpu_bsp:1; + int cpu_disabled:1; +} static cpu_info[MAX_APIC_ID + 1]; +static int cpu_apic_ids[MAXCPU]; + +/* Holds pending bitmap based IPIs per CPU */ +static volatile u_int cpu_ipi_pending[MAXCPU]; + +static u_int boot_address; + +static void assign_cpu_ids(void); +static void set_interrupt_apic_ids(void); +static int start_all_aps(void); +static int start_ap(int apic_id); +static void release_aps(void *dummy); + +static u_int hyperthreading_cpus; +static cpumask_t hyperthreading_cpus_mask; +extern void Xhypervisor_callback(void); +extern void failsafe_callback(void); +extern void pmap_lazyfix_action(void); + +void +mp_topology(void) +{ + struct cpu_group *group; + int logical_cpus; + int apic_id; + int groups; + int cpu; + + /* Build the smp_topology map. */ + /* Nothing to do if there is no HTT support. */ + if ((cpu_feature & CPUID_HTT) == 0) + return; + logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; + if (logical_cpus <= 1) + return; + group = &mp_groups[0]; + groups = 1; + for (cpu = 0, apic_id = 0; apic_id <= MAX_APIC_ID; apic_id++) { + if (!cpu_info[apic_id].cpu_present) + continue; + /* + * If the current group has members and we're not a logical + * cpu, create a new group. + */ + if (group->cg_count != 0 && (apic_id % logical_cpus) == 0) { + group++; + groups++; + } + group->cg_count++; + group->cg_mask |= 1 << cpu; + cpu++; + } + + mp_top.ct_count = groups; + mp_top.ct_group = mp_groups; + smp_topology = &mp_top; +} + + +/* + * Calculate usable address in base memory for AP trampoline code. + */ +u_int +mp_bootaddress(u_int basemem) +{ + + return (basemem); +} + +void +cpu_add(u_int apic_id, char boot_cpu) +{ + + if (apic_id > MAX_APIC_ID) { + panic("SMP: APIC ID %d too high", apic_id); + return; + } + KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice", + apic_id)); + cpu_info[apic_id].cpu_present = 1; + if (boot_cpu) { + KASSERT(boot_cpu_id == -1, + ("CPU %d claims to be BSP, but CPU %d already is", apic_id, + boot_cpu_id)); + boot_cpu_id = apic_id; + cpu_info[apic_id].cpu_bsp = 1; + } + if (mp_ncpus < MAXCPU) + mp_ncpus++; + if (bootverbose) + printf("SMP: Added CPU %d (%s)\n", apic_id, boot_cpu ? "BSP" : + "AP"); +} + +void +cpu_mp_setmaxid(void) +{ + + mp_maxid = MAXCPU - 1; +} + +int +cpu_mp_probe(void) +{ + + /* + * Always record BSP in CPU map so that the mbuf init code works + * correctly. + */ + all_cpus = 1; + if (mp_ncpus == 0) { + /* + * No CPUs were found, so this must be a UP system. Setup + * the variables to represent a system with a single CPU + * with an id of 0. + */ + mp_ncpus = 1; + return (0); + } + + /* At least one CPU was found. */ + if (mp_ncpus == 1) { + /* + * One CPU was found, so this must be a UP system with + * an I/O APIC. + */ + return (0); + } + + /* At least two CPUs were found. */ + return (1); +} + +/* + * Initialize the IPI handlers and start up the AP's. + */ +void +cpu_mp_start(void) +{ + int i; + + /* Initialize the logical ID to APIC ID table. 
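+ * Every cpu_apic_ids[] slot starts out invalid (-1); assign_cpu_ids() fills
+ * in the slots for the APs later in cpu_mp_start().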
*/ + for (i = 0; i < MAXCPU; i++) { + cpu_apic_ids[i] = -1; + cpu_ipi_pending[i] = 0; + } + +#if 0 + /* + * IPI list that has to be converted to Xen + * + */ + /* Install an inter-CPU IPI for TLB invalidation */ + setidt(IPI_INVLTLB, IDTVEC(invltlb), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_INVLPG, IDTVEC(invlpg), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setidt(IPI_INVLRNG, IDTVEC(invlrng), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* Install an inter-CPU IPI for cache invalidation. */ + setidt(IPI_INVLCACHE, IDTVEC(invlcache), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* Install an inter-CPU IPI for lazy pmap release */ + setidt(IPI_LAZYPMAP, IDTVEC(lazypmap), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* Install an inter-CPU IPI for all-CPU rendezvous */ + setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* Install generic inter-CPU IPI handler */ + setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* Install an inter-CPU IPI for CPU stop/restart */ + setidt(IPI_STOP, IDTVEC(cpustop), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif + + /* Set boot_cpu_id if needed. */ + if (boot_cpu_id == -1) { + boot_cpu_id = PCPU_GET(apic_id); + cpu_info[boot_cpu_id].cpu_bsp = 1; + } else + KASSERT(boot_cpu_id == PCPU_GET(apic_id), + ("BSP's APIC ID doesn't match boot_cpu_id")); + cpu_apic_ids[0] = boot_cpu_id; + + assign_cpu_ids(); + + /* Start each Application Processor */ + start_all_aps(); + + /* Setup the initial logical CPUs info. */ + logical_cpus = logical_cpus_mask = 0; + if (cpu_feature & CPUID_HTT) + logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; + + set_interrupt_apic_ids(); +} + + +static void +iv_rendezvous(uintptr_t a, uintptr_t b) +{ + smp_rendezvous_action(); +} + +static void +iv_invltlb(uintptr_t a, uintptr_t b) +{ + xen_tlb_flush(); +} + +static void +iv_invlpg(uintptr_t a, uintptr_t b) +{ + xen_invlpg(a); +} + +static void +iv_invlrng(uintptr_t a, uintptr_t b) +{ + vm_offset_t start = (vm_offset_t)a; + vm_offset_t end = (vm_offset_t)b; + + while (start < end) { + xen_invlpg(start); + start += PAGE_SIZE; + } +} + + +static void +iv_invlcache(uintptr_t a, uintptr_t b) +{ + + wbinvd(); +} + +static void +iv_lazypmap(uintptr_t a, uintptr_t b) +{ + pmap_lazyfix_action(); +} + + +static void +iv_noop(uintptr_t a, uintptr_t b) +{ +} + +static call_data_func_t *ipi_vectors[IPI_BITMAP_VECTOR] = +{ + iv_noop, + iv_noop, + iv_rendezvous, + iv_invltlb, + iv_invlpg, + iv_invlrng, + iv_invlcache, + iv_lazypmap, +}; + +/* + * Reschedule call back. Nothing to do, + * all the work is done automatically when + * we return from the interrupt. 
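+ * We just drain the pending bitmap IPIs (preempt/AST) recorded for this CPU
+ * in cpu_ipi_pending[].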
+ */ +static void +smp_reschedule_interrupt(void *unused) +{ + int cpu = PCPU_GET(cpuid); + u_int ipi_bitmap; + + ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); + +#ifdef IPI_PREEMPTION + if (ipi_bitmap & (1 << IPI_PREEMPT)) { +#ifdef COUNT_IPIS + *ipi_preempt_counts[cpu]++; +#endif + mtx_lock_spin(&sched_lock); + /* Don't preempt the idle thread */ + if (curthread != PCPU_GET(idlethread)) { + struct thread *running_thread = curthread; + if (running_thread->td_critnest > 1) + running_thread->td_owepreempt = 1; + else + mi_switch(SW_INVOL | SW_PREEMPT, NULL); + } + mtx_unlock_spin(&sched_lock); + } +#endif + + if (ipi_bitmap & (1 << IPI_AST)) { +#ifdef COUNT_IPIS + *ipi_ast_counts[cpu]++; +#endif + /* Nothing to do for AST */ + } +} + +struct _call_data { + uint16_t func_id; + uint16_t wait; + uintptr_t arg1; + uintptr_t arg2; + atomic_t started; + atomic_t finished; +}; + +static struct _call_data *call_data; + +static void +smp_call_function_interrupt(void *arg) +{ + call_data_func_t *func; + uintptr_t arg1 = call_data->arg1; + uintptr_t arg2 = call_data->arg2; + int wait = call_data->wait; + atomic_t *started = &call_data->started; + atomic_t *finished = &call_data->finished; + + if (call_data->func_id > IPI_BITMAP_VECTOR) + panic("invalid function id %u", call_data->func_id); + + func = ipi_vectors[call_data->func_id]; + /* + * Notify initiating CPU that I've grabbed the data and am + * about to execute the function + */ + mb(); + atomic_inc(started); + /* + * At this point the info structure may be out of scope unless wait==1 + */ + (*func)(arg1, arg2); + + if (wait) { + mb(); + atomic_inc(finished); + } + atomic_add_int(&smp_tlb_wait, 1); +} + +/* + * Print various information about the SMP system hardware and setup. + */ +void +cpu_mp_announce(void) +{ + int i, x; + + /* List CPUs */ + printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id); + for (i = 1, x = 0; x <= MAX_APIC_ID; x++) { + if (!cpu_info[x].cpu_present || cpu_info[x].cpu_bsp) + continue; + if (cpu_info[x].cpu_disabled) + printf(" cpu (AP): APIC ID: %2d (disabled)\n", x); + else { + KASSERT(i < mp_ncpus, + ("mp_ncpus and actual cpus are out of whack")); + printf(" cpu%d (AP): APIC ID: %2d\n", i++, x); + } + } +} + +static int +xen_smp_intr_init(unsigned int cpu) +{ + int rc; + unsigned int irq; + + pc->pc_resched_irq = pc->pc_callfunc_irq = ~0; + + sprintf(resched_name[cpu], "resched%u", cpu); + rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR, + cpu, + resched_name[cpu], + smp_reschedule_interrupt, + INTR_FAST|INTR_TYPE_TTY|INTR_MPSAFE, &irq); + + printf("cpu=%d irq=%d vector=%d\n", + cpu, pc->pc_resched_irq, RESCHEDULE_VECTOR); + + per_cpu(resched_irq, cpu) = irq; + + sprintf(callfunc_name[cpu], "callfunc%u", cpu); + rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR, + cpu, + callfunc_name[cpu], + smp_call_function_interrupt, + INTR_FAST|INTR_TYPE_TTY|INTR_MPSAFE, &irq); + if (rc < 0) + goto fail; + per_cpu(callfunc_irq, cpu) = irq; + + printf("cpu=%d irq=%d vector=%d\n", + cpu, pc->pc_callfunc_irq, CALL_FUNCTION_VECTOR); + + if ((cpu != 0) && ((rc = ap_cpu_initclocks(cpu)) != 0)) + goto fail; + + return 0; + + fail: + if (per_cpu(resched_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(resched_irq, cpu)); + if (per_cpu(callfunc_irq, cpu) >= 0) + unbind_from_irqhandler(per_cpu(callfunc_irq, cpu)); + return rc; +} + +static void +xen_smp_intr_init_cpus(void *unused) +{ + int i; + + for (i = 0; i < mp_ncpus; i++) + xen_smp_intr_init(i); +} + +#define MTOPSIZE (1<<(14 + PAGE_SHIFT)) +/* + * AP CPU's call this to 
initialize themselves. + */ +void +init_secondary(void) +{ + vm_offset_t addr; + int gsel_tss; + + /* bootAP is set in start_ap() to our ID. */ + PCPU_SET(currentldt, _default_ldt); + + gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); +#if 0 + gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; +#endif + PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */ + PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); + PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); +#if 0 + PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd); + PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); + ltr(gsel_tss); +#endif + PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); + + /* signal our startup to the BSP. */ + mp_naps++; + + /* Spin until the BSP releases the AP's. */ + while (!aps_ready) + ia32_pause(); + + /* BSP may have changed PTD while we were waiting */ + invltlb(); + for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE) + invlpg(addr); + + /* set up FPU state on the AP */ + npxinit(__INITIAL_NPXCW__); + +#if 0 + /* set up SSE registers */ + enable_sse(); + + /* A quick check from sanity claus */ + if (PCPU_GET(apic_id) != lapic_id()) { + printf("SMP: cpuid = %d\n", PCPU_GET(cpuid)); + printf("SMP: actual apic_id = %d\n", lapic_id()); + printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id)); + panic("cpuid mismatch! boom!!"); + } +#endif + /* Initialize curthread. */ + KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); + PCPU_SET(curthread, PCPU_GET(idlethread)); + + mtx_lock_spin(&ap_boot_mtx); +#if 0 + /* Init local apic for irq's */ + lapic_setup(1); + + /* Set memory range attributes for this CPU to match the BSP */ + mem_range_AP_init(); +#endif + smp_cpus++; + + CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); + printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + + /* Determine if we are a logical CPU. */ + if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) + logical_cpus_mask |= PCPU_GET(cpumask); + + /* Determine if we are a hyperthread. */ + if (hyperthreading_cpus > 1 && + PCPU_GET(apic_id) % hyperthreading_cpus != 0) + hyperthreading_cpus_mask |= PCPU_GET(cpumask); + + /* Build our map of 'other' CPUs. */ + PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); +#if 0 + if (bootverbose) + lapic_dump("AP"); +#endif + if (smp_cpus == mp_ncpus) { + /* enable IPI's, tlb shootdown, freezes etc */ + atomic_store_rel_int(&smp_started, 1); + smp_active = 1; /* historic */ + } + + mtx_unlock_spin(&ap_boot_mtx); + + /* wait until all the AP's are up */ + while (smp_started == 0) + ia32_pause(); + + /* ok, now grab sched_lock and enter the scheduler */ + mtx_lock_spin(&sched_lock); + + /* + * Correct spinlock nesting. The idle thread context that we are + * borrowing was created so that it would start out with a single + * spin lock (sched_lock) held in fork_trampoline(). Since we've + * explicitly acquired locks in this function, the nesting count + * is now 2 rather than 1. Since we are nested, calling + * spinlock_exit() will simply adjust the counts without allowing + * spin lock using code to interrupt us. 
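+ * (md_spinlock_count is asserted to be 1 right after the spinlock_exit()
+ * below.)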
+ */ + spinlock_exit(); + KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count")); + + binuptime(PCPU_PTR(switchtime)); + PCPU_SET(switchticks, ticks); + + cpu_throw(NULL, choosethread()); /* doesn't return */ + + panic("scheduler returned us to %s", __func__); + /* NOTREACHED */ +} + +/******************************************************************* + * local functions and data + */ + +/* + * We tell the I/O APIC code about all the CPUs we want to receive + * interrupts. If we don't want certain CPUs to receive IRQs we + * can simply not tell the I/O APIC code about them in this function. + * We also do not tell it about the BSP since it tells itself about + * the BSP internally to work with UP kernels and on UP machines. + */ +static void +set_interrupt_apic_ids(void) +{ + u_int apic_id; + + for (apic_id = 0; apic_id < MAXCPU; apic_id++) { + if (!cpu_info[apic_id].cpu_present) + continue; + if (cpu_info[apic_id].cpu_bsp) + continue; + if (cpu_info[apic_id].cpu_disabled) + continue; + + /* Don't let hyperthreads service interrupts. */ + if (hyperthreading_cpus > 1 && + apic_id % hyperthreading_cpus != 0) + continue; + + intr_add_cpu(apic_id); + } +} + +/* + * Assign logical CPU IDs to local APICs. + */ +static void +assign_cpu_ids(void) +{ + u_int i; + + /* Check for explicitly disabled CPUs. */ + for (i = 0; i <= MAX_APIC_ID; i++) { + if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp) + continue; + + /* Don't use this CPU if it has been disabled by a tunable. */ + if (resource_disabled("lapic", i)) { + cpu_info[i].cpu_disabled = 1; + continue; + } + } + + /* + * Assign CPU IDs to local APIC IDs and disable any CPUs + * beyond MAXCPU. CPU 0 has already been assigned to the BSP, + * so we only have to assign IDs for APs. + */ + mp_ncpus = 1; + for (i = 0; i <= MAX_APIC_ID; i++) { + if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp || + cpu_info[i].cpu_disabled) + continue; + + if (mp_ncpus < MAXCPU) { + cpu_apic_ids[mp_ncpus] = i; + mp_ncpus++; + } else + cpu_info[i].cpu_disabled = 1; + } + KASSERT(mp_maxid >= mp_ncpus - 1, + ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid, + mp_ncpus)); +} + +/* + * start each AP in our list + */ +static int +start_all_aps(void) +{ + int apic_id, cpu, i; + struct pcpu *pc; + + mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); + + /* start each AP */ + for (cpu = 1; cpu < mp_ncpus; cpu++) { + apic_id = cpu_apic_ids[cpu]; + + bootstacks[cpu] = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE); + + /* setup a vector to our boot code */ + *((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET; + *((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4); + + bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 4; + bootAP = cpu; + bootAPgdt = gdt + (512*cpu); + + /* Get per-cpu data */ + pc = &__pcpu[bootAP]; + pcpu_init(pc, bootAP, sizeof(struct pcpu)); + pc->pc_apic_id = cpu_apic_ids[bootAP]; + pc->pc_prvspace = pc; + pc->pc_curthread = 0; + + gdt_segs[GPRIV_SEL].ssd_base = (int) pc; + gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; + + PT_SET_MA(bootAPgdt, xpmap_ptom(VTOP(bootAPgdt)) | PG_V | PG_RW); + bzero(bootAPgdt, PAGE_SIZE); + for (i = 0; i < NGDT; i++) + ssdtosd(&gdt_segs[i], &bootAPgdt[i].sd); + PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V); +#ifdef notyet + + if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { + apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); + acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); +#ifdef CONFIG_ACPI + if (acpiid != 0xff) + 
x86_acpiid_to_apicid[acpiid] = apicid; +#endif + } +#endif + + /* attempt to start the Application Processor */ + if (!start_ap(apic_id)) { + printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id); + /* better panic as the AP may be running loose */ + printf("panic y/n? [y] "); + if (cngetc() != 'n') + panic("bye-bye"); + } + + all_cpus |= (1 << cpu); /* record AP in CPU map */ + } + + /* build our map of 'other' CPUs */ + PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + + pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); + + /* number of APs actually started */ + return mp_naps; +} + +extern uint8_t *pcpu_boot_stack; +extern trap_info_t trap_table[]; + +static void +smp_trap_init(trap_info_t *trap_ctxt) +{ + const trap_info_t *t = trap_table; + + for (t = trap_table; t->address; t++) { + trap_ctxt[t->vector].flags = t->flags; + trap_ctxt[t->vector].cs = t->cs; + trap_ctxt[t->vector].address = t->address; + } +} + +void cpu_initialize_context(unsigned int cpu); + +void +cpu_initialize_context(unsigned int cpu) +{ + /* vcpu_guest_context_t is too large to allocate on the stack. + * Hence we allocate statically and protect it with a lock */ + vm_page_t m[4]; + static vcpu_guest_context_t ctxt; + vm_offset_t boot_stack; + vm_offset_t newPTD; + vm_paddr_t ma[NPGPTD]; + static int color; + int i, err; + + /* + * Page 0,[0-3] PTD + * Page 1, [4] boot stack + * Page [5] PDPT + + * + */ + for (i = 0; i < NPGPTD + 2; i++) { + m[i] = vm_page_alloc(NULL, color++, + VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_ZERO); + + pmap_zero_page(m[i]); + + } + boot_stack = kmem_alloc_nofault(kernel_map, 1); + newPTD = kmem_alloc_nofault(kernel_map, NPGPTD); + ma[0] = xpmap_ptom(VM_PAGE_TO_PHYS(m[0]))|PG_V; + +#ifdef PAE + pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1])); + for (i = 0; i < NPGPTD; i++) { + ((vm_paddr_t *)boot_stack)[i] = + ma[i] = + xpmap_ptom(VM_PAGE_TO_PHYS(m[i]))|PG_V; + } +#endif + + /* + * Copy cpu0 IdlePTD to new IdlePTD - copying only + * kernel mappings + */ + pmap_qenter(newPTD, m, 4); + + memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t), + (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t), + nkpt*sizeof(vm_paddr_t)); + + pmap_qremove(newPTD, 4); + kmem_free(kernel_map, newPTD, 4); + /* + * map actual idle stack to boot_stack + */ + pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD])); + + + xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1]))); + vm_page_lock_queues(); + for (i = 0; i < 4; i++) { + int pdir = (PTDPTDI + i) / NPDEPG; + int curoffset = (PTDPTDI + i) % NPDEPG; + + xen_queue_pt_update((vm_paddr_t) + ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))), + ma[i]); + } + PT_UPDATES_FLUSH(); + vm_page_unlock_queues(); + + memset(&ctxt, 0, sizeof(ctxt)); + ctxt.flags = VGCF_IN_KERNEL; + ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL); + ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL); + ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL); + ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL); + ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL); + ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL); + ctxt.user_regs.eip = (unsigned long)init_secondary; + ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */ + + memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt)); + + smp_trap_init(ctxt.trap_ctxt); + + ctxt.ldt_ents = 0; + ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT); + ctxt.gdt_ents = 512; + +#ifdef __i386__ + ctxt.user_regs.esp = boot_stack + PAGE_SIZE; + + ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); + ctxt.kernel_sp = boot_stack + 
PAGE_SIZE; + + ctxt.event_callback_cs = GSEL(GCODE_SEL, SEL_KPL); + ctxt.event_callback_eip = (unsigned long)Xhypervisor_callback; + ctxt.failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL); + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; + + ctxt.ctrlreg[3] = xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1])); +#else /* __x86_64__ */ + ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs); + ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL); + ctxt.kernel_sp = idle->thread.rsp0; + + ctxt.event_callback_eip = (unsigned long)hypervisor_callback; + ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback; + ctxt.syscall_callback_eip = (unsigned long)system_call; + + ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt)); + + ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu)); +#endif + + printf("gdtpfn=%lx pdptpfn=%lx\n", + ctxt.gdt_frames[0], + ctxt.ctrlreg[3] >> PAGE_SHIFT); + + err = HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt); + KASSERT(err == 0, ("VCPUOP_initialise failed")); + DELAY(3000); + err = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + KASSERT(err == 0, ("VCPUOP_up failed")); +} + +/* + * This function starts the AP (application processor) identified + * by the APIC ID 'physicalCpu'. It does quite a "song and dance" + * to accomplish this. This is necessary because of the nuances + * of the different hardware we might encounter. It isn't pretty, + * but it seems to work. + */ +static int +start_ap(int apic_id) +{ + int cpus, ms; + + /* used as a watchpoint to signal AP startup */ + cpus = mp_naps; + + cpu_initialize_context(apic_id); + + /* Wait up to 5 seconds for it to start. */ + for (ms = 0; ms < 5000; ms++) { + if (mp_naps > cpus) + return 1; /* return SUCCESS */ + DELAY(1000); + } + return 0; /* return FAILURE */ +} + +#ifdef COUNT_XINVLTLB_HITS +u_int xhits_gbl[MAXCPU]; +u_int xhits_pg[MAXCPU]; +u_int xhits_rng[MAXCPU]; +SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl, + sizeof(xhits_gbl), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg, + sizeof(xhits_pg), "IU", ""); +SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng, + sizeof(xhits_rng), "IU", ""); + +u_int ipi_global; +u_int ipi_page; +u_int ipi_range; +u_int ipi_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size, + 0, ""); + +u_int ipi_masked_global; +u_int ipi_masked_page; +u_int ipi_masked_range; +u_int ipi_masked_range_size; +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW, + &ipi_masked_global, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW, + &ipi_masked_page, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW, + &ipi_masked_range, 0, ""); +SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, + &ipi_masked_range_size, 0, ""); +#endif /* COUNT_XINVLTLB_HITS */ + +/* + * Flush the TLB on all other CPU's + */ +static void +smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) +{ + u_int ncpu; + struct _call_data data; + + call_data = &data; + + ncpu = mp_ncpus - 1; /* does not shootdown self */ + if (ncpu < 1) + return; /* no other cpus */ + if (!(read_eflags() & PSL_I)) + panic("%s: interrupts 
disabled", __func__); + mtx_lock_spin(&smp_ipi_mtx); + call_data->func_id = vector; + call_data->arg1 = addr1; + call_data->arg2 = addr2; + atomic_store_rel_int(&smp_tlb_wait, 0); + ipi_all_but_self(vector); + while (smp_tlb_wait < ncpu) + ia32_pause(); + call_data = NULL; + mtx_unlock_spin(&smp_ipi_mtx); +} + +static void +smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +{ + int ncpu, othercpus; + struct _call_data data; + + othercpus = mp_ncpus - 1; + if (mask == (u_int)-1) { + ncpu = othercpus; + if (ncpu < 1) + return; + } else { + mask &= ~PCPU_GET(cpumask); + if (mask == 0) + return; + ncpu = bitcount32(mask); + if (ncpu > othercpus) { + /* XXX this should be a panic offence */ + printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", + ncpu, othercpus); + ncpu = othercpus; + } + /* XXX should be a panic, implied by mask == 0 above */ + if (ncpu < 1) + return; + } + if (!(read_eflags() & PSL_I)) + panic("%s: interrupts disabled", __func__); + mtx_lock_spin(&smp_ipi_mtx); + call_data = &data; + call_data->func_id = vector; + call_data->arg1 = addr1; + call_data->arg2 = addr2; + atomic_store_rel_int(&smp_tlb_wait, 0); + if (mask == (u_int)-1) + ipi_all_but_self(vector); + else + ipi_selected(mask, vector); + while (smp_tlb_wait < ncpu) + ia32_pause(); + call_data = NULL; + mtx_unlock_spin(&smp_ipi_mtx); +} + +void +smp_cache_flush(void) +{ + + if (smp_started) + smp_tlb_shootdown(IPI_INVLCACHE, 0, 0); +} + +void +smp_invltlb(void) +{ + + if (smp_started) { + smp_tlb_shootdown(IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_global++; +#endif + } +} + +void +smp_invlpg(vm_offset_t addr) +{ + + if (smp_started) { + smp_tlb_shootdown(IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_page++; +#endif + } +} + +void +smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) +{ + + if (smp_started) { + smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_range++; + ipi_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +} + +void +smp_masked_invltlb(u_int mask) +{ + + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_global++; +#endif + } +} + +void +smp_masked_invlpg(u_int mask, vm_offset_t addr) +{ + + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_page++; +#endif + } +} + +void +smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) +{ + + if (smp_started) { + smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2); +#ifdef COUNT_XINVLTLB_HITS + ipi_masked_range++; + ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE; +#endif + } +} + +void +ipi_bitmap_handler(struct clockframe frame); + +void +ipi_bitmap_handler(struct clockframe frame) +{ + int cpu = PCPU_GET(cpuid); + u_int ipi_bitmap; + + ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); + +#ifdef IPI_PREEMPTION + if (ipi_bitmap & (1 << IPI_PREEMPT)) { +#ifdef COUNT_IPIS + *ipi_preempt_counts[cpu]++; +#endif + mtx_lock_spin(&sched_lock); + /* Don't preempt the idle thread */ + if (curthread != PCPU_GET(idlethread)) { + struct thread *running_thread = curthread; + if (running_thread->td_critnest > 1) + running_thread->td_owepreempt = 1; + else + mi_switch(SW_INVOL | SW_PREEMPT, NULL); + } + mtx_unlock_spin(&sched_lock); + } +#endif + + if (ipi_bitmap & (1 << IPI_AST)) { +#ifdef COUNT_IPIS + *ipi_ast_counts[cpu]++; +#endif + /* Nothing to do for AST */ + } +} + +/* + * send an IPI 
to a set of cpus. + */ +void +ipi_selected(uint32_t cpus, u_int ipi) +{ + int cpu; + u_int bitmap = 0; + u_int old_pending; + u_int new_pending; + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + } + + CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); + while ((cpu = ffs(cpus)) != 0) { + cpu--; + cpus &= ~(1 << cpu); + + KASSERT(cpu_apic_ids[cpu] != -1, + ("IPI to non-existent CPU %d", cpu)); + + if (bitmap) { + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending)); + + if (!old_pending) + ipi_pcpu(cpu, RESCHEDULE_VECTOR); + continue; + + } + + KASSERT(call_data != NULL, ("call_data not set")); + + ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); + } + +} + +/* + * send an IPI INTerrupt containing 'vector' to all CPUs, including myself + */ +void +ipi_all(u_int ipi) +{ + + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); + ipi_selected(PCPU_GET(other_cpus), ipi); +} + +/* + * send an IPI to all CPUs EXCEPT myself + */ +void +ipi_all_but_self(u_int ipi) +{ + + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); + ipi_selected(all_cpus & ~(1< + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * the Systems Programming Group of the University of Utah Computer + * Science Department and William Jolitz of UUNET Technologies Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 + */ +/*- + * Copyright (c) 2003 Networks Associates Technology, Inc. + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Jake Burkholder, + * Safeport Network Services, and Network Associates Laboratories, the + * Security Research Division of Network Associates, Inc. 
under + * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA + * CHATS research program. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +/* + * Manages physical address maps. + * + * In addition to hardware address maps, this + * module is called upon to provide software-use-only + * maps which may or may not be stored in the same + * form as hardware maps. These pseudo-maps are + * used to store intermediate results from copy + * operations to and from address spaces. + * + * Since the information managed by this module is + * also stored by the logical address mapping module, + * this module may throw away valid virtual-to-physical + * mappings at almost any time. However, invalidations + * of virtual-to-physical mappings must be done as + * requested. + * + * In order to cope with hardware architectures which + * make virtual-to-physical map invalidates expensive, + * this module may delay invalidate or reduced protection + * operations until such time as they are actually + * necessary. This module is given full information as + * to which processors are currently using which maps, + * and to when physical maps must be made correct. 
+ */ + +#include "opt_cpu.h" +#include "opt_pmap.h" +#include "opt_msgbuf.h" +#include "opt_xbox.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef SMP +#include +#endif + +#ifdef XBOX +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef XEN +#include +#include +#include +#include +#include +#endif + +#include +#include +#include +#include +#include +#ifdef SMP +#include +#endif + +#ifdef XBOX +#include +#endif + +#include +#include +#include +#include +#include + +#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) +#define CPU_ENABLE_SSE +#endif + +#ifndef PMAP_SHPGPERPROC +#define PMAP_SHPGPERPROC 200 +#endif + +#define PMAP_DIAGNOSTIC + +#if defined(DIAGNOSTIC) +#define PMAP_DIAGNOSTIC +#endif + +#if !defined(PMAP_DIAGNOSTIC) +#define PMAP_INLINE __inline +#else +#define PMAP_INLINE +#endif + +/* + * Get PDEs and PTEs for user/kernel address space + */ +#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) +#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) + +#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) +#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) +#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) +#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) +#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) + +#ifndef XEN +#define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \ + atomic_clear_int((u_int *)(pte), PG_W)) +#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) +#endif + +struct pmap kernel_pmap_store; +LIST_HEAD(pmaplist, pmap); +static struct pmaplist allpmaps; +static struct mtx allpmaps_lock; + +vm_paddr_t avail_end; /* PA of last available physical page */ +vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ +vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ +int pgeflag = 0; /* PG_G or-in */ +int pseflag = 0; /* PG_PS or-in */ + +int nkpt; +vm_offset_t kernel_vm_end; +extern u_int32_t KERNend; + +#if defined(PAE) && !defined(XEN) +static uma_zone_t pdptzone; +#endif + +/* + * Data for the pv entry allocation mechanism + */ +static uma_zone_t pvzone; +static struct vm_object pvzone_obj; +static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; +int pmap_pagedaemon_waken; + +/* + * All those kernel PT submaps that BSD is so fond of + */ +struct sysmaps { + struct mtx lock; + pt_entry_t *CMAP1; + pt_entry_t *CMAP2; + caddr_t CADDR1; + caddr_t CADDR2; +}; +static struct sysmaps sysmaps_pcpu[MAXCPU]; +pt_entry_t *CMAP1 = 0; +static pt_entry_t *CMAP3; +caddr_t CADDR1 = 0, ptvmmap = 0; +static caddr_t CADDR3; +struct msgbuf *msgbufp = 0; + +/* + * Crashdump maps. 
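+ * crashdumpmap is a MAXDUMPPGS-page KVA window set up in pmap_bootstrap().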
+ */ +static caddr_t crashdumpmap; + +#ifdef SMP +extern pt_entry_t *SMPpt; +#endif +static pt_entry_t *PMAP1 = 0, *PMAP2; +static pt_entry_t *PADDR1 = 0, *PADDR2; +#ifdef SMP +static int PMAP1cpu; +static int PMAP1changedcpu; +SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, + &PMAP1changedcpu, 0, + "Number of times pmap_pte_quick changed CPU with same PMAP1"); +#endif +static int PMAP1changed; +SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, + &PMAP1changed, 0, + "Number of times pmap_pte_quick changed PMAP1"); +static int PMAP1unchanged; +SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, + &PMAP1unchanged, 0, + "Number of times pmap_pte_quick didn't change PMAP1"); +static struct mtx PMAP2mutex; + +static PMAP_INLINE void free_pv_entry(pv_entry_t pv); +static pv_entry_t get_pv_entry(void); +static void pmap_clear_ptes(vm_page_t m, int bit); + +static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, + vm_page_t m, vm_prot_t prot, vm_page_t mpte); +static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, + vm_page_t *free); +static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); +static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, + vm_offset_t va); +static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); +static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, + vm_page_t m); + +static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); + +static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); +static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free); +static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); +static void pmap_pte_release(pt_entry_t *pte); +static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *); +static vm_offset_t pmap_kmem_choose(vm_offset_t addr); +#if defined(PAE) && !defined(XEN) +static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); +#endif + +CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); +CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); + +/* + * If you get an error here, then you set KVA_PAGES wrong! See the + * description of KVA_PAGES in sys/i386/include/pmap.h. It must be + * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE. + */ +CTASSERT(KERNBASE % (1 << 24) == 0); + +static __inline void +pagezero(void *page) +{ +#if defined(I686_CPU) + if (cpu_class == CPUCLASS_686) { +#if defined(CPU_ENABLE_SSE) + if (cpu_feature & CPUID_SSE2) + sse2_pagezero(page); + else +#endif + i686_pagezero(page); + } else +#endif + bzero(page, PAGE_SIZE); +} + +void +pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type) +{ + vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]); + + switch (type) { + case SH_PD_SET_VA: +#if 0 + xen_queue_pt_update(shadow_pdir_ma, + xpmap_ptom(val & ~(PG_RW))); +#endif + xen_queue_pt_update(pdir_ma, + xpmap_ptom(val)); + break; + case SH_PD_SET_VA_MA: +#if 0 + xen_queue_pt_update(shadow_pdir_ma, + val & ~(PG_RW)); +#endif + xen_queue_pt_update(pdir_ma, val); + break; + case SH_PD_SET_VA_CLEAR: +#if 0 + xen_queue_pt_update(shadow_pdir_ma, 0); +#endif + xen_queue_pt_update(pdir_ma, 0); + break; + } +} + +/* + * Move the kernel virtual free pointer to the next + * 4MB. 
This is used to help improve performance + * by using a large (4MB) page for much of the kernel + * (.text, .data, .bss) + */ +static vm_offset_t +pmap_kmem_choose(vm_offset_t addr) +{ + vm_offset_t newaddr = addr; + +#ifndef DISABLE_PSE + if (cpu_feature & CPUID_PSE) + newaddr = (addr + PDRMASK) & ~PDRMASK; +#endif + return newaddr; +} + +/* + * Bootstrap the system enough to run with virtual memory. + * + * On the i386 this is called after mapping has already been enabled + * and just syncs the pmap module with what has already been done. + * [We can't call it easily with mapping off since the kernel is not + * mapped with PA == VA, hence we would have to relocate every address + * from the linked base (virtual) address "KERNBASE" to the actual + * (physical) address starting relative to 0] + */ +void +pmap_bootstrap(firstaddr, loadaddr) + vm_paddr_t firstaddr; + vm_paddr_t loadaddr; +{ + vm_offset_t va; + pt_entry_t *pte, *unused; + struct sysmaps *sysmaps; + int i; + + /* + * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too + * large. It should instead be correctly calculated in locore.s and + * not based on 'first' (which is a physical address, not a virtual + * address, for the start of unused physical memory). The kernel + * page tables are NOT double mapped and thus should not be included + * in this calculation. + */ + virtual_avail = (vm_offset_t) KERNBASE + firstaddr; + virtual_avail = pmap_kmem_choose(virtual_avail); + + virtual_end = VM_MAX_KERNEL_ADDRESS; + + /* + * Initialize the kernel pmap (which is statically allocated). + */ + PMAP_LOCK_INIT(kernel_pmap); + kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); +#ifdef PAE + kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); +#endif + kernel_pmap->pm_active = -1; /* don't allow deactivation */ + TAILQ_INIT(&kernel_pmap->pm_pvlist); + LIST_INIT(&allpmaps); + mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); + mtx_lock_spin(&allpmaps_lock); + LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); + mtx_unlock_spin(&allpmaps_lock); + nkpt = NKPT; + + /* + * Reserve some special page table entries/VA space for temporary + * mapping of pages. + */ +#define SYSMAP(c, p, v, n) \ + v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); + + va = virtual_avail; + pte = vtopte(va); + + /* + * CMAP1/CMAP2 are used for zeroing and copying pages. + * CMAP3 is used for the idle process page zeroing. + */ + for (i = 0; i < MAXCPU; i++) { + sysmaps = &sysmaps_pcpu[i]; + mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); + SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1) + SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1) + } + SYSMAP(caddr_t, CMAP1, CADDR1, 1) + SYSMAP(caddr_t, CMAP3, CADDR3, 1) +#ifdef XEN + PT_SET_MA(CADDR3, 0); +#else + *CMAP3 = 0; +#endif + /* + * Crashdump maps. + */ + SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) + + /* + * ptvmmap is used for reading arbitrary physical pages via /dev/mem. + */ + SYSMAP(caddr_t, unused, ptvmmap, 1) + + /* + * msgbufp is used to map the system message buffer. + */ + SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE))) + + /* + * ptemap is used for pmap_pte_quick + */ + SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1); + SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1); + + mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); + + virtual_avail = va; +#ifdef XEN + PT_SET_MA(CADDR1, 0); +#else + *CMAP1 = 0; +#endif + +#if !defined(XEN) +#ifdef XBOX + /* FIXME: This is gross, but needed for the XBOX. 
Since we are in such + * an early stadium, we cannot yet neatly map video memory ... :-( + * Better fixes are very welcome! + */ + if (!arch_i386_is_xbox) +#endif + for (i = 0; i < NKPT; i++) + PTD[i] = 0; + + /* Initialize the PAT MSR if present. */ + pmap_init_pat(); + + /* Turn on PG_G on kernel page(s) */ + pmap_set_pg(); +#endif /* !XEN */ +} + +/* + * Setup the PAT MSR. + */ +void +pmap_init_pat(void) +{ + uint64_t pat_msr; + + /* Bail if this CPU doesn't implement PAT. */ + if (!(cpu_feature & CPUID_PAT)) + return; + +#ifdef PAT_WORKS + /* + * Leave the indices 0-3 at the default of WB, WT, UC, and UC-. + * Program 4 and 5 as WP and WC. + * Leave 6 and 7 as UC and UC-. + */ + pat_msr = rdmsr(MSR_PAT); + pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5)); + pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) | + PAT_VALUE(5, PAT_WRITE_COMBINING); +#else + /* + * Due to some Intel errata, we can only safely use the lower 4 + * PAT entries. Thus, just replace PAT Index 2 with WC instead + * of UC-. + * + * Intel Pentium III Processor Specification Update + * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B + * or Mode C Paging) + * + * Intel Pentium IV Processor Specification Update + * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) + */ + pat_msr = rdmsr(MSR_PAT); + pat_msr &= ~PAT_MASK(2); + pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); +#endif + wrmsr(MSR_PAT, pat_msr); +} + +/* + * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on. + */ +void +pmap_set_pg(void) +{ + pd_entry_t pdir; + pt_entry_t *pte; + vm_offset_t va, endva; + int i; + + if (pgeflag == 0) + return; + + i = KERNLOAD/NBPDR; + endva = KERNBASE + KERNend; + + if (pseflag) { + va = KERNBASE + KERNLOAD; + while (va < endva) { + pdir = kernel_pmap->pm_pdir[KPTDI+i]; + pdir |= pgeflag; + kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir; + invltlb(); /* Play it safe, invltlb() every time */ + i++; + va += NBPDR; + } + } else { + va = (vm_offset_t)btext; + while (va < endva) { + pte = vtopte(va); + if (*pte) { +#ifdef XEN + PT_SET_MA(va, *pte | pgeflag); +#else + *pte |= pgeflag; +#endif + } + invltlb(); /* Play it safe, invltlb() every time */ + va += PAGE_SIZE; + } + } +} + +/* + * Initialize a vm_page's machine-dependent fields. + */ +void +pmap_page_init(vm_page_t m) +{ + + TAILQ_INIT(&m->md.pv_list); + m->md.pv_list_count = 0; +} + +#if defined(PAE) && !defined(XEN) + +static MALLOC_DEFINE(M_PMAPPDPT, "pmap", "pmap pdpt"); + +static void * +pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + *flags = UMA_SLAB_PRIV; + return (contigmalloc(PAGE_SIZE, M_PMAPPDPT, 0, 0x0ULL, 0xffffffffULL, + 1, 0)); +} +#endif + +/* + * Initialize the pmap module. + * Called by vm_init, to initialize any structures that the pmap + * system needs to map virtual memory. + */ +void +pmap_init(void) +{ + int shpgperproc = PMAP_SHPGPERPROC; + + /* + * Initialize the address space (zone) for the pv entries. Set a + * high water mark so that the system can recover from excessive + * numbers of pv entries. 
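+ * The limit defaults to shpgperproc * maxproc plus the number of
+ * physical pages and may be overridden with the vm.pmap.shpgperproc
+ * and vm.pmap.pv_entries tunables; the high water mark is set to 90%
+ * of the limit.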
+ */ + pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); + TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); + pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; + TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); + pv_entry_high_water = 9 * (pv_entry_max / 10); + uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); + +#if defined(PAE) && !defined(XEN) + pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, + NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, + UMA_ZONE_VM | UMA_ZONE_NOFREE); + uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); +#endif +} + +void +pmap_init2() +{ +} + + +/*************************************************** + * Low level helper routines..... + ***************************************************/ + +/* + * Determine the appropriate bits to set in a PTE or PDE for a specified + * caching mode. + */ +static int +pmap_cache_bits(int mode, boolean_t is_pde) +{ + int pat_flag, pat_index, cache_bits; + + /* The PAT bit is different for PTE's and PDE's. */ + pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; + + /* If we don't support PAT, map extended modes to older ones. */ + if (!(cpu_feature & CPUID_PAT)) { + switch (mode) { + case PAT_UNCACHEABLE: + case PAT_WRITE_THROUGH: + case PAT_WRITE_BACK: + break; + case PAT_UNCACHED: + case PAT_WRITE_COMBINING: + case PAT_WRITE_PROTECTED: + mode = PAT_UNCACHEABLE; + break; + } + } + + /* Map the caching mode to a PAT index. */ + switch (mode) { +#ifdef PAT_WORKS + case PAT_UNCACHEABLE: + pat_index = 3; + break; + case PAT_WRITE_THROUGH: + pat_index = 1; + break; + case PAT_WRITE_BACK: + pat_index = 0; + break; + case PAT_UNCACHED: + pat_index = 2; + break; + case PAT_WRITE_COMBINING: + pat_index = 5; + break; + case PAT_WRITE_PROTECTED: + pat_index = 4; + break; +#else + case PAT_UNCACHED: + case PAT_UNCACHEABLE: + case PAT_WRITE_PROTECTED: + pat_index = 3; + break; + case PAT_WRITE_THROUGH: + pat_index = 1; + break; + case PAT_WRITE_BACK: + pat_index = 0; + break; + case PAT_WRITE_COMBINING: + pat_index = 2; + break; +#endif + default: + panic("Unknown caching mode %d\n", mode); + } + + /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ + cache_bits = 0; + if (pat_index & 0x4) + cache_bits |= pat_flag; + if (pat_index & 0x2) + cache_bits |= PG_NC_PCD; + if (pat_index & 0x1) + cache_bits |= PG_NC_PWT; + return (cache_bits); +} +#ifdef SMP +/* + * For SMP, these functions have to use the IPI mechanism for coherence. + * + * N.B.: Before calling any of the following TLB invalidation functions, + * the calling processor must ensure that all stores updating a non- + * kernel page table are globally performed. Otherwise, another + * processor could cache an old, pre-update entry without being + * invalidated. This can happen one of two ways: (1) The pmap becomes + * active on another processor after its pm_active field is checked by + * one of the following functions but before a store updating the page + * table is globally performed. (2) The pmap becomes active on another + * processor before its pm_active field is checked but due to + * speculative loads one of the following functions stills reads the + * pmap as inactive on the other processor. + * + * The kernel page table is exempt because its pm_active field is + * immutable. The kernel page table is always active on every + * processor. 
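+ *
+ * The functions below pin the calling thread to its CPU while the
+ * per-CPU cpumask and other_cpus values are read and the IPIs are
+ * issued; the page and range variants also flush any queued Xen
+ * page table updates (PT_UPDATES_FLUSH) before unpinning.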
+ */ +void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +{ + u_int cpumask; + u_int other_cpus; + + CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", + pmap, va); + + sched_pin(); + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + invlpg(va); + smp_invlpg(va); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + invlpg(va); + if (pmap->pm_active & other_cpus) + smp_masked_invlpg(pmap->pm_active & other_cpus, va); + } + PT_UPDATES_FLUSH(); + sched_unpin(); +} + +void +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + u_int cpumask; + u_int other_cpus; + vm_offset_t addr; + + CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x", + pmap, sva, eva); + + sched_pin(); + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + for (addr = sva; addr < eva; addr += PAGE_SIZE) + invlpg(addr); + smp_invlpg_range(sva, eva); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + for (addr = sva; addr < eva; addr += PAGE_SIZE) + invlpg(addr); + if (pmap->pm_active & other_cpus) + smp_masked_invlpg_range(pmap->pm_active & other_cpus, + sva, eva); + } + PT_UPDATES_FLUSH(); + sched_unpin(); +} + +void +pmap_invalidate_all(pmap_t pmap) +{ + u_int cpumask; + u_int other_cpus; + + CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap); + sched_pin(); + if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + invltlb(); + smp_invltlb(); + } else { + cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); + if (pmap->pm_active & cpumask) + invltlb(); + if (pmap->pm_active & other_cpus) + smp_masked_invltlb(pmap->pm_active & other_cpus); + } + sched_unpin(); +} + +void +pmap_invalidate_cache(void) +{ + + sched_pin(); + wbinvd(); + smp_cache_flush(); + sched_unpin(); +} +#else /* !SMP */ +/* + * Normal, non-SMP, 486+ invalidation functions. + * We inline these within pmap.c for speed. + */ +PMAP_INLINE void +pmap_invalidate_page(pmap_t pmap, vm_offset_t va) +{ + if (pmap == kernel_pmap || pmap->pm_active) { + CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", + pmap, va); + invlpg(va); + PT_UPDATES_FLUSH(); + } +} + +PMAP_INLINE void +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + vm_offset_t addr; + + if (pmap == kernel_pmap || pmap->pm_active) { + if (eva - sva > PAGE_SIZE) + CTR3(KTR_PMAP, + "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x", + pmap, sva, eva); + for (addr = sva; addr < eva; addr += PAGE_SIZE) + invlpg(addr); + PT_UPDATES_FLUSH(); + } +} + +PMAP_INLINE void +pmap_invalidate_all(pmap_t pmap) +{ + + + if (pmap == kernel_pmap || pmap->pm_active) { + CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap); + invltlb(); + } +} + +PMAP_INLINE void +pmap_invalidate_cache(void) +{ + + wbinvd(); +} +#endif /* !SMP */ + +/* + * Are we current address space or kernel? N.B. We return FALSE when + * a pmap's page table is in use because a kernel thread is borrowing + * it. The borrowed page table can change spontaneously, making any + * dependence on its continued use subject to a race condition. + */ +static __inline int +pmap_is_current(pmap_t pmap) +{ + + return (pmap == kernel_pmap || + (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && + (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); +} + +/* + * If the given pmap is not the current or kernel pmap, the returned pte must + * be released by passing it to pmap_pte_release(). 
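+ *
+ * For a pmap other than the current or kernel pmap, the pte is
+ * reached through the temporary PMAP2/PADDR2 window, which is
+ * protected by PMAP2mutex; pmap_pte_release() clears that mapping
+ * and drops the mutex.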
+ */ +pt_entry_t * +pmap_pte(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t newpf; + pd_entry_t *pde; + + pde = pmap_pde(pmap, va); + if (*pde & PG_PS) + return (pde); + if (*pde != 0) { + /* are we current address space or kernel? */ + if (pmap_is_current(pmap)) + return (vtopte(va)); + mtx_lock(&PMAP2mutex); + newpf = *pde & PG_FRAME; + if ((*PMAP2 & PG_FRAME) != newpf) { +#ifdef XEN + PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M); + CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x", + pmap, va, (*PMAP2 & 0xffffffff)); +#else + *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M; + pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); +#endif + } + return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); + } + return (0); +} + +/* + * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte + * being NULL. + */ +static __inline void +pmap_pte_release(pt_entry_t *pte) +{ + + if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) { + CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx", + *PMAP2); + PT_SET_VA_MA(PMAP2, 0, TRUE); + mtx_unlock(&PMAP2mutex); + } +} + +static __inline void +invlcaddr(void *caddr) +{ + + invlpg((u_int)caddr); + PT_UPDATES_FLUSH(); +} + +/* + * Super fast pmap_pte routine best used when scanning + * the pv lists. This eliminates many coarse-grained + * invltlb calls. Note that many of the pv list + * scans are across different pmaps. It is very wasteful + * to do an entire invltlb for checking a single mapping. + * + * If the given pmap is not the current pmap, vm_page_queue_mtx + * must be held and curthread pinned to a CPU. + */ +static pt_entry_t * +pmap_pte_quick(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t newpf; + pd_entry_t *pde; + + pde = pmap_pde(pmap, va); + if (*pde & PG_PS) + return (pde); + + /* + * + * XXX hitting this indicates that things are AFU + */ + if (*pde != 0) { + /* are we current address space or kernel? */ + if (pmap_is_current(pmap)) + return (vtopte(va)); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); + newpf = *pde & PG_FRAME; + if ((*PMAP1 & PG_FRAME) != newpf) { +#ifdef XEN + PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M); + CTR3(KTR_PMAP, + "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x", + pmap, va, (u_long)*PMAP1); +#else + *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M; +#endif +#ifdef SMP + PMAP1cpu = PCPU_GET(cpuid); +#endif + invlcaddr(PADDR1); + PMAP1changed++; + } else +#ifdef SMP + if (PMAP1cpu != PCPU_GET(cpuid)) { + PMAP1cpu = PCPU_GET(cpuid); + invlcaddr(PADDR1); + PMAP1changedcpu++; + } else +#endif + PMAP1unchanged++; + return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); + } + return (0); +} + +/* + * Routine: pmap_extract + * Function: + * Extract the physical page address associated + * with the given map/virtual_address pair. + */ + +vm_paddr_t +pmap_extract(pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t rtval; + pt_entry_t *pte; + pd_entry_t pde; + + rtval = 0; + PMAP_LOCK(pmap); + pde = pmap->pm_pdir[va >> PDRSHIFT]; + if (pde != 0) { + if ((pde & PG_PS) != 0) { +#ifdef XEN + rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK); +#else + rtval = (pde & ~PDRMASK) | (va & PDRMASK); +#endif + PMAP_UNLOCK(pmap); + return (rtval); + } + pte = pmap_pte(pmap, va); +#ifdef XEN + rtval = ((*pte ? 
xpmap_mtop(*pte) : 0) & PG_FRAME) | (va & PAGE_MASK); + +#else + rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); +#endif + pmap_pte_release(pte); + } + PMAP_UNLOCK(pmap); + return (rtval); +} + +vm_paddr_t +pmap_extract_ma(pmap_t pmap, vm_offset_t va) +{ + vm_paddr_t rtval; + pt_entry_t *pte; + pd_entry_t pde; + + rtval = 0; + PMAP_LOCK(pmap); + pde = pmap->pm_pdir[va >> PDRSHIFT]; + if (pde != 0) { + if ((pde & PG_PS) != 0) { + rtval = (pde & ~PDRMASK) | (va & PDRMASK); + + PMAP_UNLOCK(pmap); + return (rtval); + } + pte = pmap_pte(pmap, va); + + rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); + pmap_pte_release(pte); + } + PMAP_UNLOCK(pmap); + return (rtval); +} + + +/* + * Routine: pmap_extract_and_hold + * Function: + * Atomically extract and hold the physical page + * with the given pmap and virtual address pair + * if that mapping permits the given protection. + */ +vm_page_t +pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) +{ + pd_entry_t pde; + pt_entry_t pte; + vm_page_t m; + + m = NULL; + vm_page_lock_queues(); + PMAP_LOCK(pmap); + pde = PT_GET(pmap_pde(pmap, va)); + if (pde != 0) { + if (pde & PG_PS) { + if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { + m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) | + (va & PDRMASK)); + vm_page_hold(m); + } + } else { + sched_pin(); + pte = PT_GET(pmap_pte_quick(pmap, va)); + if (pte != 0 && + ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { + m = PHYS_TO_VM_PAGE(pte & PG_FRAME); + vm_page_hold(m); + } + sched_unpin(); + } + } + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); + return (m); +} + +/*************************************************** + * Low level mapping routines..... + ***************************************************/ + +/* + * Add a wired page to the kva. + * Note: not SMP coherent. + */ +PMAP_INLINE void +pmap_kenter(vm_offset_t va, vm_paddr_t pa) +{ + PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag); +} + +PMAP_INLINE void +pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa) +{ + + PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag); +} + +PMAP_INLINE void +pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) +{ + PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0)); +} + +/* + * Remove a page from the kernel pagetables. + * Note: not SMP coherent. + */ +PMAP_INLINE void +pmap_kremove(vm_offset_t va) +{ + pt_entry_t *pte; + + pte = vtopte(va); + PT_SET_VA_MA(pte, 0, FALSE); +} + +/* + * Used to map a range of physical addresses into kernel + * virtual address space. + * + * The value passed in '*virt' is a suggested virtual address for + * the mapping. Architectures which can support a direct-mapped + * physical to virtual region can return the appropriate address + * within that region, leaving '*virt' unchanged. Other + * architectures should map the pages starting at '*virt' and + * update '*virt' with the first usable address after the mapped + * region. + */ +vm_offset_t +pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) +{ + vm_offset_t va, sva; + + va = sva = *virt; + while (start < end) { + pmap_kenter(va, start); + va += PAGE_SIZE; + start += PAGE_SIZE; + } + pmap_invalidate_range(kernel_pmap, sva, va); + *virt = va; + return (sva); +} + + +/* + * Add a list of wired pages to the kva + * this routine is only used for temporary + * kernel mappings that do not need to have + * page modification or references recorded. + * Note that old mappings are simply written + * over. The page *must* be wired. + * Note: SMP coherent. Uses a ranged shootdown IPI. 
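+ *
+ * The old pte bits are accumulated to decide whether the range needs
+ * a TLB invalidation; under Xen the new entries are queued with
+ * PT_SET_VA() and flushed once with PT_UPDATES_FLUSH().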
+ */ +void +pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) +{ + pt_entry_t *endpte, oldpte, *pte; + + oldpte = 0; + pte = vtopte(sva); + endpte = pte + count; + vm_page_lock_queues(); + critical_enter(); + while (pte < endpte) { + oldpte |= *pte; +#ifdef XEN + PT_SET_VA(pte, VM_PAGE_TO_PHYS(*ma) | pgeflag | PG_RW | PG_V, FALSE); +#else + pte_store(pte, VM_PAGE_TO_PHYS(*ma) | pgeflag | PG_RW | PG_V); +#endif + pte++; + ma++; + } + PT_UPDATES_FLUSH(); + if ((oldpte & PG_V) != 0) + pmap_invalidate_range(kernel_pmap, sva, sva + count * + PAGE_SIZE); + vm_page_unlock_queues(); + critical_exit(); +} + +/* + * This routine tears out page mappings from the + * kernel -- it is meant only for temporary mappings. + * Note: SMP coherent. Uses a ranged shootdown IPI. + */ +void +pmap_qremove(vm_offset_t sva, int count) +{ + vm_offset_t va; + + va = sva; + vm_page_lock_queues(); + critical_enter(); + while (count-- > 0) { + pmap_kremove(va); + va += PAGE_SIZE; + } + pmap_invalidate_range(kernel_pmap, sva, va); + critical_exit(); + vm_page_unlock_queues(); + +} + +/*************************************************** + * Page table page management routines..... + ***************************************************/ +static PMAP_INLINE void +pmap_free_zero_pages(vm_page_t free) +{ + vm_page_t m; + + while (free != NULL) { + m = free; + free = m->right; + vm_page_free_zero(m); + } +} + +/* + * This routine unholds page table pages, and if the hold count + * drops to zero, then it decrements the wire count. + */ +static PMAP_INLINE int +pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) +{ + + --m->wire_count; + if (m->wire_count == 0) + return _pmap_unwire_pte_hold(pmap, m, free); + else + return 0; +} + +static int +_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) +{ + vm_offset_t pteva; + + PT_UPDATES_FLUSH(); + /* + * unmap the page table page + */ +#ifdef XEN + xen_pt_unpin(pmap->pm_pdir[m->pindex]); + PT_SET_VA_MA(&pmap->pm_pdir[m->pindex], 0, TRUE); + pmap_zero_page(m); +#else + pmap->pm_pdir[m->pindex] = 0; +#endif + --pmap->pm_stats.resident_count; + + /* + * This is a release store so that the ordinary store unmapping + * the page table page is globally performed before TLB shoot- + * down is begun. + */ + atomic_subtract_rel_int(&cnt.v_wire_count, 1); + + /* + * Do an invltlb to make the invalidated mapping + * take effect immediately. + */ + pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); + pmap_invalidate_page(pmap, pteva); + + /* + * Put page on a list so that it is released after + * *ALL* TLB shootdown is done + */ + m->right = *free; + *free = m; + + return 1; +} + +/* + * After removing a page table entry, this routine is used to + * conditionally free the page, and manage the hold/wire counts. 
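+ * Returns 1 if the page table page was released (and queued on the
+ * caller's free list for deferred freeing), 0 otherwise; kernel
+ * virtual addresses are never affected.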
+ */ +static int +pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free) +{ + pd_entry_t ptepde; + vm_page_t mpte; + + if (va >= VM_MAXUSER_ADDRESS) + return 0; + ptepde = PT_GET(pmap_pde(pmap, va)); + mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); + return pmap_unwire_pte_hold(pmap, mpte, free); +} + +void +pmap_pinit0(pmap) + struct pmap *pmap; +{ + + PMAP_LOCK_INIT(pmap); + pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); +#ifdef PAE + pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); +#endif + pmap->pm_active = 0; + PCPU_SET(curpmap, pmap); + TAILQ_INIT(&pmap->pm_pvlist); + bzero(&pmap->pm_stats, sizeof pmap->pm_stats); + mtx_lock_spin(&allpmaps_lock); + LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); + mtx_unlock_spin(&allpmaps_lock); +} + +/* + * Initialize a preallocated and zeroed pmap structure, + * such as one in a vmspace structure. + */ +void +pmap_pinit(struct pmap *pmap) +{ +#ifdef XEN + vm_page_t m, ptdpg[NPGPTD + 1]; + int npgptd = NPGPTD + 1; +#else + vm_page_t m, ptdpg[NPGPTD]; + vm_paddr_t pa; + int npgptd = NPGPTD; +#endif + static int color; + int i; + + PMAP_LOCK_INIT(pmap); + + /* + * No need to allocate page table space yet but we do need a valid + * page directory table. + */ + if (pmap->pm_pdir == NULL) { + pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map, + NBPTD); + +#if defined(PAE) +#ifdef XEN + pmap->pm_pdpt = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1); +#else + pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); + KASSERT(((vm_offset_t)pmap->pm_pdpt & + ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, + ("pmap_pinit: pdpt misaligned")); + KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), + ("pmap_pinit: pdpt above 4g")); +#endif /* !XEN */ +#endif /* PAE */ + } + + /* + * allocate the page directory page(s) + */ + for (i = 0; i < npgptd;) { + m = vm_page_alloc(NULL, color++, + VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | + VM_ALLOC_ZERO); + if (m == NULL) + VM_WAIT; + else { + ptdpg[i++] = m; + } + } + + pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); + + for (i = 0; i < NPGPTD; i++) { + if ((ptdpg[i]->flags & PG_ZERO) == 0) + pagezero(&pmap->pm_pdir[i*NPDEPG]); + } + + mtx_lock_spin(&allpmaps_lock); + LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); + mtx_unlock_spin(&allpmaps_lock); + /* Wire in kernel global address entries. 
*/ + bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); + +#ifdef PAE +#ifdef XEN + pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1); + if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0) + bzero(pmap->pm_pdpt, PAGE_SIZE); +#endif + for (i = 0; i < NPGPTD; i++) { + vm_paddr_t ma; + + ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i])); + pmap->pm_pdpt[i] = ma | PG_V; + + } +#endif + +#ifdef XEN + for (i = 0; i < NPGPTD; i++) { + pt_entry_t *pd; + vm_paddr_t ma; + + ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i])); + pd = pmap->pm_pdir + (i * NPDEPG); + PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW)); + + } + +#ifdef PAE + PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW); +#endif + vm_page_lock_queues(); + xen_flush_queue(); + xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD]))); + for (i = 0; i < NPGPTD; i++) { + vm_paddr_t ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i])); + PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE); + } + xen_flush_queue(); + vm_page_unlock_queues(); +#else + /* install self-referential address mapping entry(s) */ + for (i = 0; i < NPGPTD; i++) { + pa = VM_PAGE_TO_PHYS(ptdpg[i]); + pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; +#ifdef PAE + pmap->pm_pdpt[i] = pa | PG_V; +#endif + } +#endif + pmap->pm_active = 0; + TAILQ_INIT(&pmap->pm_pvlist); + bzero(&pmap->pm_stats, sizeof pmap->pm_stats); +} + +/* + * this routine is called if the page table page is not + * mapped correctly. + */ +static vm_page_t +_pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags) +{ + vm_paddr_t ptepa; + vm_page_t m; + + KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || + (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, + ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); + + /* + * Allocate a page table page. + */ + if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | + VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { + if (flags & M_WAITOK) { + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); + VM_WAIT; + vm_page_lock_queues(); + PMAP_LOCK(pmap); + } + + /* + * Indicate the need to retry. While waiting, the page table + * page may have been allocated. + */ + return (NULL); + } + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + + /* + * Map the pagetable page into the process address space, if + * it isn't already there. + */ + + pmap->pm_stats.resident_count++; +#ifdef XEN + ptepa = xpmap_ptom(VM_PAGE_TO_PHYS(m)); + xen_pt_pin(ptepa); + PT_SET_VA_MA(&pmap->pm_pdir[ptepindex], + (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE); + + KASSERT(pmap->pm_pdir[ptepindex], + ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex)); +#else + ptepa = VM_PAGE_TO_PHYS(m); + pmap->pm_pdir[ptepindex] = + (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); +#endif + return (m); +} + +static vm_page_t +pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) +{ + unsigned ptepindex; + pd_entry_t ptepa; + vm_page_t m; + + KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || + (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, + ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); + + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; +retry: + /* + * Get the page directory entry + */ + ptepa = pmap->pm_pdir[ptepindex]; + + /* + * XXX track me down and fix me! + */ + if ((ptepa & PG_V) == 0) { + if (ptepa && ((ptepa & PG_V) == 0)) + panic("phys addr set but not valid"); + } + + /* + * This supports switching from a 4MB page to a + * normal 4K page. 
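+ * The 4MB mapping is simply cleared, the resident count adjusted and
+ * the TLB flushed; the normal allocation path below then installs a
+ * regular page table page in its place.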
+ */ + if (ptepa & PG_PS) { + pmap->pm_pdir[ptepindex] = 0; + ptepa = 0; + pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + pmap_invalidate_all(kernel_pmap); + } + + /* + * If the page table page is mapped, we just increment the + * hold count, and activate it. + */ + if (ptepa) { +#ifdef XEN + m = PHYS_TO_VM_PAGE(xpmap_mtop(ptepa)); +#else + m = PHYS_TO_VM_PAGE(ptepa); +#endif + m->wire_count++; + } else { + /* + * Here if the pte page isn't mapped, or if it has + * been deallocated. + */ + CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x", + pmap, va, flags); + + m = _pmap_allocpte(pmap, ptepindex, flags); + if (m == NULL && (flags & M_WAITOK)) + goto retry; + KASSERT(pmap->pm_pdir[ptepindex], + ("ptepindex=%d did not get mapped", ptepindex)); + } + return (m); +} + + +/*************************************************** +* Pmap allocation/deallocation routines. + ***************************************************/ + +#ifdef SMP +/* + * Deal with a SMP shootdown of other users of the pmap that we are + * trying to dispose of. This can be a bit hairy. + */ +static u_int *lazymask; +static u_int lazyptd; +static volatile u_int lazywait; + +void pmap_lazyfix_action(void); + +void +pmap_lazyfix_action(void) +{ + u_int mymask = PCPU_GET(cpumask); + + if (rcr3() == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + atomic_clear_int(lazymask, mymask); + atomic_store_rel_int(&lazywait, 1); +} + +static void +pmap_lazyfix_self(u_int mymask) +{ + + if (rcr3() == lazyptd) + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + atomic_clear_int(lazymask, mymask); +} + + +static void +pmap_lazyfix(pmap_t pmap) +{ + u_int mymask; + u_int mask; + register u_int spins; + + while ((mask = pmap->pm_active) != 0) { + spins = 50000000; + mask = mask & -mask; /* Find least significant set bit */ + mtx_lock_spin(&smp_ipi_mtx); +#ifdef PAE + lazyptd = vtophys(pmap->pm_pdpt); +#else + lazyptd = vtophys(pmap->pm_pdir); +#endif + mymask = PCPU_GET(cpumask); + if (mask == mymask) { + lazymask = &pmap->pm_active; + pmap_lazyfix_self(mymask); + } else { + atomic_store_rel_int((u_int *)&lazymask, + (u_int)&pmap->pm_active); + atomic_store_rel_int(&lazywait, 0); + ipi_selected(mask, IPI_LAZYPMAP); + while (lazywait == 0) { + ia32_pause(); + if (--spins == 0) + break; + } + } + mtx_unlock_spin(&smp_ipi_mtx); + if (spins == 0) + printf("pmap_lazyfix: spun for 50000000\n"); + } +} + +#else /* SMP */ + +/* + * Cleaning up on uniprocessor is easy. For various reasons, we're + * unlikely to have to even execute this code, including the fact + * that the cleanup is deferred until the parent does a wait(2), which + * means that another userland process has run. + */ +static void +pmap_lazyfix(pmap_t pmap) +{ + u_int cr3; + + cr3 = vtophys(pmap->pm_pdir); + if (cr3 == rcr3()) { + load_cr3(PCPU_GET(curpcb)->pcb_cr3); + pmap->pm_active &= ~(PCPU_GET(cpumask)); + } +} +#endif /* SMP */ + +/* + * Release any resources held by the given physical map. + * Called when a pmap initialized by pmap_pinit is being released. + * Should only be called if the map contains no valid mappings. 
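+ *
+ * Under Xen the page directory pages (and, with PAE, the PDPT page)
+ * must be unpinned before they can be handed back to the VM system.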
+ */ +void +pmap_release(pmap_t pmap) +{ +#ifdef XEN + vm_page_t m, ptdpg[NPGPTD+1]; + int npgptd = NPGPTD + 1; +#else + vm_page_t m, ptdpg[NPGPTD]; + int npgptd = NPGPTD; +#endif + int i; + + KASSERT(pmap->pm_stats.resident_count == 0, + ("pmap_release: pmap resident count %ld != 0", + pmap->pm_stats.resident_count)); + + pmap_lazyfix(pmap); + mtx_lock_spin(&allpmaps_lock); + LIST_REMOVE(pmap, pm_list); + mtx_unlock_spin(&allpmaps_lock); + +#ifdef XEN + for (i = 0; i < NPGPTD; i++) + ptdpg[i] = PHYS_TO_VM_PAGE(xpmap_mtop(pmap->pm_pdir[PTDPTDI + i])); +#else + for (i = 0; i < NPGPTD; i++) + ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i]); +#endif + + bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * + sizeof(*pmap->pm_pdir)); + + pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); +#if defined(PAE) && defined(XEN) + ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt)); +#endif + + vm_page_lock_queues(); + for (i = 0; i < npgptd; i++) { + vm_paddr_t ma; + + m = ptdpg[i]; + ma = xpmap_ptom(VM_PAGE_TO_PHYS(m)); + /* unpinning L1 and L2 treated the same */ + xen_pgd_unpin(ma); +#ifdef PAE + KASSERT( +#ifdef XEN + xpmap_ptom(VM_PAGE_TO_PHYS(m)) +#else + VM_PAGE_TO_PHYS(m) +#endif + == (pmap->pm_pdpt[i] & PG_FRAME), + ("pmap_release: got wrong ptd page")); +#endif + m->wire_count--; + atomic_subtract_int(&cnt.v_wire_count, 1); + vm_page_free(m); + } + vm_page_unlock_queues(); + PMAP_LOCK_DESTROY(pmap); +} + +static int +kvm_size(SYSCTL_HANDLER_ARGS) +{ + unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; + + return sysctl_handle_long(oidp, &ksize, 0, req); +} +SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, + 0, 0, kvm_size, "IU", "Size of KVM"); + +static int +kvm_free(SYSCTL_HANDLER_ARGS) +{ + unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; + + return sysctl_handle_long(oidp, &kfree, 0, req); +} +SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, + 0, 0, kvm_free, "IU", "Amount of KVM free"); + +/* + * grow the number of kernel page table entries, if needed + */ +void +pmap_growkernel(vm_offset_t addr) +{ + struct pmap *pmap; + vm_paddr_t ptppaddr; + vm_page_t nkpg; + pd_entry_t newpdir; + + mtx_assert(&kernel_map->system_mtx, MA_OWNED); + if (kernel_vm_end == 0) { + kernel_vm_end = KERNBASE; + nkpt = 0; + while (pdir_pde(PTD, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + nkpt++; + if (kernel_vm_end - 1 >= kernel_map->max_offset) { + kernel_vm_end = kernel_map->max_offset; + break; + } + } + } + addr = roundup2(addr, PAGE_SIZE * NPTEPG); + if (addr - 1 >= kernel_map->max_offset) + addr = kernel_map->max_offset; + while (kernel_vm_end < addr) { + if (pdir_pde(PTD, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + if (kernel_vm_end - 1 >= kernel_map->max_offset) { + kernel_vm_end = kernel_map->max_offset; + break; + } + continue; + } + + nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT, + VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); + if (nkpg == NULL) + panic("pmap_growkernel: no memory to grow kernel"); + + nkpt++; + + pmap_zero_page(nkpg); + ptppaddr = VM_PAGE_TO_PHYS(nkpg); + newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); + vm_page_lock_queues(); + PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE); + mtx_lock_spin(&allpmaps_lock); + LIST_FOREACH(pmap, &allpmaps, pm_list) { + PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE); + } + mtx_unlock_spin(&allpmaps_lock); + 
vm_page_unlock_queues(); + + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + if (kernel_vm_end - 1 >= kernel_map->max_offset) { + kernel_vm_end = kernel_map->max_offset; + break; + } + } +} + + +/*************************************************** + * page management routines. + ***************************************************/ + +/* + * free the pv_entry back to the free list + */ +static PMAP_INLINE void +free_pv_entry(pv_entry_t pv) +{ + pv_entry_count--; + uma_zfree(pvzone, pv); +} + +/* + * get a new pv_entry, allocating a block from the system + * when needed. + * the memory allocation is performed bypassing the malloc code + * because of the possibility of allocations at interrupt time. + */ +static pv_entry_t +get_pv_entry(void) +{ + pv_entry_count++; + if ((pv_entry_count > pv_entry_high_water) && + (pmap_pagedaemon_waken == 0)) { + pmap_pagedaemon_waken = 1; + wakeup (&vm_pages_needed); + } + return uma_zalloc(pvzone, M_NOWAIT); +} + + +static void +pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) +{ + pv_entry_t pv; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if (m->md.pv_list_count < pmap->pm_stats.resident_count) { + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + if (pmap == pv->pv_pmap && va == pv->pv_va) + break; + } + } else { + TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { + if (va == pv->pv_va) + break; + } + } + KASSERT(pv != NULL, ("pmap_remove_entry: pv not found")); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count--; + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_flag_clear(m, PG_WRITEABLE); + TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); + free_pv_entry(pv); +} + +/* + * Create a pv entry for page at pa for + * (pmap, va). + */ +static void +pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + pv_entry_t pv; + + pv = get_pv_entry(); + if (pv == NULL) + panic("no pv entries: increase vm.pmap.shpgperproc"); + pv->pv_va = va; + pv->pv_pmap = pmap; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count++; +} + +/* + * Conditionally create a pv entry. + */ +static boolean_t +pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) +{ + pv_entry_t pv; + + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if (pv_entry_count < pv_entry_high_water && + (pv = uma_zalloc(pvzone, M_NOWAIT)) != NULL) { + pv_entry_count++; + pv->pv_va = va; + pv->pv_pmap = pmap; + TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count++; + return (TRUE); + } else + return (FALSE); +} + +/* + * pmap_remove_pte: do the things to unmap a page in a process + */ +static int +pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free) +{ + pt_entry_t oldpte; + vm_page_t m; + + CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x", + pmap, (u_long)*ptq, va); + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); +#ifdef XEN + oldpte = *ptq; + PT_SET_VA_MA(ptq, 0, TRUE); +#else + oldpte = pte_load_clear(ptq); +#endif + if (oldpte & PG_W) + pmap->pm_stats.wired_count -= 1; + /* + * Machines that don't support invlpg, also don't support + * PG_G. 
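+ * A PG_G mapping is always a kernel mapping (pgeflag is only applied
+ * to kernel_pmap entries), so the invalidation is done against the
+ * kernel pmap.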
+ */ + if (oldpte & PG_G) + pmap_invalidate_page(kernel_pmap, va); + pmap->pm_stats.resident_count -= 1; + if (oldpte & PG_MANAGED) { + m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte)); + if (oldpte & PG_M) { + KASSERT((oldpte & PG_RW), + ("pmap_remove_pte: modified page not writable: va: %#x, pte: %#jx", + va, (uintmax_t)oldpte)); + vm_page_dirty(m); + } + if (oldpte & PG_A) + vm_page_flag_set(m, PG_REFERENCED); + pmap_remove_entry(pmap, m, va); + } + return (pmap_unuse_pt(pmap, va, free)); +} + +/* + * Remove a single page from a process address space + */ +static void +pmap_remove_page(pmap_t pmap, vm_offset_t va) +{ + pt_entry_t *pte; + vm_page_t free = NULL; + + CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x", + pmap, va); + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) + return; + pmap_remove_pte(pmap, pte, va, &free); + pmap_invalidate_page(pmap, va); + pmap_free_zero_pages(free); +} + +/* + * Remove the given range of addresses from the specified map. + * + * It is assumed that the start and end are properly + * rounded to the page size. + */ +void +pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + vm_offset_t pdnxt; + pd_entry_t ptpaddr; + pt_entry_t *pte; + vm_page_t free = NULL; + int anyvalid; + + CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x", + pmap, sva, eva); + + /* + * Perform an unsynchronized read. This is, however, safe. + */ + if (pmap->pm_stats.resident_count == 0) + return; + + anyvalid = 0; + + vm_page_lock_queues(); + sched_pin(); + PMAP_LOCK(pmap); + + /* + * special handling of removing one page. a very + * common operation and easy to short circuit some + * code. + */ + if ((sva + PAGE_SIZE == eva) && + ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { + pmap_remove_page(pmap, sva); + goto out; + } + + for (; sva < eva; sva = pdnxt) { + unsigned pdirindex; + + /* + * Calculate index for next page table. + */ + pdnxt = (sva + NBPDR) & ~PDRMASK; + if (pmap->pm_stats.resident_count == 0) + break; + + pdirindex = sva >> PDRSHIFT; + ptpaddr = pmap->pm_pdir[pdirindex]; + + /* + * Weed out invalid mappings. Note: we assume that the page + * directory table is always allocated, and in kernel virtual. + */ + if (ptpaddr == 0) + continue; + + /* + * Check for large page. + */ + if ((ptpaddr & PG_PS) != 0) { +#ifdef XEN + PT_SET_VA_MA(&pmap->pm_pdir[pdirindex], 0, TRUE); +#else + pmap->pm_pdir[pdirindex] = 0; +#endif + pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; + anyvalid = 1; + continue; + } + + /* + * Limit our scan to either the end of the va represented + * by the current page table page, or to the end of the + * range being removed. + */ + if (pdnxt > eva) + pdnxt = eva; + + for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, + sva += PAGE_SIZE) { + if (*pte == 0) + continue; + + /* + * The TLB entry for a PG_G mapping is invalidated + * by pmap_remove_pte(). + */ + if ((*pte & PG_G) == 0) + anyvalid = 1; + if (pmap_remove_pte(pmap, pte, sva, &free)) + break; + } + } + PT_UPDATES_FLUSH(); + +out: + if (anyvalid) { + pmap_invalidate_all(pmap); + pmap_free_zero_pages(free); + } + sched_unpin(); + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + +/* + * Routine: pmap_remove_all + * Function: + * Removes this physical page from + * all physical maps in which it resides. + * Reflects back modify bits to the pager. 
+ * + * Notes: + * Original versions of this routine were very + * inefficient because they iteratively called + * pmap_remove (slow...) + */ + +void +pmap_remove_all(vm_page_t m) +{ + register pv_entry_t pv; + pt_entry_t *pte, tpte; + vm_page_t free; + +#if defined(PMAP_DIAGNOSTIC) + /* + * XXX This makes pmap_remove_all() illegal for non-managed pages! + */ + if (m->flags & PG_FICTITIOUS) { + panic("pmap_remove_all: illegal for unmanaged page, va: 0x%jx", + VM_PAGE_TO_PHYS(m)); + } +#endif + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + sched_pin(); + while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { + PMAP_LOCK(pv->pv_pmap); + pv->pv_pmap->pm_stats.resident_count--; + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); +#if defined(XEN) + tpte = *pte; + PT_SET_VA_MA(pte, 0, TRUE); +#else + tpte = pte_load_clear(pte); +#endif + if (tpte & PG_W) + pv->pv_pmap->pm_stats.wired_count--; + if (tpte & PG_A) + vm_page_flag_set(m, PG_REFERENCED); + + /* + * Update the vm_page_t clean and reference bits. + */ + if (tpte & PG_M) { + KASSERT((tpte & PG_RW), + ("pmap_remove_all: modified page not writable: va: %#x, pte: %#jx", + pv->pv_va, (uintmax_t)tpte)); + vm_page_dirty(m); + } + free = NULL; + pmap_unuse_pt(pv->pv_pmap, pv->pv_va, &free); + pmap_invalidate_page(pv->pv_pmap, pv->pv_va); + pmap_free_zero_pages(free); + TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + m->md.pv_list_count--; + PMAP_UNLOCK(pv->pv_pmap); + free_pv_entry(pv); + } + vm_page_flag_clear(m, PG_WRITEABLE); + PT_UPDATES_FLUSH(); + sched_unpin(); +} + +/* + * Set the physical protection on the + * specified range of this map as requested. + */ +void +pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) +{ + vm_offset_t pdnxt; + pd_entry_t ptpaddr; + pt_entry_t *pte; + int anychanged; + + CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x", + pmap, sva, eva, prot); + + if ((prot & VM_PROT_READ) == VM_PROT_NONE) { + pmap_remove(pmap, sva, eva); + return; + } + + if (prot & VM_PROT_WRITE) + return; + + anychanged = 0; + + vm_page_lock_queues(); + sched_pin(); + PMAP_LOCK(pmap); + for (; sva < eva; sva = pdnxt) { + unsigned pdirindex; + vm_paddr_t obits, pbits; + + pdnxt = (sva + NBPDR) & ~PDRMASK; + + pdirindex = sva >> PDRSHIFT; + ptpaddr = pmap->pm_pdir[pdirindex]; + + /* + * Weed out invalid mappings. Note: we assume that the page + * directory table is always allocated, and in kernel virtual. + */ + if (ptpaddr == 0) + continue; + + /* + * Check for large page. + */ + if ((ptpaddr & PG_PS) != 0) { + pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW); + anychanged = 1; + continue; + } + + if (pdnxt > eva) + pdnxt = eva; + + for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, + sva += PAGE_SIZE) { + vm_page_t m; + +retry: + /* + * Regardless of whether a pte is 32 or 64 bits in + * size, PG_RW, PG_A, and PG_M are among the least + * significant 32 bits. 
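+ * The update is therefore done with a 32-bit compare-and-set (or,
+ * under Xen, a PT_SET_VA_MA() followed by a re-read of the pte),
+ * retrying if the entry changed underneath us.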
+ */ + obits = pbits = *pte; + if (pbits & PG_MANAGED) { +#ifdef XEN + pt_entry_t pteval = xpmap_mtop(*pte); +#else + pt_entry_t pteval = *pte; +#endif + + m = NULL; + if (pbits & PG_A) { + m = PHYS_TO_VM_PAGE(pteval); + + vm_page_flag_set(m, PG_REFERENCED); + pbits &= ~PG_A; + } + if ((pbits & PG_M) != 0) { + if (m == NULL) + m = PHYS_TO_VM_PAGE(pteval); + vm_page_dirty(m); + } + } + + pbits &= ~(PG_RW | PG_M); + + if (pbits != obits) { +#ifdef XEN + obits = *pte; + PT_SET_VA_MA(pte, pbits, TRUE); + if (*pte != pbits) + goto retry; +#else + if (!atomic_cmpset_int((u_int *)pte, obits, + pbits)) + goto retry; +#endif + if (obits & PG_G) + pmap_invalidate_page(pmap, sva); + else + anychanged = 1; + } + } + } + PT_UPDATES_FLUSH(); + if (anychanged) + pmap_invalidate_all(pmap); + sched_unpin(); + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + +/* + * Insert the given physical page (p) at + * the specified virtual address (v) in the + * target physical map with the protection requested. + * + * If specified, the page will be wired down, meaning + * that the related pte can not be reclaimed. + * + * NB: This is the only routine which MAY NOT lazy-evaluate + * or lose information. That is, this routine must actually + * insert this page into the given map NOW. + */ +void +pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, + boolean_t wired) +{ + vm_paddr_t pa; + pd_entry_t *pde; + register pt_entry_t *pte; + vm_paddr_t opa; + pt_entry_t origpte, newpte; + vm_page_t mpte, om; + boolean_t invlva; + + + CTR5(KTR_PMAP, + "pmap_enter: pmap=%08p va=0x%08x ma=0x%08x prot=0x%x wired=%d", + pmap, va, xpmap_ptom(VM_PAGE_TO_PHYS(m)), prot, wired); + va &= PG_FRAME; +#ifdef PMAP_DIAGNOSTIC + if (va > VM_MAX_KERNEL_ADDRESS) + panic("pmap_enter: toobig"); + if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) + panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va); +#endif + + mpte = NULL; + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + sched_pin(); + + /* + * In the case that a page table page is not + * resident, we are creating it here. + */ + if (va < VM_MAXUSER_ADDRESS) { + mpte = pmap_allocpte(pmap, va, M_WAITOK); + } +#if 0 && defined(PMAP_DIAGNOSTIC) + else { + pd_entry_t *pdeaddr = pmap_pde(pmap, va); + origpte = *pdeaddr; + if ((origpte & PG_V) == 0) { + panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n", + pmap->pm_pdir[PTDPTDI], origpte, va); + } + } +#endif + + pde = pmap_pde(pmap, va); + if ((*pde & PG_PS) != 0) + panic("pmap_enter: attempted pmap_enter on 4MB page"); + pte = pmap_pte_quick(pmap, va); + + /* + * Page Directory table entry not valid, we need a new PT page + */ + if (pte == NULL) { + panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n", + (uintmax_t)pmap->pm_pdir[PTDPTDI], va); + } + + pa = VM_PAGE_TO_PHYS(m); + om = NULL; + + origpte = *pte; + if (origpte) + origpte = xpmap_mtop(origpte); + opa = origpte & PG_FRAME; + + /* + * Mapping has not changed, must be protection or wiring change. + */ + if (origpte && (opa == pa)) { + /* + * Wiring change, just update stats. We don't worry about + * wiring PT pages as they remain resident as long as there + * are valid mappings in them. Hence, if a user page is wired, + * the PT page will be also. 
+ */ + if (wired && ((origpte & PG_W) == 0)) + pmap->pm_stats.wired_count++; + else if (!wired && (origpte & PG_W)) + pmap->pm_stats.wired_count--; + + /* + * Remove extra pte reference + */ + if (mpte) + mpte->wire_count--; + + /* + * We might be turning off write access to the page, + * so we go ahead and sense modify status. + */ + if (origpte & PG_MANAGED) { + om = m; + pa |= PG_MANAGED; + } + goto validate; + } + /* + * Mapping has changed, invalidate old range and fall through to + * handle validating new mapping. + */ + if (opa) { + if (origpte & PG_W) + pmap->pm_stats.wired_count--; + if (origpte & PG_MANAGED) { + om = PHYS_TO_VM_PAGE(opa); + pmap_remove_entry(pmap, om, va); + } + if (mpte != NULL) { + mpte->wire_count--; + KASSERT(mpte->wire_count > 0, + ("pmap_enter: missing reference to page table page," + " va: 0x%x", va)); + } + } else + pmap->pm_stats.resident_count++; + + /* + * Enter on the PV list if part of our managed memory. + */ + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { + KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, + ("pmap_enter: managed mapping within the clean submap")); + pmap_insert_entry(pmap, va, m); + pa |= PG_MANAGED; + } + + /* + * Increment counters + */ + if (wired) + pmap->pm_stats.wired_count++; + +validate: + /* + * Now validate mapping with desired protection/wiring. + */ + newpte = (pt_entry_t)(pa | PG_V); + if ((prot & VM_PROT_WRITE) != 0) + newpte |= PG_RW; + if (wired) + newpte |= PG_W; + if (va < VM_MAXUSER_ADDRESS) + newpte |= PG_U; + if (pmap == kernel_pmap) + newpte |= pgeflag; + + critical_enter(); + /* + * if the mapping or permission bits are different, we need + * to update the pte. + */ + if ((origpte & ~(PG_M|PG_A)) != newpte) { + if (origpte & PG_V) { + invlva = FALSE; +#ifdef XEN + origpte = *pte; + PT_SET_VA(pte, newpte | PG_A, FALSE); +#else + origpte = pte_load_store(pte, newpte | PG_A); +#endif + if (origpte & PG_A) { + if (origpte & PG_MANAGED) + vm_page_flag_set(om, PG_REFERENCED); + if (opa != VM_PAGE_TO_PHYS(m)) + invlva = TRUE; + } + if (origpte & PG_M) { + KASSERT((origpte & PG_RW), + ("pmap_enter: modified page not writable: va: %#x, pte: %#jx", + va, (uintmax_t)origpte)); + if ((origpte & PG_MANAGED) != 0) + vm_page_dirty(om); + if ((prot & VM_PROT_WRITE) == 0) + invlva = TRUE; + } + if (invlva) + pmap_invalidate_page(pmap, va); + } else { +#ifdef XEN + PT_SET_VA(pte, newpte | PG_A, FALSE); +#else + pte_store(pte, newpte | PG_A); +#endif + } + } + PT_UPDATES_FLUSH(); + critical_exit(); + sched_unpin(); + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + +/* + * Maps a sequence of resident pages belonging to the same object. + * The sequence begins with the given page m_start. This page is + * mapped at the given virtual address start. Each subsequent page is + * mapped at a virtual address that is offset from start by the same + * amount as the page is offset from m_start within the object. The + * last page in the sequence is the page with the largest offset from + * m_start that can be mapped at a virtual address less than the given + * virtual address end. Not every virtual page between start and end + * is mapped; only those for which a resident page exists with the + * corresponding offset from m_start are mapped. 
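+ *
+ * Each page is entered with pmap_enter_quick_locked(), and the page
+ * table page is carried from one call to the next so that it is not
+ * looked up again for mappings that share a page directory entry.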
+ */ +void +pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, + vm_page_t m_start, vm_prot_t prot) +{ + vm_page_t m, mpte; + vm_pindex_t diff, psize; + + VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); + psize = atop(end - start); + mpte = NULL; + m = m_start; + PMAP_LOCK(pmap); + while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { + mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m, + prot, mpte); + m = TAILQ_NEXT(m, listq); + } + PMAP_UNLOCK(pmap); +} + +/* + * this code makes some *MAJOR* assumptions: + * 1. Current pmap & pmap exists. + * 2. Not wired. + * 3. Read access. + * 4. No page table pages. + * but is *MUCH* faster than pmap_enter... + */ + +void +pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) +{ + + PMAP_LOCK(pmap); + (void) pmap_enter_quick_locked(pmap, va, m, prot, NULL); + PMAP_UNLOCK(pmap); +} + +static vm_page_t +pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, vm_page_t mpte) +{ + pt_entry_t *pte; + vm_paddr_t pa; + vm_page_t free; + + pa = VM_PAGE_TO_PHYS(m); + pa = pa ? xpmap_ptom(pa) >> PAGE_SHIFT : 0; + + CTR4(KTR_PMAP, + "pmap_enter_quick_locked: pmap=%p va=0x%08x mfn=%d prot=0x%x", + pmap, va, pa, prot); + KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || + (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, + ("pmap_enter_quick_locked: managed mapping within the clean submap")); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + PMAP_LOCK_ASSERT(pmap, MA_OWNED); + + /* + * In the case that a page table page is not + * resident, we are creating it here. + */ + if (va < VM_MAXUSER_ADDRESS) { + unsigned ptepindex; + pd_entry_t ptepa; + + /* + * Calculate pagetable page index + */ + ptepindex = va >> PDRSHIFT; + if (mpte && (mpte->pindex == ptepindex)) { + mpte->wire_count++; + } else { + /* + * Get the page directory entry + */ + ptepa = pmap->pm_pdir[ptepindex]; + + /* + * If the page table page is mapped, we just increment + * the hold count, and activate it. + */ + if (ptepa) { + ptepa = xpmap_mtop(ptepa); + if (ptepa & PG_PS) + panic("pmap_enter_quick: unexpected mapping into 4MB page"); + mpte = PHYS_TO_VM_PAGE(ptepa); + mpte->wire_count++; + } else { + mpte = _pmap_allocpte(pmap, ptepindex, + M_NOWAIT); + if (mpte == NULL) + return (mpte); + } + } + } else { + mpte = NULL; + } + + /* + * This call to vtopte makes the assumption that we are + * entering the page into the current pmap. In order to support + * quick entry into any pmap, one would likely use pmap_pte_quick. + * But that isn't as quick as vtopte. + */ + pte = vtopte(va); + if (*pte) { + if (mpte != NULL) { + mpte->wire_count--; + mpte = NULL; + } + return (mpte); + } + + /* + * Enter on the PV list if part of our managed memory. 
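+ * If no pv entry can be allocated, the mapping is abandoned and the
+ * reference taken on the page table page above is dropped.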
+ */ + if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && + !pmap_try_insert_pv_entry(pmap, va, m)) { + if (mpte != NULL) { + free = NULL; + if (pmap_unwire_pte_hold(pmap, mpte, &free)) { + pmap_invalidate_page(pmap, va); + pmap_free_zero_pages(free); + } + + mpte = NULL; + } + return (mpte); + } + + /* + * Increment counters + */ + pmap->pm_stats.resident_count++; + + pa = VM_PAGE_TO_PHYS(m); + + /* + * Now validate mapping with RO protection + */ +#ifdef XEN + if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) + PT_SET_VA(pte, pa | PG_V | PG_U, TRUE); + else + PT_SET_VA(pte, pa | PG_V | PG_U | PG_MANAGED, TRUE); +#else + if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) + pte_store(pte, pa | PG_V | PG_U); + else + pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); +#endif + return (mpte); +} + +/* + * Make a temporary mapping for a physical address. This is only intended + * to be used for panic dumps. + */ +void * +pmap_kenter_temporary(vm_paddr_t pa, int i) +{ + vm_offset_t va; + + va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); + pmap_kenter(va, pa); + invlpg(va); + return ((void *)crashdumpmap); +} + +/* + * This code maps large physical mmap regions into the + * processor address space. Note that some shortcuts + * are taken, but the code works. + */ +void +pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, + vm_object_t object, vm_pindex_t pindex, + vm_size_t size) +{ + vm_page_t p; + + CTR5(KTR_PMAP, + "pmap_object_init_pt: pmap=%p addr=0x%08x object=%p pindex=%d size=%d", + pmap, addr, object, pindex, size); + VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); + KASSERT(object->type == OBJT_DEVICE, + ("pmap_object_init_pt: non-device object")); + if (pseflag && + ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { + int i; + vm_page_t m[1]; + unsigned int ptepindex; + int npdes; + pd_entry_t ptepa; + + PMAP_LOCK(pmap); + if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)]) + goto out; + PMAP_UNLOCK(pmap); +retry: + p = vm_page_lookup(object, pindex); + if (p != NULL) { + vm_page_lock_queues(); + if (vm_page_sleep_if_busy(p, FALSE, "init4p")) + goto retry; + } else { + p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); + if (p == NULL) + return; + m[0] = p; + + if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { + vm_page_lock_queues(); + vm_page_free(p); + vm_page_unlock_queues(); + return; + } + + p = vm_page_lookup(object, pindex); + vm_page_lock_queues(); + vm_page_wakeup(p); + } + vm_page_unlock_queues(); + pmap_zero_page(p); + + ptepa = VM_PAGE_TO_PHYS(p); + if (ptepa & (NBPDR - 1)) + return; + + p->valid = VM_PAGE_BITS_ALL; + + PMAP_LOCK(pmap); + pmap->pm_stats.resident_count += size >> PAGE_SHIFT; + npdes = size >> PDRSHIFT; + critical_enter(); + for (i = 0; i < npdes; i++) { +#ifdef XEN + int flags = PG_U | PG_RW | PG_V | PG_PS; +#else + int flags = PG_U | PG_V | PG_PS; +#endif + pde_store(&pmap->pm_pdir[ptepindex], + ptepa | flags); + ptepa += NBPDR; + ptepindex += 1; + } + pmap_invalidate_all(pmap); + critical_exit(); + out: + PMAP_UNLOCK(pmap); + } +} + +/* + * Routine: pmap_change_wiring + * Function: Change the wiring attribute for a map/virtual-address + * pair. + * In/out conditions: + * The mapping must already exist in the pmap. 
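+ *
+ * Only the PG_W bit of the pte and the pmap's wired page count are
+ * changed.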
+ */ +void +pmap_change_wiring(pmap, va, wired) + register pmap_t pmap; + vm_offset_t va; + boolean_t wired; +{ + register pt_entry_t *pte; + + vm_page_lock_queues(); + PMAP_LOCK(pmap); + pte = pmap_pte(pmap, va); + + if (wired && !pmap_pte_w(pte)) { + PT_SET_VA_MA((pte), *(pte) | PG_W, TRUE); + pmap->pm_stats.wired_count++; + } else if (!wired && pmap_pte_w(pte)) { + PT_SET_VA_MA((pte), *(pte) & ~PG_W, TRUE); + pmap->pm_stats.wired_count--; + } + + /* + * Wiring is not a hardware characteristic so there is no need to + * invalidate TLB. + */ + pmap_pte_release(pte); + PMAP_UNLOCK(pmap); + vm_page_unlock_queues(); +} + + + +/* + * Copy the range specified by src_addr/len + * from the source map to the range dst_addr/len + * in the destination map. + * + * This routine is only advisory and need not do anything. + */ + +void +pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, + vm_offset_t src_addr) +{ + vm_page_t free; + vm_offset_t addr; + vm_offset_t end_addr = src_addr + len; + vm_offset_t pdnxt; + + if (dst_addr != src_addr) + return; + + if (!pmap_is_current(src_pmap)) + return; + + CTR5(KTR_PMAP, + "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x", + dst_pmap, src_pmap, dst_addr, len, src_addr); + + vm_page_lock_queues(); + if (dst_pmap < src_pmap) { + PMAP_LOCK(dst_pmap); + PMAP_LOCK(src_pmap); + } else { + PMAP_LOCK(src_pmap); + PMAP_LOCK(dst_pmap); + } + sched_pin(); + for (addr = src_addr; addr < end_addr; addr = pdnxt) { + pt_entry_t *src_pte, *dst_pte; + vm_page_t dstmpte, srcmpte; + pd_entry_t srcptepaddr; + unsigned ptepindex; + + if (addr >= UPT_MIN_ADDRESS) + panic("pmap_copy: invalid to pmap_copy page tables"); + + pdnxt = (addr + NBPDR) & ~PDRMASK; + ptepindex = addr >> PDRSHIFT; + + srcptepaddr = src_pmap->pm_pdir[ptepindex]; + if (srcptepaddr == 0) + continue; + + if (srcptepaddr & PG_PS) { + if (dst_pmap->pm_pdir[ptepindex] == 0) { + dst_pmap->pm_pdir[ptepindex] = srcptepaddr & + ~PG_W; + dst_pmap->pm_stats.resident_count += + NBPDR / PAGE_SIZE; + } + continue; + } + + srcmpte = MACH_TO_VM_PAGE(srcptepaddr); + if (srcmpte->wire_count == 0) + panic("pmap_copy: source page table page is unused"); + + if (pdnxt > end_addr) + pdnxt = end_addr; + + src_pte = vtopte(addr); + while (addr < pdnxt) { + pt_entry_t ptetemp; + ptetemp = *src_pte; + /* + * we only virtual copy managed pages + */ + if ((ptetemp & PG_MANAGED) != 0) { + dstmpte = pmap_allocpte(dst_pmap, addr, + M_NOWAIT); + if (dstmpte == NULL) + break; + dst_pte = pmap_pte_quick(dst_pmap, addr); + if (*dst_pte == 0 && + pmap_try_insert_pv_entry(dst_pmap, addr, + MACH_TO_VM_PAGE(ptetemp & PG_FRAME))) { + /* + * Clear the wired, modified, and + * accessed (referenced) bits + * during the copy. + */ + PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | + PG_A), FALSE); + dst_pmap->pm_stats.resident_count++; + } else { + free = NULL; + if (pmap_unwire_pte_hold( dst_pmap, + dstmpte, &free)) { + pmap_invalidate_page(dst_pmap, + addr); + pmap_free_zero_pages(free); + } + } + if (dstmpte->wire_count >= srcmpte->wire_count) + break; + } + addr += PAGE_SIZE; + src_pte++; + } + } + sched_unpin(); + vm_page_unlock_queues(); + PMAP_UNLOCK(src_pmap); + PMAP_UNLOCK(dst_pmap); +} + +/* + * pmap_zero_page zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. 
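+ *
+ * A per-CPU sysmaps window (CMAP2/CADDR2) provides the temporary
+ * mapping, serialized by the sysmaps lock, and the page is cleared
+ * with pagezero(), which uses an optimized routine when one is
+ * available.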
+ */ +void +pmap_zero_page(vm_page_t m) +{ + struct sysmaps *sysmaps; + + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (*sysmaps->CMAP2) + panic("pmap_zero_page: CMAP2 busy"); + sched_pin(); +#ifdef XEN + PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M); +#else + *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; +#endif + invlcaddr(sysmaps->CADDR2); + pagezero(sysmaps->CADDR2); +#ifdef XEN + PT_SET_MA(sysmaps->CADDR2, 0); +#else + *sysmaps->CMAP2 = 0; +#endif + sched_unpin(); + mtx_unlock(&sysmaps->lock); +} + +/* + * pmap_zero_page_area zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. + * + * off and size may not cover an area beyond a single hardware page. + */ +void +pmap_zero_page_area(vm_page_t m, int off, int size) +{ + struct sysmaps *sysmaps; + + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (*sysmaps->CMAP2) + panic("pmap_zero_page: CMAP2 busy"); + sched_pin(); +#ifdef XEN + PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M); +#else + *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; +#endif + invlcaddr(sysmaps->CADDR2); + if (off == 0 && size == PAGE_SIZE) + pagezero(sysmaps->CADDR2); + else + bzero((char *)sysmaps->CADDR2 + off, size); +#ifdef XEN + PT_SET_MA(sysmaps->CADDR2, 0); +#else + *sysmaps->CMAP2 = 0; +#endif + sched_unpin(); + mtx_unlock(&sysmaps->lock); +} + +/* + * pmap_zero_page_idle zeros the specified hardware page by mapping + * the page into KVM and using bzero to clear its contents. This + * is intended to be called from the vm_pagezero process only and + * outside of Giant. + */ +void +pmap_zero_page_idle(vm_page_t m) +{ + + if (*CMAP3) + panic("pmap_zero_page: CMAP3 busy"); + sched_pin(); +#ifdef XEN + PT_SET_MA(CADDR3, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M); +#else + *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M; +#endif + invlcaddr(CADDR3); + pagezero(CADDR3); +#ifdef XEN + PT_SET_MA(CADDR3, 0); +#else + *CMAP3 = 0; +#endif + sched_unpin(); +} + +/* + * pmap_copy_page copies the specified (machine independent) + * page by mapping the page into virtual memory and using + * bcopy to copy the page, one machine dependent page at a + * time. + */ +void +pmap_copy_page(vm_page_t src, vm_page_t dst) +{ + struct sysmaps *sysmaps; + + sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; + mtx_lock(&sysmaps->lock); + if (*sysmaps->CMAP1) + panic("pmap_copy_page: CMAP1 busy"); + if (*sysmaps->CMAP2) + panic("pmap_copy_page: CMAP2 busy"); + sched_pin(); + invlpg((u_int)sysmaps->CADDR1); + invlpg((u_int)sysmaps->CADDR2); +#ifdef XEN + PT_SET_MA(sysmaps->CADDR1, PG_V | xpmap_ptom(VM_PAGE_TO_PHYS(src)) | PG_A); + PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(dst)) | PG_A | PG_M); + +#else + *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A; + *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M; +#endif + bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); +#ifdef XEN + PT_SET_MA(sysmaps->CADDR1, 0); + PT_SET_MA(sysmaps->CADDR2, 0); +#else + *sysmaps->CMAP1 = 0; + *sysmaps->CMAP2 = 0; +#endif + sched_unpin(); + mtx_unlock(&sysmaps->lock); +} + +/* + * Returns true if the pmap's pv is one of the first + * 16 pvs linked to from this page. 
This count may + * be changed upwards or downwards in the future; it + * is only necessary that true be returned for a small + * subset of pmaps for proper page aging. + */ +boolean_t +pmap_page_exists_quick(pmap, m) + pmap_t pmap; + vm_page_t m; +{ + pv_entry_t pv; + int loops = 0; + + if (m->flags & PG_FICTITIOUS) + return (FALSE); + + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + if (pv->pv_pmap == pmap) { + return TRUE; + } + loops++; + if (loops >= 16) + break; + } + return (FALSE); +} + +#define PMAP_REMOVE_PAGES_CURPROC_ONLY +/* + * Remove all pages from specified address space + * this aids process exit speeds. Also, this code + * is special cased for current process only, but + * can have the more generic (and slightly slower) + * mode enabled. This is much faster than pmap_remove + * in the case of running down an entire address space. + */ +void +pmap_remove_pages(pmap, sva, eva) + pmap_t pmap; + vm_offset_t sva, eva; +{ + pt_entry_t *pte, tpte; + vm_page_t m, free = NULL; + pv_entry_t pv, npv; + + CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap); +#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY + if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { + printf("warning: pmap_remove_pages called with non-current pmap\n"); + return; + } +#endif + vm_page_lock_queues(); + KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap")); + PMAP_LOCK(pmap); + sched_pin(); + for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { + + if (pv->pv_va >= eva || pv->pv_va < sva) { + npv = TAILQ_NEXT(pv, pv_plist); + continue; + } + +#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY + pte = vtopte(pv->pv_va); +#else + pte = pmap_pte_quick(pmap, pv->pv_va); +#endif + tpte = *pte ? xpmap_mtop(*pte) : 0; + + if (tpte == 0) { + printf("TPTE at %p IS ZERO @ VA %08x\n", + pte, pv->pv_va); + panic("bad pte"); + } + +/* + * We cannot remove wired pages from a process' mapping at this time + */ + if (tpte & PG_W) { + npv = TAILQ_NEXT(pv, pv_plist); + continue; + } + + m = PHYS_TO_VM_PAGE(tpte); + KASSERT(m->phys_addr == (tpte & PG_FRAME), + ("vm_page_t %p phys_addr mismatch %016jx %016jx", + m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); + + KASSERT(m < &vm_page_array[vm_page_array_size], + ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); + + pmap->pm_stats.resident_count--; + +#ifdef XEN + PT_SET_VA_MA(pte, 0, FALSE); +#else + pte_clear(pte); +#endif + /* + * Update the vm_page_t clean and reference bits. + */ + if (tpte & PG_M) { + vm_page_dirty(m); + } + + npv = TAILQ_NEXT(pv, pv_plist); + TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); + + m->md.pv_list_count--; + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + if (TAILQ_EMPTY(&m->md.pv_list)) + vm_page_flag_clear(m, PG_WRITEABLE); + + pmap_unuse_pt(pmap, pv->pv_va, &free); + free_pv_entry(pv); + } + PT_UPDATES_FLUSH(); + sched_unpin(); + pmap_invalidate_all(pmap); + pmap_free_zero_pages(free); + vm_page_unlock_queues(); + PMAP_UNLOCK(pmap); +} + +/* + * pmap_is_modified: + * + * Return whether or not the specified physical page was modified + * in any physical maps. 
+ */ +boolean_t +pmap_is_modified(vm_page_t m) +{ + pv_entry_t pv; + pt_entry_t *pte; + boolean_t rv; + + rv = FALSE; + if (m->flags & PG_FICTITIOUS) + return (rv); + + sched_pin(); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + PMAP_LOCK(pv->pv_pmap); + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + rv = (*pte & PG_M) != 0; + PMAP_UNLOCK(pv->pv_pmap); + if (rv) + break; + } + sched_unpin(); + return (rv); +} + +/* + * pmap_is_prefaultable: + * + * Return whether or not the specified virtual address is elgible + * for prefault. + */ +boolean_t +pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) +{ + pt_entry_t *pte; + boolean_t rv; + + rv = FALSE; +#ifdef XEN + /* + * disable prefaulting to start off + */ + return (rv); +#endif + PMAP_LOCK(pmap); + if (*pmap_pde(pmap, addr)) { + pte = vtopte(addr); + rv = *pte == 0; + } + PMAP_UNLOCK(pmap); + return (rv); +} + + +void +pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len) +{ + int i, npages = round_page(len) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + pt_entry_t *pte; + pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); + pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M))); + PMAP_MARK_PRIV(xpmap_mtop(*pte)); + pmap_pte_release(pte); + } +} + +void +pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len) +{ + int i, npages = round_page(len) >> PAGE_SHIFT; + for (i = 0; i < npages; i++) { + pt_entry_t *pte; + pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); + PMAP_MARK_UNPRIV(xpmap_mtop(*pte)); + pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M)); + pmap_pte_release(pte); + } +} + +/* + * Clear the given bit in each of the given page's ptes. The bit is + * expressed as a 32-bit mask. Consequently, if the pte is 64 bits in + * size, only a bit within the least significant 32 can be cleared. + */ +static __inline void +pmap_clear_ptes(vm_page_t m, int bit) +{ + register pv_entry_t pv; + pt_entry_t pbits, *pte; + + if ((m->flags & PG_FICTITIOUS) || + (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0)) + return; + + sched_pin(); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + /* + * Loop over all current mappings setting/clearing as appropos If + * setting RO do we need to clear the VAC? + */ + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + PMAP_LOCK(pv->pv_pmap); + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); +retry: + pbits = *pte; + if (pbits & bit) { + if (bit == PG_RW) { + /* + * Regardless of whether a pte is 32 or 64 bits + * in size, PG_RW and PG_M are among the least + * significant 32 bits. + */ +#ifdef XEN + PT_SET_VA_MA(pte, (pbits & ~(PG_RW|PG_M)), TRUE); + if (*pte != (pbits & ~(PG_RW|PG_M))) + goto retry; +#else + if (!atomic_cmpset_int((u_int *)pte, pbits, + pbits & ~(PG_RW | PG_M))) + goto retry; +#endif + if (pbits & PG_M) { + vm_page_dirty(m); + } + } else { +#ifdef XEN + PT_SET_VA_MA(pte, pbits & ~bit, TRUE); +#else + atomic_clear_int((u_int *)pte, bit); +#endif + } + pmap_invalidate_page(pv->pv_pmap, pv->pv_va); + } + PMAP_UNLOCK(pv->pv_pmap); + } + if (bit == PG_RW) + vm_page_flag_clear(m, PG_WRITEABLE); + sched_unpin(); +} + +/* + * pmap_page_protect: + * + * Lower the permission for all mappings to a given page. + */ +void +pmap_page_protect(vm_page_t m, vm_prot_t prot) +{ + if ((prot & VM_PROT_WRITE) == 0) { + if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { + pmap_clear_ptes(m, PG_RW); + } else { + pmap_remove_all(m); + } + } +} + +/* + * pmap_ts_referenced: + * + * Return a count of reference bits for a page, clearing those bits. 
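/*
 * Illustrative sketch (not part of the patch): the retry loops in
 * pmap_clear_ptes() above only commit a change when the PTE still holds
 * the value that was read, so bits the hardware (or hypervisor) may set
 * concurrently, such as PG_A, are not lost.  A generic form of the native
 * compare-and-swap idiom is sketched below; as the file's own comment
 * notes, this only works for bits within the least significant 32 bits of
 * the PTE.  The helper name is hypothetical.
 */
static void
example_clear_low_pte_bits(pt_entry_t *pte, u_int bits)
{
	pt_entry_t pbits;

	do {
		pbits = *pte;
	} while (!atomic_cmpset_int((u_int *)pte, pbits, pbits & ~bits));
}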
+ * It is not necessary for every reference bit to be cleared, but it + * is necessary that 0 only be returned when there are truly no + * reference bits set. + * + * XXX: The exact number of bits to check and clear is a matter that + * should be tested and standardized at some point in the future for + * optimal aging of shared pages. + */ +int +pmap_ts_referenced(vm_page_t m) +{ + register pv_entry_t pv, pvf, pvn; + pt_entry_t *pte; + pt_entry_t v; + int rtval = 0; + + if (m->flags & PG_FICTITIOUS) + return (rtval); + + sched_pin(); + mtx_assert(&vm_page_queue_mtx, MA_OWNED); + if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { + + pvf = pv; + + do { + pvn = TAILQ_NEXT(pv, pv_list); + + TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); + + TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); + + PMAP_LOCK(pv->pv_pmap); + pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va); + + if (pte && ((v = *pte) & PG_A) != 0) { +#ifdef XEN + PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE); +#else + atomic_clear_int((u_int *)pte, PG_A); +#endif + pmap_invalidate_page(pv->pv_pmap, pv->pv_va); + + rtval++; + if (rtval > 4) { + PMAP_UNLOCK(pv->pv_pmap); + break; + } + } + PMAP_UNLOCK(pv->pv_pmap); + } while ((pv = pvn) != NULL && pv != pvf); + } + PT_UPDATES_FLUSH(); + sched_unpin(); + + return (rtval); +} + +/* + * Clear the modify bits on the specified physical page. + */ +void +pmap_clear_modify(vm_page_t m) +{ + pmap_clear_ptes(m, PG_M); +} + +/* + * pmap_clear_reference: + * + * Clear the reference bit on the specified physical page. + */ +void +pmap_clear_reference(vm_page_t m) +{ + pmap_clear_ptes(m, PG_A); +} + +/* + * Miscellaneous support routines follow + */ + +/* + * Map a set of physical memory pages into the kernel virtual + * address space. Return a pointer to where it is mapped. This + * routine is intended to be used for mapping device memory, + * NOT real memory. 
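/*
 * Illustrative sketch (not part of the patch): pmap_ts_referenced() above
 * is consumed by the page daemon for page aging.  The logic and constants
 * below are simplified placeholders rather than the actual vm_pageout code.
 */
static void
example_age_active_page(vm_page_t m)
{
	int refs;

	vm_page_lock_queues();		/* pmap_ts_referenced() requires it */
	refs = pmap_ts_referenced(m);	/* clears PG_A in every mapping */
	if (refs > 0)
		m->act_count = imin(m->act_count + refs, 127 /* placeholder cap */);
	else if (m->act_count > 0)
		m->act_count--;		/* not referenced lately: age it */
	vm_page_unlock_queues();
}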
+ */ +void * +pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) +{ + vm_offset_t va, tmpva, offset; + + offset = pa & PAGE_MASK; + size = roundup(offset + size, PAGE_SIZE); + pa = pa & PG_FRAME; + + if (pa < KERNLOAD && pa + size <= KERNLOAD) + va = KERNBASE + pa; + else + va = kmem_alloc_nofault(kernel_map, size); + if (!va) + panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); + + for (tmpva = va; size > 0; ) { + pmap_kenter_attr(tmpva, pa, mode); + size -= PAGE_SIZE; + tmpva += PAGE_SIZE; + pa += PAGE_SIZE; + } + pmap_invalidate_range(kernel_pmap, va, tmpva); + pmap_invalidate_cache(); + return ((void *)(va + offset)); +} + +void * +pmap_mapdev(vm_paddr_t pa, vm_size_t size) +{ + + return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); +} + +void * +pmap_mapbios(vm_paddr_t pa, vm_size_t size) +{ + + return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); +} + +void +pmap_unmapdev(va, size) + vm_offset_t va; + vm_size_t size; +{ + vm_offset_t base, offset, tmpva; + + if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) + return; + base = va & PG_FRAME; + offset = va & PAGE_MASK; + size = roundup(offset + size, PAGE_SIZE); + critical_enter(); + for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) + pmap_kremove(tmpva); + pmap_invalidate_range(kernel_pmap, va, tmpva); + critical_exit(); + kmem_free(kernel_map, base, size); +} + +int +pmap_change_attr(va, size, mode) + vm_offset_t va; + vm_size_t size; + int mode; +{ + vm_offset_t base, offset, tmpva; + pt_entry_t *pte; + vm_paddr_t opte, npte; + pd_entry_t *pde; + + base = va & PG_FRAME; + offset = va & PAGE_MASK; + size = roundup(offset + size, PAGE_SIZE); + + /* Only supported on kernel virtual addresses. */ + if (base <= VM_MAXUSER_ADDRESS) + return (EINVAL); + + /* 4MB pages and pages that aren't mapped aren't supported. */ + for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) { + pde = pmap_pde(kernel_pmap, tmpva); + if (*pde & PG_PS) + return (EINVAL); + if (*pde == 0) + return (EINVAL); + pte = vtopte(va); + if (*pte == 0) + return (EINVAL); + } + + /* + * Ok, all the pages exist and are 4k, so run through them updating + * their cache mode. + */ + for (tmpva = base; size > 0; ) { + pte = vtopte(tmpva); + + /* + * The cache mode bits are all in the low 32-bits of the + * PTE, so we can just spin on updating the low 32-bits. + */ + do { + opte = *pte; + npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT); + npte |= pmap_cache_bits(mode, 0); +#ifdef XEN + PT_SET_VA_MA(pte, npte, TRUE); +#endif + } +#ifdef XEN + while (npte != opte && (*pte != npte)); +#else + while (npte != opte && + !atomic_cmpset_int((u_int *)pte, opte, npte)); +#endif + tmpva += PAGE_SIZE; + size -= PAGE_SIZE; + } + + /* + * Flush CPU caches to make sure any data isn't cached that shouldn't + * be, etc. + */ + pmap_invalidate_range(kernel_pmap, base, tmpva); + pmap_invalidate_cache(); + return (0); +} + +/* + * perform the pmap work for mincore + */ +int +pmap_mincore(pmap, addr) + pmap_t pmap; + vm_offset_t addr; +{ + pt_entry_t *ptep, pte; + vm_page_t m; + int val = 0; + + PMAP_LOCK(pmap); + ptep = pmap_pte(pmap, addr); + pte = (ptep != NULL) ? 
PT_GET(ptep) : 0; + pmap_pte_release(ptep); + PMAP_UNLOCK(pmap); + + if (pte != 0) { + vm_paddr_t pa; + + val = MINCORE_INCORE; + if ((pte & PG_MANAGED) == 0) + return val; + + pa = pte & PG_FRAME; + + m = PHYS_TO_VM_PAGE(pa); + + /* + * Modified by us + */ + if (pte & PG_M) + val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; + else { + /* + * Modified by someone else + */ + vm_page_lock_queues(); + if (m->dirty || pmap_is_modified(m)) + val |= MINCORE_MODIFIED_OTHER; + vm_page_unlock_queues(); + } + /* + * Referenced by us + */ + if (pte & PG_A) + val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; + else { + /* + * Referenced by someone else + */ + vm_page_lock_queues(); + if ((m->flags & PG_REFERENCED) || + pmap_ts_referenced(m)) { + val |= MINCORE_REFERENCED_OTHER; + vm_page_flag_set(m, PG_REFERENCED); + } + vm_page_unlock_queues(); + } + } + return val; +} + +void +pmap_activate(struct thread *td) +{ + pmap_t pmap, oldpmap; + u_int32_t cr3; + + critical_enter(); + pmap = vmspace_pmap(td->td_proc->p_vmspace); + oldpmap = PCPU_GET(curpmap); +#if defined(SMP) + atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); + atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); +#else + oldpmap->pm_active &= ~1; + pmap->pm_active |= 1; +#endif +#ifdef PAE + cr3 = vtophys(pmap->pm_pdpt); +#else + cr3 = vtophys(pmap->pm_pdir); +#endif + /* + * pmap_activate is for the current thread on the current cpu + */ + td->td_pcb->pcb_cr3 = cr3; + PT_UPDATES_FLUSH(); + load_cr3(cr3); + PCPU_SET(curpmap, pmap); + critical_exit(); +} + +vm_offset_t +pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) +{ + + if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { + return addr; + } + + addr = (addr + PDRMASK) & ~PDRMASK; + return addr; +} + +#ifdef XEN + +void +pmap_suspend() +{ + pmap_t pmap; + int i, pdir, offset; + vm_paddr_t pdirma; + mmu_update_t mu[4]; + + /* + * We need to remove the recursive mapping structure from all + * our pmaps so that Xen doesn't get confused when it restores + * the page tables. The recursive map lives at page directory + * index PTDPTDI. We assume that the suspend code has stopped + * the other vcpus (if any). + */ + LIST_FOREACH(pmap, &allpmaps, pm_list) { + for (i = 0; i < 4; i++) { + /* + * Figure out which page directory (L2) page + * contains this bit of the recursive map and + * the offset within that page of the map + * entry + */ + pdir = (PTDPTDI + i) / NPDEPG; + offset = (PTDPTDI + i) % NPDEPG; + pdirma = pmap->pm_pdpt[pdir] & PG_FRAME; + mu[i].ptr = pdirma + offset * sizeof(pd_entry_t); + mu[i].val = 0; + } + HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF); + } +} + +void +pmap_resume() +{ + pmap_t pmap; + int i, pdir, offset; + vm_paddr_t pdirma; + mmu_update_t mu[4]; + + /* + * Restore the recursive map that we removed on suspend. 
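/*
 * Illustrative sketch (not part of the patch): pmap_suspend() above and
 * pmap_resume() below differ only in the value written into the four
 * recursive-map slots -- 0 to tear the self-mapping down, the PDPT frame
 * ORed with PG_V to rebuild it.  A shared helper could look roughly like
 * this; the helper name is hypothetical.
 */
static void
example_fixup_recursive_map(pmap_t pmap, boolean_t restore)
{
	mmu_update_t mu[4];
	vm_paddr_t pdirma;
	int i, pdir, offset;

	for (i = 0; i < 4; i++) {
		/* Which L2 page holds this slot, and where within it. */
		pdir = (PTDPTDI + i) / NPDEPG;
		offset = (PTDPTDI + i) % NPDEPG;
		pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
		mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
		mu[i].val = restore ?
		    ((pmap->pm_pdpt[i] & PG_FRAME) | PG_V) : 0;
	}
	HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
}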
+ */ + LIST_FOREACH(pmap, &allpmaps, pm_list) { + for (i = 0; i < 4; i++) { + /* + * Figure out which page directory (L2) page + * contains this bit of the recursive map and + * the offset within that page of the map + * entry + */ + pdir = (PTDPTDI + i) / NPDEPG; + offset = (PTDPTDI + i) % NPDEPG; + pdirma = pmap->pm_pdpt[pdir] & PG_FRAME; + mu[i].ptr = pdirma + offset * sizeof(pd_entry_t); + mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V; + } + HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF); + } +} + +#endif + +#if defined(PMAP_DEBUG) +pmap_pid_dump(int pid) +{ + pmap_t pmap; + struct proc *p; + int npte = 0; + int index; + + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + if (p->p_pid != pid) + continue; + + if (p->p_vmspace) { + int i,j; + index = 0; + pmap = vmspace_pmap(p->p_vmspace); + for (i = 0; i < NPDEPTD; i++) { + pd_entry_t *pde; + pt_entry_t *pte; + vm_offset_t base = i << PDRSHIFT; + + pde = &pmap->pm_pdir[i]; + if (pde && pmap_pde_v(pde)) { + for (j = 0; j < NPTEPG; j++) { + vm_offset_t va = base + (j << PAGE_SHIFT); + if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { + if (index) { + index = 0; + printf("\n"); + } + sx_sunlock(&allproc_lock); + return npte; + } + pte = pmap_pte(pmap, va); + if (pte && pmap_pte_v(pte)) { + pt_entry_t pa; + vm_page_t m; + pa = PT_GET(pte); + m = PHYS_TO_VM_PAGE(pa); + printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", + va, pa, m->hold_count, m->wire_count, m->flags); + npte++; + index++; + if (index >= 2) { + index = 0; + printf("\n"); + } else { + printf(" "); + } + } + } + } + } + } + } + sx_sunlock(&allproc_lock); + return npte; +} +#endif + +#if defined(DEBUG) + +static void pads(pmap_t pm); +void pmap_pvdump(vm_paddr_t pa); + +/* print address space of pmap*/ +static void +pads(pmap_t pm) +{ + int i, j; + vm_paddr_t va; + pt_entry_t *ptep; + + if (pm == kernel_pmap) + return; + for (i = 0; i < NPDEPTD; i++) + if (pm->pm_pdir[i]) + for (j = 0; j < NPTEPG; j++) { + va = (i << PDRSHIFT) + (j << PAGE_SHIFT); + if (pm == kernel_pmap && va < KERNBASE) + continue; + if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) + continue; + ptep = pmap_pte(pm, va); + if (pmap_pte_v(ptep)) + printf("%x:%x ", va, *ptep); + }; + +} + +void +pmap_pvdump(vm_paddr_t pa) +{ + pv_entry_t pv; + vm_page_t m; + + printf("pa %x", pa); + m = PHYS_TO_VM_PAGE(pa); + TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { + printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va); + pads(pv->pv_pmap); + } + printf(" "); +} +#endif Property changes on: i386/xen/pmap.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: i386/pci/pci_pir.c =================================================================== --- i386/pci/pci_pir.c (.../stable/6/sys) (revision 184012) +++ i386/pci/pci_pir.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -137,6 +137,10 @@ int i; uint8_t ck, *cv; +#ifdef XEN + return; +#else + /* Don't try if we've already found a table. */ if (pci_route_table != NULL) return; @@ -147,7 +151,7 @@ sigaddr = bios_sigsearch(0, "_PIR", 4, 16, 0); if (sigaddr == 0) return; - +#endif /* If we found something, check the checksum and length. */ /* XXX - Use pmap_mapdev()? 
*/ pt = (struct PIR_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr); @@ -478,7 +482,11 @@ args.eax = PCIBIOS_ROUTE_INTERRUPT; args.ebx = (bus << 8) | (device << 3) | func; args.ecx = (irq << 8) | (0xa + pin); +#ifdef XEN + return (0); +#else return (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL))); +#endif } Index: i386/pci/pci_cfgreg.c =================================================================== --- i386/pci/pci_cfgreg.c (.../stable/6/sys) (revision 184012) +++ i386/pci/pci_cfgreg.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -82,9 +82,10 @@ static int pcireg_cfgread(int bus, int slot, int func, int reg, int bytes); static void pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes); +#ifndef XEN static int pcireg_cfgopen(void); - static int pciereg_cfgopen(void); +#endif static int pciereg_cfgread(int bus, int slot, int func, int reg, int bytes); static void pciereg_cfgwrite(int bus, int slot, int func, int reg, @@ -105,6 +106,7 @@ return (line); } +#ifndef XEN static u_int16_t pcibios_get_version(void) { @@ -125,6 +127,7 @@ } return (args.ebx & 0xffff); } +#endif /* * Initialise access to PCI configuration space @@ -132,6 +135,9 @@ int pci_cfgregopen(void) { +#ifdef XEN + return (0); +#else static int opened = 0; u_int16_t vid, did; u_int16_t v; @@ -171,6 +177,7 @@ } return(1); +#endif /* !XEN */ } /* @@ -349,6 +356,7 @@ mtx_unlock_spin(&pcicfg_mtx); } +#ifndef XEN /* check whether the configuration mechanism has been correctly identified */ static int pci_cfgcheck(int maxdev) @@ -526,6 +534,7 @@ devmax = 32; return (1); } +#endif /* !XEN */ #define PCIE_PADDR(bar, reg, bus, slot, func) \ ((bar) | \ Index: i386/isa/npx.c =================================================================== --- i386/isa/npx.c (.../stable/6/sys) (revision 184012) +++ i386/isa/npx.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -69,6 +69,10 @@ #include #include +#include +#ifdef XEN +#include +#endif #ifdef DEV_ISA #include #endif @@ -101,10 +105,15 @@ #define fxsave(addr) __asm __volatile("fxsave %0" : "=m" (*(addr))) #define ldmxcsr(__csr) __asm __volatile("ldmxcsr %0" : : "m" (__csr)) #endif +#ifdef XEN +#define start_emulating() (HYPERVISOR_fpu_taskswitch(1)) +#define stop_emulating() (HYPERVISOR_fpu_taskswitch(0)) +#else #define start_emulating() __asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \ : : "n" (CR0_TS) : "ax") #define stop_emulating() __asm("clts") +#endif #else /* !(__GNUCLIKE_ASM && !lint) */ void fldcw(caddr_t addr); Index: xen/features.c =================================================================== --- xen/features.c (.../stable/6/sys) (revision 0) +++ xen/features.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,26 @@ +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include +#include +#include + +uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32] /* __read_mostly */; + +void +setup_xen_features(void) +{ + xen_feature_info_t fi; + int i, j; + + for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) { + fi.submap_idx = i; + if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0) + break; + for (j = 0; j < 32; j++) + xen_features[i*32 + j] = !!(fi.submap & 1< +__FBSDID("$FreeBSD$"); + +#include "opt_global.h" +#include "opt_pmap.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c)) + +/* External tools reserve first few grant table 
entries. */ +#define NR_RESERVED_ENTRIES 8 +#define GNTTAB_LIST_END 0xffffffff +#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t)) + +static grant_ref_t **gnttab_list; +static unsigned int nr_grant_frames; +static unsigned int boot_max_nr_grant_frames; +static int gnttab_free_count; +static grant_ref_t gnttab_free_head; +static struct mtx gnttab_list_lock; + +static grant_entry_t *shared; + +static struct gnttab_free_callback *gnttab_free_callback_list = NULL; + +static int gnttab_expand(unsigned int req_entries); + +#define RPP (PAGE_SIZE / sizeof(grant_ref_t)) +#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP]) + +static int +get_free_entries(int count, int *entries) +{ + int ref, error; + grant_ref_t head; + + mtx_lock(&gnttab_list_lock); + if ((gnttab_free_count < count) && + ((error = gnttab_expand(count - gnttab_free_count)) != 0)) { + mtx_unlock(&gnttab_list_lock); + return (error); + } + ref = head = gnttab_free_head; + gnttab_free_count -= count; + while (count-- > 1) + head = gnttab_entry(head); + gnttab_free_head = gnttab_entry(head); + gnttab_entry(head) = GNTTAB_LIST_END; + mtx_unlock(&gnttab_list_lock); + + *entries = ref; + return (0); +} + +static void +do_free_callbacks(void) +{ + struct gnttab_free_callback *callback, *next; + + callback = gnttab_free_callback_list; + gnttab_free_callback_list = NULL; + + while (callback != NULL) { + next = callback->next; + if (gnttab_free_count >= callback->count) { + callback->next = NULL; + callback->fn(callback->arg); + } else { + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + } + callback = next; + } +} + +static inline void +check_free_callbacks(void) +{ + if (unlikely(gnttab_free_callback_list != NULL)) + do_free_callbacks(); +} + +static void +put_free_entry(grant_ref_t ref) +{ + + mtx_lock(&gnttab_list_lock); + gnttab_entry(ref) = gnttab_free_head; + gnttab_free_head = ref; + gnttab_free_count++; + check_free_callbacks(); + mtx_unlock(&gnttab_list_lock); +} + +/* + * Public grant-issuing interface functions + */ + +int +gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly, + grant_ref_t *result) +{ + int error, ref; + + error = get_free_entries(1, &ref); + + if (unlikely(error)) + return (error); + + shared[ref].frame = frame; + shared[ref].domid = domid; + wmb(); + shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); + + if (result) + *result = ref; + + return (0); +} + +void +gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int readonly) +{ + + shared[ref].frame = frame; + shared[ref].domid = domid; + wmb(); + shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0); +} + +int +gnttab_query_foreign_access(grant_ref_t ref) +{ + uint16_t nflags; + + nflags = shared[ref].flags; + + return (nflags & (GTF_reading|GTF_writing)); +} + +int +gnttab_end_foreign_access_ref(grant_ref_t ref) +{ + uint16_t flags, nflags; + + nflags = shared[ref].flags; + do { + if ( (flags = nflags) & (GTF_reading|GTF_writing) ) { + printf("WARNING: g.e. still in use!\n"); + return (0); + } + } while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) != + flags); + + return (1); +} + +void +gnttab_end_foreign_access(grant_ref_t ref, void *page) +{ + if (gnttab_end_foreign_access_ref(ref)) { + put_free_entry(ref); + if (page != NULL) { + free(page, M_DEVBUF); + } + } + else { + /* XXX This needs to be fixed so that the ref and page are + placed on a list to be freed up later. 
*/ + printf("WARNING: leaking g.e. and page still in use!\n"); + } +} + +int +gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, + grant_ref_t *result) +{ + int error, ref; + + error = get_free_entries(1, &ref); + if (unlikely(error)) + return (error); + + gnttab_grant_foreign_transfer_ref(ref, domid, pfn); + + *result = ref; + return (0); +} + +void +gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid, + unsigned long pfn) +{ + shared[ref].frame = pfn; + shared[ref].domid = domid; + wmb(); + shared[ref].flags = GTF_accept_transfer; +} + +unsigned long +gnttab_end_foreign_transfer_ref(grant_ref_t ref) +{ + unsigned long frame; + uint16_t flags; + + /* + * If a transfer is not even yet started, try to reclaim the grant + * reference and return failure (== 0). + */ + while (!((flags = shared[ref].flags) & GTF_transfer_committed)) { + if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags ) + return (0); + cpu_relax(); + } + + /* If a transfer is in progress then wait until it is completed. */ + while (!(flags & GTF_transfer_completed)) { + flags = shared[ref].flags; + cpu_relax(); + } + + /* Read the frame number /after/ reading completion status. */ + rmb(); + frame = shared[ref].frame; + KASSERT(frame != 0, ("grant table inconsistent")); + + return (frame); +} + +unsigned long +gnttab_end_foreign_transfer(grant_ref_t ref) +{ + unsigned long frame = gnttab_end_foreign_transfer_ref(ref); + + put_free_entry(ref); + return (frame); +} + +void +gnttab_free_grant_reference(grant_ref_t ref) +{ + + put_free_entry(ref); +} + +void +gnttab_free_grant_references(grant_ref_t head) +{ + grant_ref_t ref; + int count = 1; + + if (head == GNTTAB_LIST_END) + return; + + mtx_lock(&gnttab_list_lock); + ref = head; + while (gnttab_entry(ref) != GNTTAB_LIST_END) { + ref = gnttab_entry(ref); + count++; + } + gnttab_entry(ref) = gnttab_free_head; + gnttab_free_head = head; + gnttab_free_count += count; + check_free_callbacks(); + mtx_unlock(&gnttab_list_lock); +} + +int +gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head) +{ + int ref, error; + + error = get_free_entries(count, &ref); + if (unlikely(error)) + return (error); + + *head = ref; + return (0); +} + +int +gnttab_empty_grant_references(const grant_ref_t *private_head) +{ + + return (*private_head == GNTTAB_LIST_END); +} + +int +gnttab_claim_grant_reference(grant_ref_t *private_head) +{ + grant_ref_t g = *private_head; + + if (unlikely(g == GNTTAB_LIST_END)) + return (ENOSPC); + *private_head = gnttab_entry(g); + return (g); +} + +void +gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t release) +{ + + gnttab_entry(release) = *private_head; + *private_head = release; +} + +void +gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, uint16_t count) +{ + + mtx_lock(&gnttab_list_lock); + if (callback->next) + goto out; + callback->fn = fn; + callback->arg = arg; + callback->count = count; + callback->next = gnttab_free_callback_list; + gnttab_free_callback_list = callback; + check_free_callbacks(); + out: + mtx_unlock(&gnttab_list_lock); + +} + +void +gnttab_cancel_free_callback(struct gnttab_free_callback *callback) +{ + struct gnttab_free_callback **pcb; + + mtx_lock(&gnttab_list_lock); + for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) { + if (*pcb == callback) { + *pcb = callback->next; + break; + } + } + mtx_unlock(&gnttab_list_lock); +} + + +static int +grow_gnttab_list(unsigned int more_frames) +{ + unsigned int 
new_nr_grant_frames, extra_entries, i; + + new_nr_grant_frames = nr_grant_frames + more_frames; + extra_entries = more_frames * GREFS_PER_GRANT_FRAME; + + for (i = nr_grant_frames; i < new_nr_grant_frames; i++) + { + gnttab_list[i] = (grant_ref_t *) + malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); + + if (!gnttab_list[i]) + goto grow_nomem; + } + + for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames; + i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++) + gnttab_entry(i) = i + 1; + + gnttab_entry(i) = gnttab_free_head; + gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames; + gnttab_free_count += extra_entries; + + nr_grant_frames = new_nr_grant_frames; + + check_free_callbacks(); + + return (0); + +grow_nomem: + for ( ; i >= nr_grant_frames; i--) + free(gnttab_list[i], M_DEVBUF); + return (ENOMEM); +} + +static unsigned int +__max_nr_grant_frames(void) +{ + struct gnttab_query_size query; + int rc; + + query.dom = DOMID_SELF; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1); + if ((rc < 0) || (query.status != GNTST_okay)) + return (4); /* Legacy max supported number of frames */ + + return (query.max_nr_frames); +} + +static inline +unsigned int max_nr_grant_frames(void) +{ + unsigned int xen_max = __max_nr_grant_frames(); + + if (xen_max > boot_max_nr_grant_frames) + return (boot_max_nr_grant_frames); + return (xen_max); +} + +#ifdef notyet +/* + * XXX needed for backend support + * + */ +static int +map_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + unsigned long **frames = (unsigned long **)data; + + set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL)); + (*frames)++; + return 0; +} + +static int +unmap_pte_fn(pte_t *pte, struct page *pmd_page, + unsigned long addr, void *data) +{ + + set_pte_at(&init_mm, addr, pte, __pte(0)); + return 0; +} +#endif + +#ifndef XENHVM + +static int +gnttab_map(unsigned int start_idx, unsigned int end_idx) +{ + struct gnttab_setup_table setup; + u_long *frames; + + unsigned int nr_gframes = end_idx + 1; + int i, rc; + + frames = malloc(nr_gframes * sizeof(unsigned long), M_DEVBUF, M_NOWAIT); + if (!frames) + return (ENOMEM); + + setup.dom = DOMID_SELF; + setup.nr_frames = nr_gframes; + set_xen_guest_handle(setup.frame_list, frames); + + rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1); + if (rc == -ENOSYS) { + free(frames, M_DEVBUF); + return (ENOSYS); + } + KASSERT(!(rc || setup.status), + ("unexpected result from grant_table_op")); + + if (shared == NULL) { + vm_offset_t area; + + area = kmem_alloc_nofault(kernel_map, + PAGE_SIZE * max_nr_grant_frames()); + KASSERT(area, ("can't allocate VM space for grant table")); + shared = (grant_entry_t *)area; + } + + for (i = 0; i < nr_gframes; i++) + PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE, + ((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V); + + free(frames, M_DEVBUF); + + return (0); +} + +int +gnttab_resume(void) +{ + + if (max_nr_grant_frames() < nr_grant_frames) + return (ENOSYS); + return (gnttab_map(0, nr_grant_frames - 1)); +} + +int +gnttab_suspend(void) +{ + int i; + + for (i = 0; i < nr_grant_frames; i++) + pmap_kremove((vm_offset_t) shared + i * PAGE_SIZE); + + return (0); +} + +#else /* XENHVM */ + +#include + +static vm_paddr_t resume_frames; + +static int gnttab_map(unsigned int start_idx, unsigned int end_idx) +{ + struct xen_add_to_physmap xatp; + unsigned int i = end_idx; + + /* + * Loop backwards, so that the first hypercall has the largest index, + * ensuring that the table will grow only once. 
+ */ + do { + xatp.domid = DOMID_SELF; + xatp.idx = i; + xatp.space = XENMAPSPACE_grant_table; + xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i; + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) + panic("HYPERVISOR_memory_op failed to map gnttab"); + } while (i-- > start_idx); + + if (shared == NULL) { + vm_offset_t area; + + area = kmem_alloc_nofault(kernel_map, + PAGE_SIZE * max_nr_grant_frames()); + KASSERT(area, ("can't allocate VM space for grant table")); + shared = (grant_entry_t *)area; + } + + for (i = start_idx; i <= end_idx; i++) { + pmap_kenter((vm_offset_t) shared + i * PAGE_SIZE, + resume_frames + i * PAGE_SIZE); + } + + return (0); +} + +int +gnttab_resume(void) +{ + int error; + unsigned int max_nr_gframes, nr_gframes; + + nr_gframes = nr_grant_frames; + max_nr_gframes = max_nr_grant_frames(); + if (max_nr_gframes < nr_gframes) + return (ENOSYS); + + if (!resume_frames) { + error = xenpci_alloc_space(PAGE_SIZE * max_nr_gframes, + &resume_frames); + if (error) { + printf("error mapping gnttab share frames\n"); + return (error); + } + } + + return (gnttab_map(0, nr_gframes - 1)); +} + +#endif + +static int +gnttab_expand(unsigned int req_entries) +{ + int error; + unsigned int cur, extra; + + cur = nr_grant_frames; + extra = ((req_entries + (GREFS_PER_GRANT_FRAME-1)) / + GREFS_PER_GRANT_FRAME); + if (cur + extra > max_nr_grant_frames()) + return (ENOSPC); + + error = gnttab_map(cur, cur + extra - 1); + if (!error) + error = grow_gnttab_list(extra); + + return (error); +} + +int +gnttab_init() +{ + int i; + unsigned int max_nr_glist_frames; + unsigned int nr_init_grefs; + + if (!is_running_on_xen()) + return (ENODEV); + + nr_grant_frames = 1; + boot_max_nr_grant_frames = __max_nr_grant_frames(); + + /* Determine the maximum number of frames required for the + * grant reference free list on the current hypervisor. + */ + max_nr_glist_frames = (boot_max_nr_grant_frames * + GREFS_PER_GRANT_FRAME / + (PAGE_SIZE / sizeof(grant_ref_t))); + + gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *), + M_DEVBUF, M_NOWAIT); + + if (gnttab_list == NULL) + return (ENOMEM); + + for (i = 0; i < nr_grant_frames; i++) { + gnttab_list[i] = (grant_ref_t *) + malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT); + if (gnttab_list[i] == NULL) + goto ini_nomem; + } + + if (gnttab_resume()) + return (ENODEV); + + nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME; + + for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++) + gnttab_entry(i) = i + 1; + + gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END; + gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES; + gnttab_free_head = NR_RESERVED_ENTRIES; + + if (bootverbose) + printf("Grant table initialized\n"); + + return (0); + +ini_nomem: + for (i--; i >= 0; i--) + free(gnttab_list[i], M_DEVBUF); + free(gnttab_list, M_DEVBUF); + return (ENOMEM); + +} + +MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF); +//SYSINIT(gnttab, SI_SUB_PSEUDO, SI_ORDER_FIRST, gnttab_init, NULL); Property changes on: xen/gnttab.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: xen/hypervisor.h =================================================================== --- xen/hypervisor.h (.../stable/6/sys) (revision 0) +++ xen/hypervisor.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,152 @@ +/****************************************************************************** + * hypervisor.h + * + * Linux-specific hypervisor handling. 
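/*
 * Illustrative usage sketch (not part of the patch): this is roughly how a
 * split-driver frontend (blkfront/netfront style) would use the grant-table
 * API added in xen/gnttab.c above to share one page with its backend.  The
 * frame argument must be a machine frame number; the PV-style xpmap_ptom()
 * translation, the otherend_id variable, and the helper names are
 * assumptions made for illustration.
 */
static int
example_share_page_with_backend(domid_t otherend_id, vm_page_t m,
    grant_ref_t *refp)
{
	unsigned long mfn;

	mfn = xpmap_ptom(VM_PAGE_TO_PHYS(m)) >> PAGE_SHIFT;
	return (gnttab_grant_foreign_access(otherend_id, mfn, 0 /* rw */, refp));
}

static void
example_unshare_page(grant_ref_t ref)
{
	/* Refuses (returns 0) while the backend still has the page mapped. */
	if (gnttab_end_foreign_access_ref(ref))
		gnttab_free_grant_reference(ref);
}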
+ * + * Copyright (c) 2002, K A Fraser + */ + +#ifndef __XEN_HYPERVISOR_H__ +#define __XEN_HYPERVISOR_H__ + +#ifdef XENHVM + +#define is_running_on_xen() (HYPERVISOR_shared_info != NULL) + +#else + +#define is_running_on_xen() 1 + +#endif + +#ifdef PAE +#ifndef CONFIG_X86_PAE +#define CONFIG_X86_PAE +#endif +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(__amd64__) +#define MULTI_UVMFLAGS_INDEX 2 +#define MULTI_UVMDOMID_INDEX 3 +#else +#define MULTI_UVMFLAGS_INDEX 3 +#define MULTI_UVMDOMID_INDEX 4 +#endif + +#ifdef CONFIG_XEN_PRIVILEGED_GUEST +#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN) +#else +#define is_initial_xendomain() 0 +#endif + +extern start_info_t *xen_start_info; + +extern uint64_t get_system_time(int ticks); + +static inline int +HYPERVISOR_console_write(char *str, int count) +{ + return HYPERVISOR_console_io(CONSOLEIO_write, count, str); +} + +static inline void HYPERVISOR_crash(void) __dead2; + +static inline int +HYPERVISOR_yield(void) +{ + int rc = HYPERVISOR_sched_op(SCHEDOP_yield, NULL); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (rc == -ENOXENSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); +#endif + return (rc); +} + +static inline int +HYPERVISOR_block( + void) +{ + int rc = HYPERVISOR_sched_op(SCHEDOP_block, NULL); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (rc == -ENOXENSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_block, 0); +#endif + return (rc); +} + + +static inline void +HYPERVISOR_shutdown(unsigned int reason) +{ + struct sched_shutdown sched_shutdown = { + .reason = reason + }; + + HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown); +#if CONFIG_XEN_COMPAT <= 0x030002 + HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason); +#endif +} + +static inline void +HYPERVISOR_crash(void) +{ + HYPERVISOR_shutdown(SHUTDOWN_crash); + /* NEVER REACHED */ + for (;;) ; /* eliminate noreturn error */ +} + +/* Transfer control to hypervisor until an event is detected on one */ +/* of the specified ports or the specified number of ticks elapse */ +static inline int +HYPERVISOR_poll( + evtchn_port_t *ports, unsigned int nr_ports, int ticks) +{ + int rc; + struct sched_poll sched_poll = { + .nr_ports = nr_ports, + .timeout = get_system_time(ticks) + }; + set_xen_guest_handle(sched_poll.ports, ports); + + rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll); +#if CONFIG_XEN_COMPAT <= 0x030002 + if (rc == -ENOXENSYS) + rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0); +#endif + return (rc); +} + +static inline void +MULTI_update_va_mapping( + multicall_entry_t *mcl, unsigned long va, + uint64_t new_val, unsigned long flags) +{ + mcl->op = __HYPERVISOR_update_va_mapping; + mcl->args[0] = va; +#if defined(__amd64__) + mcl->args[1] = new_val; +#elif defined(PAE) + mcl->args[1] = (uint32_t)(new_val & 0xffffffff) ; + mcl->args[2] = (uint32_t)(new_val >> 32); +#else + mcl->args[1] = new_val; + mcl->args[2] = 0; +#endif + mcl->args[MULTI_UVMFLAGS_INDEX] = flags; +} + +#endif /* __XEN_HYPERVISOR_H__ */ Property changes on: xen/hypervisor.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:mergeinfo Merged /stable/7/sys/i386/include/xen/hypervisor.h:r172506,172810,175956,179044,179776,180149,182402 Merged 
/head/sys/i386/include/xen/hypervisor.h:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628 Added: svn:eol-style + native Index: xen/interface/xen.h =================================================================== --- xen/interface/xen.h (.../stable/6/sys) (revision 0) +++ xen/interface/xen.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,639 @@ +/****************************************************************************** + * xen.h + * + * Guest OS interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_XEN_H__ +#define __XEN_PUBLIC_XEN_H__ + +#include "xen-compat.h" + +#if defined(__i386__) || defined(__x86_64__) +#include "arch-x86/xen.h" +#elif defined(__ia64__) +#include "arch-ia64.h" +#else +#error "Unsupported architecture" +#endif + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. 
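/*
 * Illustrative sketch (not part of the patch): MULTI_update_va_mapping()
 * from hypervisor.h above packs one update_va_mapping operation into a
 * multicall entry, so several PTE updates can be submitted with a single
 * hypercall.  multicall_entry_t and the UVMF_* flags appear further down
 * in this header; HYPERVISOR_multicall() itself is not part of this hunk
 * and its (call_list, nr_calls) form is assumed here.
 */
static void
example_remap_two_pages(vm_offset_t va1, uint64_t pte1,
    vm_offset_t va2, uint64_t pte2)
{
	multicall_entry_t mcl[2];

	MULTI_update_va_mapping(&mcl[0], va1, pte1, UVMF_INVLPG | UVMF_LOCAL);
	MULTI_update_va_mapping(&mcl[1], va2, pte2, UVMF_INVLPG | UVMF_LOCAL);
	if (HYPERVISOR_multicall(mcl, 2) != 0)
		panic("example_remap_two_pages: multicall failed");
}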
*/ +DEFINE_XEN_GUEST_HANDLE(char); +__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); +DEFINE_XEN_GUEST_HANDLE(int); +__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); +DEFINE_XEN_GUEST_HANDLE(long); +__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); +DEFINE_XEN_GUEST_HANDLE(void); + +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); +#endif + +/* + * HYPERCALLS + */ + +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */ +#define __HYPERVISOR_platform_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */ +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */ +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_xsm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define __HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 + +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + +/* + * HYPERCALL COMPATIBILITY. + */ + +/* New sched_op hypercall introduced in 0x00030101. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030101 +#undef __HYPERVISOR_sched_op +#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat +#endif + +/* New event-channel and physdev hypercalls introduced in 0x00030202. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030202 +#undef __HYPERVISOR_event_channel_op +#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat +#undef __HYPERVISOR_physdev_op +#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat +#endif + +/* New platform_op hypercall introduced in 0x00030204. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030204 +#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op +#endif + +/* + * VIRTUAL INTERRUPTS + * + * Virtual interrupts that a guest OS may receive from Xen. + * + * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a + * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. + * The latter can be allocated only once per guest: they must initially be + * allocated to VCPU0 but can subsequently be re-bound. + */ +#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ +#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ +#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ +#define VIRQ_DOM_EXC 3 /* G. 
(DOM0) Exceptional event for some domain. */ +#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ +#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ +#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ +#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ + +/* Architecture-specific VIRQ definitions. */ +#define VIRQ_ARCH_0 16 +#define VIRQ_ARCH_1 17 +#define VIRQ_ARCH_2 18 +#define VIRQ_ARCH_3 19 +#define VIRQ_ARCH_4 20 +#define VIRQ_ARCH_5 21 +#define VIRQ_ARCH_6 22 +#define VIRQ_ARCH_7 23 + +#define NR_VIRQS 24 + +/* + * MMU-UPDATE REQUESTS + * + * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. + * ptr[1:0] specifies the appropriate MMU_* command. + * + * ptr[1:0] == MMU_NORMAL_PT_UPDATE: + * Updates an entry in a page table. If updating an L1 table, and the new + * table entry is valid/present, the mapped frame must belong to the FD, if + * an FD has been specified. If attempting to map an I/O page then the + * caller assumes the privilege of the FD. + * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. + * FD == DOMID_XEN: Map restricted areas of Xen's heap space. + * ptr[:2] -- Machine address of the page-table entry to modify. + * val -- Value to write. + * + * ptr[1:0] == MMU_MACHPHYS_UPDATE: + * Updates an entry in the machine->pseudo-physical mapping table. + * ptr[:2] -- Machine address within the frame whose mapping to modify. + * The frame must belong to the FD, if one is specified. + * val -- Value to write into the mapping entry. + * + * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: + * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed + * with those in @val. + */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ + +/* + * MMU EXTENDED OPERATIONS + * + * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures. + * A foreigndom (FD) can be specified (or DOMID_SELF for none). + * Where the FD has some effect, it is described below. + * + * cmd: MMUEXT_(UN)PIN_*_TABLE + * mfn: Machine frame number to be (un)pinned as a p.t. page. + * The frame must belong to the FD, if one is specified. + * + * cmd: MMUEXT_NEW_BASEPTR + * mfn: Machine frame number of new page-table base to install in MMU. + * + * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only] + * mfn: Machine frame number of new page-table base to install in MMU + * when in user space. + * + * cmd: MMUEXT_TLB_FLUSH_LOCAL + * No additional arguments. Flushes local TLB. + * + * cmd: MMUEXT_INVLPG_LOCAL + * linear_addr: Linear address to be flushed from the local TLB. + * + * cmd: MMUEXT_TLB_FLUSH_MULTI + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_INVLPG_MULTI + * linear_addr: Linear address to be flushed. + * vcpumask: Pointer to bitmap of VCPUs to be flushed. + * + * cmd: MMUEXT_TLB_FLUSH_ALL + * No additional arguments. Flushes all VCPUs' TLBs. + * + * cmd: MMUEXT_INVLPG_ALL + * linear_addr: Linear address to be flushed from all VCPUs' TLBs. + * + * cmd: MMUEXT_FLUSH_CACHE + * No additional arguments. Writes back and flushes cache contents. + * + * cmd: MMUEXT_SET_LDT + * linear_addr: Linear address of LDT base (NB. must be page-aligned). 
+ * nr_ents: Number of entries in LDT. + */ +#define MMUEXT_PIN_L1_TABLE 0 +#define MMUEXT_PIN_L2_TABLE 1 +#define MMUEXT_PIN_L3_TABLE 2 +#define MMUEXT_PIN_L4_TABLE 3 +#define MMUEXT_UNPIN_TABLE 4 +#define MMUEXT_NEW_BASEPTR 5 +#define MMUEXT_TLB_FLUSH_LOCAL 6 +#define MMUEXT_INVLPG_LOCAL 7 +#define MMUEXT_TLB_FLUSH_MULTI 8 +#define MMUEXT_INVLPG_MULTI 9 +#define MMUEXT_TLB_FLUSH_ALL 10 +#define MMUEXT_INVLPG_ALL 11 +#define MMUEXT_FLUSH_CACHE 12 +#define MMUEXT_SET_LDT 13 +#define MMUEXT_NEW_USER_BASEPTR 15 + +#ifndef __ASSEMBLY__ +struct mmuext_op { + unsigned int cmd; + union { + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ + xen_pfn_t mfn; + /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ + unsigned long linear_addr; + } arg1; + union { + /* SET_LDT */ + unsigned int nr_ents; + /* TLB_FLUSH_MULTI, INVLPG_MULTI */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(void) vcpumask; +#else + void *vcpumask; +#endif + } arg2; +}; +typedef struct mmuext_op mmuext_op_t; +DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); +#endif + +/* These are passed as 'flags' to update_va_mapping. They can be ORed. */ +/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap. */ +/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer. */ +#define UVMF_NONE (0UL<<0) /* No flushing at all. */ +#define UVMF_TLB_FLUSH (1UL<<0) /* Flush entire TLB(s). */ +#define UVMF_INVLPG (2UL<<0) /* Flush only one entry. */ +#define UVMF_FLUSHTYPE_MASK (3UL<<0) +#define UVMF_MULTI (0UL<<2) /* Flush subset of TLBs. */ +#define UVMF_LOCAL (0UL<<2) /* Flush local TLB. */ +#define UVMF_ALL (1UL<<2) /* Flush all TLBs. */ + +/* + * Commands to HYPERVISOR_console_io(). + */ +#define CONSOLEIO_write 0 +#define CONSOLEIO_read 1 + +/* + * Commands to HYPERVISOR_vm_assist(). + */ +#define VMASST_CMD_enable 0 +#define VMASST_CMD_disable 1 + +/* x86/32 guests: simulate full 4GB segment limits. */ +#define VMASST_TYPE_4gb_segments 0 + +/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ +#define VMASST_TYPE_4gb_segments_notify 1 + +/* + * x86 guests: support writes to bottom-level PTEs. + * NB1. Page-directory entries cannot be written. + * NB2. Guest must continue to remove all writable mappings of PTEs. + */ +#define VMASST_TYPE_writable_pagetables 2 + +/* x86/PAE guests: support PDPTs above 4GB. */ +#define VMASST_TYPE_pae_extended_cr3 3 + +#define MAX_VMASST_TYPE 3 + +#ifndef __ASSEMBLY__ + +typedef uint16_t domid_t; + +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ +#define DOMID_FIRST_RESERVED (0x7FF0U) + +/* DOMID_SELF is used in certain contexts to refer to oneself. */ +#define DOMID_SELF (0x7FF0U) + +/* + * DOMID_IO is used to restrict page-table updates to mapping I/O memory. + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO + * is useful to ensure that no mappings to the OS's own heap are accidentally + * installed. (e.g., in Linux this could cause havoc as reference counts + * aren't adjusted on the I/O-mapping code path). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can + * be specified by any calling domain. + */ +#define DOMID_IO (0x7FF1U) + +/* + * DOMID_XEN is used to allow privileged domains to map restricted parts of + * Xen's heap space (e.g., the machine_to_phys table). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if + * the caller is privileged. + */ +#define DOMID_XEN (0x7FF2U) + +/* + * Send an array of these to HYPERVISOR_mmu_update(). + * NB. 
The fields are natural pointer/address size for this architecture. + */ +struct mmu_update { + uint64_t ptr; /* Machine address of PTE. */ + uint64_t val; /* New contents of PTE. */ +}; +typedef struct mmu_update mmu_update_t; +DEFINE_XEN_GUEST_HANDLE(mmu_update_t); + +/* + * Send an array of these to HYPERVISOR_multicall(). + * NB. The fields are natural register size for this architecture. + */ +struct multicall_entry { + unsigned long op, result; + unsigned long args[6]; +}; +typedef struct multicall_entry multicall_entry_t; +DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); + +/* + * Event channel endpoints per domain: + * 1024 if a long is 32 bits; 4096 if a long is 64 bits. + */ +#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64) + +struct vcpu_time_info { + /* + * Updates to the following values are preceded and followed by an + * increment of 'version'. The guest can therefore detect updates by + * looking for changes to 'version'. If the least-significant bit of + * the version number is set then an update is in progress and the guest + * must wait to read a consistent set of values. + * The correct way to interact with the version number is similar to + * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry. + */ + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_time; /* Time, in nanosecs, since boot. */ + /* + * Current system time: + * system_time + + * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32) + * CPU frequency (Hz): + * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift + */ + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad1[3]; +}; /* 32 bytes */ +typedef struct vcpu_time_info vcpu_time_info_t; + +struct vcpu_info { + /* + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate + * a pending notification for a particular VCPU. It is then cleared + * by the guest OS /before/ checking for pending work, thus avoiding + * a set-and-check race. Note that the mask is only accessed by Xen + * on the CPU that is currently hosting the VCPU. This means that the + * pending and mask flags can be updated by the guest without special + * synchronisation (i.e., no need for the x86 LOCK prefix). + * This may seem suboptimal because if the pending flag is set by + * a different CPU then an IPI may be scheduled even when the mask + * is set. However, note: + * 1. The task of 'interrupt holdoff' is covered by the per-event- + * channel mask bits. A 'noisy' event that is continually being + * triggered can be masked at source at this very precise + * granularity. + * 2. The main purpose of the per-VCPU mask is therefore to restrict + * reentrant execution: whether for concurrency control, or to + * prevent unbounded stack usage. Whatever the purpose, we expect + * that the mask will be asserted only for short periods at a time, + * and so the likelihood of a 'spurious' IPI is suitably small. + * The mask is read before making an event upcall to the guest: a + * non-zero mask therefore guarantees that the VCPU will not receive + * an upcall activation. The mask is cleared when the VCPU requests + * to block: this avoids wakeup-waiting races. 
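/*
 * Illustrative sketch (not part of the patch): reading a consistent time
 * snapshot from vcpu_time_info using the version protocol described above
 * (an odd version means an update is in progress; re-read if the version
 * changed).  The 64x32-bit multiply can overflow for very large TSC
 * deltas, so treat this as a simplification of what a real time driver
 * would do.
 */
static uint64_t
example_system_time_ns(volatile struct vcpu_time_info *t)
{
	uint32_t version, mul;
	uint64_t delta, system_time;
	int8_t shift;

	do {
		version = t->version;
		rmb();				/* version before payload */
		delta = rdtsc() - t->tsc_timestamp;
		system_time = t->system_time;
		mul = t->tsc_to_system_mul;
		shift = t->tsc_shift;
		rmb();				/* payload before re-check */
	} while ((version & 1) != 0 || version != t->version);

	delta = (shift >= 0) ? (delta << shift) : (delta >> -shift);
	return (system_time + ((delta * mul) >> 32));
}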
+ */ + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + unsigned long evtchn_pending_sel; + struct arch_vcpu_info arch; + struct vcpu_time_info time; +}; /* 64 bytes (x86) */ +#ifndef __XEN__ +typedef struct vcpu_info vcpu_info_t; +#endif + +/* + * Xen/kernel shared data -- pointer provided in start_info. + * + * This structure is defined to be both smaller than a page, and the + * only data on the shared page, but may vary in actual size even within + * compatible Xen versions; guests should not rely on the size + * of this structure remaining constant. + */ +struct shared_info { + struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; + + /* + * A domain can create "event channels" on which it can send and receive + * asynchronous event notifications. There are three classes of event that + * are delivered by this mechanism: + * 1. Bi-directional inter- and intra-domain connections. Domains must + * arrange out-of-band to set up a connection (usually by allocating + * an unbound 'listener' port and avertising that via a storage service + * such as xenstore). + * 2. Physical interrupts. A domain with suitable hardware-access + * privileges can bind an event-channel port to a physical interrupt + * source. + * 3. Virtual interrupts ('events'). A domain can bind an event-channel + * port to a virtual interrupt source, such as the virtual-timer + * device or the emergency console. + * + * Event channels are addressed by a "port index". Each channel is + * associated with two bits of information: + * 1. PENDING -- notifies the domain that there is a pending notification + * to be processed. This bit is cleared by the guest. + * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING + * will cause an asynchronous upcall to be scheduled. This bit is only + * updated by the guest. It is read-only within Xen. If a channel + * becomes pending while the channel is masked then the 'edge' is lost + * (i.e., when the channel is unmasked, the guest must manually handle + * pending notifications as no upcall will be scheduled by Xen). + * + * To expedite scanning of pending notifications, any 0->1 pending + * transition on an unmasked channel causes a corresponding bit in a + * per-vcpu selector word to be set. Each bit in the selector covers a + * 'C long' in the PENDING bitfield array. + */ + unsigned long evtchn_pending[sizeof(unsigned long) * 8]; + unsigned long evtchn_mask[sizeof(unsigned long) * 8]; + + /* + * Wallclock time: updated only by control software. Guests should base + * their gettimeofday() syscall on this wallclock-base value. + */ + uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ + uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ + uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ + + struct arch_shared_info arch; + +}; +#ifndef __XEN__ +typedef struct shared_info shared_info_t; +#endif + +/* + * Start-of-day memory layout: + * 1. The domain is started within contiguous virtual-memory region. + * 2. The contiguous region ends on an aligned 4MB boundary. + * 3. This the order of bootstrap elements in the initial virtual region: + * a. relocated kernel image + * b. initial ram disk [mod_start, mod_len] + * c. list of allocated page frames [mfn_list, nr_pages] + * d. start_info_t structure [register ESI (x86)] + * e. bootstrap page tables [pt_base, CR3 (x86)] + * f. bootstrap stack [register ESP (x86)] + * 4. Bootstrap elements are packed together, but each is 4kB-aligned. + * 5. The initial ram disk may be omitted. + * 6. 
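The selector word and the pending/mask bitmaps described above are what the guest's upcall handler walks. A sketch of that scan is below, using trimmed-down stand-ins for vcpu_info and shared_info (only the fields the scan touches); handle_event_port() is hypothetical, and the GCC atomic builtins stand in for whatever synchronised bit operations the port actually uses.

#include <strings.h>    /* ffsl() */

#define LONG_BITS (sizeof(unsigned long) * 8)

struct vcpu_info_min {
    unsigned char evtchn_upcall_pending;
    unsigned char evtchn_upcall_mask;
    unsigned long evtchn_pending_sel;
};
struct shared_info_min {
    unsigned long evtchn_pending[LONG_BITS];
    unsigned long evtchn_mask[LONG_BITS];
};

extern void handle_event_port(unsigned int port);   /* hypothetical */

static void
scan_pending_events(struct vcpu_info_min *vi, struct shared_info_min *si)
{
    unsigned long sel, pend;
    unsigned int word, bit;

    /* Clear the upcall flag *before* scanning, as the comment requires. */
    vi->evtchn_upcall_pending = 0;
    sel = __sync_lock_test_and_set(&vi->evtchn_pending_sel, 0); /* atomic swap */

    while (sel != 0) {
        word = ffsl((long)sel) - 1;
        sel &= ~(1UL << word);

        pend = si->evtchn_pending[word] & ~si->evtchn_mask[word];
        while (pend != 0) {
            bit = ffsl((long)pend) - 1;
            pend &= ~(1UL << bit);
            /* The guest, not Xen, clears PENDING before handling. */
            __sync_fetch_and_and(&si->evtchn_pending[word], ~(1UL << bit));
            handle_event_port(word * LONG_BITS + bit);
        }
    }
}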
The list of page frames forms a contiguous 'pseudo-physical' memory + * layout for the domain. In particular, the bootstrap virtual-memory + * region is a 1:1 mapping to the first section of the pseudo-physical map. + * 7. All bootstrap elements are mapped read-writable for the guest OS. The + * only exception is the bootstrap page table, which is mapped read-only. + * 8. There is guaranteed to be at least 512kB padding after the final + * bootstrap element. If necessary, the bootstrap virtual region is + * extended by an extra 4MB to ensure this. + */ + +#define MAX_GUEST_CMDLINE 1024 +struct start_info { + /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ + char magic[32]; /* "xen--". */ + unsigned long nr_pages; /* Total pages allocated to this domain. */ + unsigned long shared_info; /* MACHINE address of shared info struct. */ + uint32_t flags; /* SIF_xxx flags. */ + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ + uint32_t store_evtchn; /* Event channel for store communication. */ + union { + struct { + xen_pfn_t mfn; /* MACHINE page number of console page. */ + uint32_t evtchn; /* Event channel for console page. */ + } domU; + struct { + uint32_t info_off; /* Offset of console_info struct. */ + uint32_t info_size; /* Size of console_info struct from start.*/ + } dom0; + } console; + /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ + unsigned long pt_base; /* VIRTUAL address of page directory. */ + unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ + unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ + unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ + int8_t cmd_line[MAX_GUEST_CMDLINE]; +}; +typedef struct start_info start_info_t; + +/* New console union for dom0 introduced in 0x00030203. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +#define console_mfn console.domU.mfn +#define console_evtchn console.domU.evtchn +#endif + +/* These flags are passed in the 'flags' field of start_info_t. */ +#define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ +#define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ +#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ + +typedef struct dom0_vga_console_info { + uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ +#define XEN_VGATYPE_TEXT_MODE_3 0x03 +#define XEN_VGATYPE_VESA_LFB 0x23 + + union { + struct { + /* Font height, in pixels. */ + uint16_t font_height; + /* Cursor location (column, row). */ + uint16_t cursor_x, cursor_y; + /* Number of rows and columns (dimensions in characters). */ + uint16_t rows, columns; + } text_mode_3; + + struct { + /* Width and height, in pixels. */ + uint16_t width, height; + /* Bytes per scan line. */ + uint16_t bytes_per_line; + /* Bits per pixel. */ + uint16_t bits_per_pixel; + /* LFB physical address, and size (in units of 64kB). */ + uint32_t lfb_base; + uint32_t lfb_size; + /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ + uint8_t red_pos, red_size; + uint8_t green_pos, green_size; + uint8_t blue_pos, blue_size; + uint8_t rsvd_pos, rsvd_size; +#if __XEN_INTERFACE_VERSION__ >= 0x00030206 + /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ + uint32_t gbl_caps; + /* Mode attributes (offset 0x0, VESA command 0x4f01). 
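start_info is the first structure a PV guest sees (its address arrives in ESI on x86). A small sketch of the usual early checks, written against a trimmed-down view of the struct defined above; only the magic prefix check and the SIF_INITDOMAIN test are shown, and the "xen-" prefix test is the conventional sanity check rather than a full version parse.

#include <stdint.h>
#include <string.h>

#define SIF_PRIVILEGED (1 << 0)
#define SIF_INITDOMAIN (1 << 1)

struct start_info_min {
    char magic[32];             /* "xen-<major>-<minor>..." */
    unsigned long nr_pages;
    unsigned long shared_info;  /* MACHINE address of shared_info */
    uint32_t flags;             /* SIF_xxx */
};

/* Nonzero if this kernel was booted as the initial control domain. */
static int
is_initial_domain(const struct start_info_min *si)
{
    if (strncmp(si->magic, "xen-", 4) != 0)
        return 0;               /* not started by a Xen domain builder */
    return (si->flags & SIF_INITDOMAIN) != 0;
}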
*/ + uint16_t mode_attrs; +#endif + } vesa_lfb; + } u; +} dom0_vga_console_info_t; +#define xen_vga_console_info dom0_vga_console_info +#define xen_vga_console_info_t dom0_vga_console_info_t + +typedef uint8_t xen_domain_handle_t[16]; + +/* Turn a plain number into a C unsigned long constant. */ +#define __mk_unsigned_long(x) x ## UL +#define mk_unsigned_long(x) __mk_unsigned_long(x) + +__DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t); +__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t); +__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t); +__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t); + +#else /* __ASSEMBLY__ */ + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define mk_unsigned_long(x) x + +#endif /* !__ASSEMBLY__ */ + +/* Default definitions for macros used by domctl/sysctl. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif +#ifndef XEN_GUEST_HANDLE_64 +#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) +#endif +#endif + +#endif /* __XEN_PUBLIC_XEN_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/xen.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/arch-powerpc.h =================================================================== --- xen/interface/arch-powerpc.h (.../stable/6/sys) (revision 0) +++ xen/interface/arch-powerpc.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,120 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) IBM Corp. 
2005, 2006 + * + * Authors: Hollis Blanchard + */ + +#include "xen.h" + +#ifndef __XEN_PUBLIC_ARCH_PPC_64_H__ +#define __XEN_PUBLIC_ARCH_PPC_64_H__ + +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { \ + int __pad[(sizeof (long long) - sizeof (void *)) / sizeof (int)]; \ + type *p; \ + } __attribute__((__aligned__(8))) __guest_handle_ ## name + +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + ___DEFINE_XEN_GUEST_HANDLE(name, type); \ + ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define set_xen_guest_handle(hnd, val) \ + do { \ + if (sizeof ((hnd).__pad)) \ + (hnd).__pad[0] = 0; \ + (hnd).p = val; \ + } while (0) + +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif + +#ifndef __ASSEMBLY__ +typedef unsigned long long xen_pfn_t; +#define PRI_xen_pfn "llx" +#endif + +/* + * Pointers and other address fields inside interface structures are padded to + * 64 bits. This means that field alignments aren't different between 32- and + * 64-bit architectures. + */ +/* NB. Multi-level macro ensures __LINE__ is expanded before concatenation. */ +#define __MEMORY_PADDING(_X) +#define _MEMORY_PADDING(_X) __MEMORY_PADDING(_X) +#define MEMORY_PADDING _MEMORY_PADDING(__LINE__) + +/* And the trap vector is... */ +#define TRAP_INSTR "li 0,-1; sc" /* XXX just "sc"? */ + +#ifndef __ASSEMBLY__ + +#define XENCOMM_INLINE_FLAG (1UL << 63) + +typedef uint64_t xen_ulong_t; + +/* User-accessible registers: nost of these need to be saved/restored + * for every nested Xen invocation. */ +struct cpu_user_regs +{ + uint64_t gprs[32]; + uint64_t lr; + uint64_t ctr; + uint64_t srr0; + uint64_t srr1; + uint64_t pc; + uint64_t msr; + uint64_t fpscr; /* XXX Is this necessary */ + uint64_t xer; + uint64_t hid4; /* debug only */ + uint64_t dar; /* debug only */ + uint32_t dsisr; /* debug only */ + uint32_t cr; + uint32_t __pad; /* good spot for another 32bit reg */ + uint32_t entry_vector; +}; +typedef struct cpu_user_regs cpu_user_regs_t; + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */ + +/* ONLY used to communicate with dom0! See also struct exec_domain. */ +struct vcpu_guest_context { + cpu_user_regs_t user_regs; /* User-level CPU registers */ + uint64_t sdr1; /* Pagetable base */ + /* XXX etc */ +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); + +struct arch_shared_info { + uint64_t boot_timebase; +}; + +struct arch_vcpu_info { +}; + +/* Support for multi-processor guests. */ +#define MAX_VIRT_CPUS 32 +#endif + +#endif Property changes on: xen/interface/arch-powerpc.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/callback.h =================================================================== --- xen/interface/callback.h (.../stable/6/sys) (revision 0) +++ xen/interface/callback.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,121 @@ +/****************************************************************************** + * callback.h + * + * Register guest OS callbacks with Xen. 
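The padded guest-handle definition above exists so that interface structures have identical layout for 32- and 64-bit guests: the handle is always 8 bytes, with the pad zeroed so the unused half is deterministic. A self-contained sketch of the same pattern for a byte-buffer handle follows; the handle type name and the scratch buffer are illustrative only, and GCC is assumed (zero-length arrays).

#include <stdint.h>

/* Mirrors the ___DEFINE_XEN_GUEST_HANDLE expansion above. */
typedef struct {
    int __pad[(sizeof(long long) - sizeof(void *)) / sizeof(int)];
    uint8_t *p;
} __attribute__((__aligned__(8))) __guest_handle_uint8;

#define set_xen_guest_handle(hnd, val)          \
    do {                                        \
        if (sizeof((hnd).__pad))                \
            (hnd).__pad[0] = 0;                 \
        (hnd).p = (val);                        \
    } while (0)

static uint8_t scratch[4096];               /* illustrative buffer */

static __guest_handle_uint8
make_handle(void)
{
    __guest_handle_uint8 h;

    set_xen_guest_handle(h, scratch);       /* zero the pad, then store */
    return h;
}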
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Ian Campbell + */ + +#ifndef __XEN_PUBLIC_CALLBACK_H__ +#define __XEN_PUBLIC_CALLBACK_H__ + +#include "xen.h" + +/* + * Prototype for this hypercall is: + * long callback_op(int cmd, void *extra_args) + * @cmd == CALLBACKOP_??? (callback operation). + * @extra_args == Operation-specific extra arguments (NULL if none). + */ + +/* ia64, x86: Callback for event delivery. */ +#define CALLBACKTYPE_event 0 + +/* x86: Failsafe callback when guest state cannot be restored by Xen. */ +#define CALLBACKTYPE_failsafe 1 + +/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */ +#define CALLBACKTYPE_syscall 2 + +/* + * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel + * feature is enabled. Do not use this callback type in new code. + */ +#define CALLBACKTYPE_sysenter_deprecated 3 + +/* x86: Callback for NMI delivery. */ +#define CALLBACKTYPE_nmi 4 + +/* + * x86: sysenter is only available as follows: + * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled + * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs + * ('32-on-32-on-64', '32-on-64-on-64') + * [nb. also 64-bit guest applications on Intel CPUs + * ('64-on-64-on-64'), but syscall is preferred] + */ +#define CALLBACKTYPE_sysenter 5 + +/* + * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs + * ('32-on-32-on-64', '32-on-64-on-64') + */ +#define CALLBACKTYPE_syscall32 7 + +/* + * Disable event deliver during callback? This flag is ignored for event and + * NMI callbacks: event delivery is unconditionally disabled. + */ +#define _CALLBACKF_mask_events 0 +#define CALLBACKF_mask_events (1U << _CALLBACKF_mask_events) + +/* + * Register a callback. + */ +#define CALLBACKOP_register 0 +struct callback_register { + uint16_t type; + uint16_t flags; + xen_callback_t address; +}; +typedef struct callback_register callback_register_t; +DEFINE_XEN_GUEST_HANDLE(callback_register_t); + +/* + * Unregister a callback. + * + * Not all callbacks can be unregistered. -EINVAL will be returned if + * you attempt to unregister such a callback. 
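Registering the event-delivery callback uses CALLBACKOP_register with a struct callback_register. A hedged sketch is below: xen_callback_t is architecture-specific (segment selector plus offset on x86/32, a plain address on x86/64) and is stubbed as a function pointer here, the upcall entry point is hypothetical, and the HYPERVISOR_callback_op wrapper name is an assumption in this tree.

#include <stdint.h>

#define CALLBACKTYPE_event      0
#define CALLBACKOP_register     0

typedef void (*xen_callback_t)(void);   /* per-arch in the real headers */

struct callback_register {
    uint16_t type;
    uint16_t flags;
    xen_callback_t address;
};

extern void xen_event_upcall(void);     /* hypothetical upcall entry point */

static void
register_event_callback(void)
{
    struct callback_register cb = {
        .type    = CALLBACKTYPE_event,
        .flags   = 0,   /* masking is unconditional for event callbacks */
        .address = xen_event_upcall,
    };

    /* HYPERVISOR_callback_op(CALLBACKOP_register, &cb);  -- assumed wrapper */
    (void)cb;
}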
+ */ +#define CALLBACKOP_unregister 1 +struct callback_unregister { + uint16_t type; + uint16_t _unused; +}; +typedef struct callback_unregister callback_unregister_t; +DEFINE_XEN_GUEST_HANDLE(callback_unregister_t); + +#if __XEN_INTERFACE_VERSION__ < 0x00030207 +#undef CALLBACKTYPE_sysenter +#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated +#endif + +#endif /* __XEN_PUBLIC_CALLBACK_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/callback.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/elfnote.h =================================================================== --- xen/interface/elfnote.h (.../stable/6/sys) (revision 0) +++ xen/interface/elfnote.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,233 @@ +/****************************************************************************** + * elfnote.h + * + * Definitions used for the Xen ELF notes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Ian Campbell, XenSource Ltd. + */ + +#ifndef __XEN_PUBLIC_ELFNOTE_H__ +#define __XEN_PUBLIC_ELFNOTE_H__ + +/* + * The notes should live in a PT_NOTE segment and have "Xen" in the + * name field. + * + * Numeric types are either 4 or 8 bytes depending on the content of + * the desc field. + * + * LEGACY indicated the fields in the legacy __xen_guest string which + * this a note type replaces. + */ + +/* + * NAME=VALUE pair (string). + */ +#define XEN_ELFNOTE_INFO 0 + +/* + * The virtual address of the entry point (numeric). + * + * LEGACY: VIRT_ENTRY + */ +#define XEN_ELFNOTE_ENTRY 1 + +/* The virtual address of the hypercall transfer page (numeric). + * + * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page + * number not a virtual address) + */ +#define XEN_ELFNOTE_HYPERCALL_PAGE 2 + +/* The virtual address where the kernel image should be mapped (numeric). + * + * Defaults to 0. + * + * LEGACY: VIRT_BASE + */ +#define XEN_ELFNOTE_VIRT_BASE 3 + +/* + * The offset of the ELF paddr field from the acutal required + * psuedo-physical address (numeric). + * + * This is used to maintain backwards compatibility with older kernels + * which wrote __PAGE_OFFSET into that field. This field defaults to 0 + * if not present. + * + * LEGACY: ELF_PADDR_OFFSET. (n.b. 
legacy default is VIRT_BASE) + */ +#define XEN_ELFNOTE_PADDR_OFFSET 4 + +/* + * The version of Xen that we work with (string). + * + * LEGACY: XEN_VER + */ +#define XEN_ELFNOTE_XEN_VERSION 5 + +/* + * The name of the guest operating system (string). + * + * LEGACY: GUEST_OS + */ +#define XEN_ELFNOTE_GUEST_OS 6 + +/* + * The version of the guest operating system (string). + * + * LEGACY: GUEST_VER + */ +#define XEN_ELFNOTE_GUEST_VERSION 7 + +/* + * The loader type (string). + * + * LEGACY: LOADER + */ +#define XEN_ELFNOTE_LOADER 8 + +/* + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. + * + * LEGACY: PAE (n.b. The legacy interface included a provision to + * indicate 'extended-cr3' support allowing L3 page tables to be + * placed above 4G. It is assumed that any kernel new enough to use + * these ELF notes will include this and therefore "yes" here is + * equivalent to "yes[entended-cr3]" in the __xen_guest interface. + */ +#define XEN_ELFNOTE_PAE_MODE 9 + +/* + * The features supported/required by this kernel (string). + * + * The string must consist of a list of feature names (as given in + * features.h, without the "XENFEAT_" prefix) separated by '|' + * characters. If a feature is required for the kernel to function + * then the feature name must be preceded by a '!' character. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_FEATURES 10 + +/* + * The kernel requires the symbol table to be loaded (string = "yes" or "no") + * LEGACY: BSD_SYMTAB (n.b. The legacy treated the presence or absence + * of this string as a boolean flag rather than requiring "yes" or + * "no". + */ +#define XEN_ELFNOTE_BSD_SYMTAB 11 + +/* + * The lowest address the hypervisor hole can begin at (numeric). + * + * This must not be set higher than HYPERVISOR_VIRT_START. Its presence + * also indicates to the hypervisor that the kernel can deal with the + * hole starting at a higher address. + */ +#define XEN_ELFNOTE_HV_START_LOW 12 + +/* + * List of maddr_t-sized mask/value pairs describing how to recognize + * (non-present) L1 page table entries carrying valid MFNs (numeric). + */ +#define XEN_ELFNOTE_L1_MFN_VALID 13 + +/* + * Whether or not the guest supports cooperative suspend cancellation. + */ +#define XEN_ELFNOTE_SUSPEND_CANCEL 14 + +/* + * The number of the highest elfnote defined. + */ +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUSPEND_CANCEL + +/* + * System information exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO + * note in case of a system crash. This note will contain various + * information about the system, see xen/include/xen/elfcore.h. + */ +#define XEN_ELFNOTE_CRASH_INFO 0x1000001 + +/* + * System registers exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS + * note per cpu in case of a system crash. This note is architecture + * specific and will contain registers not saved in the "CORE" note. + * See xen/include/xen/elfcore.h for more information. + */ +#define XEN_ELFNOTE_CRASH_REGS 0x1000002 + + +/* + * xen dump-core none note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. 
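A PV kernel advertises these values by placing ELF notes named "Xen" in a PT_NOTE segment. The sketch below emits one string-valued note (XEN_ELFNOTE_GUEST_OS) from C with a GCC section attribute; kernels typically do this from assembler instead, the ".note.Xen" section name is only illustrative, and a linker script (not shown) must map that section into a PT_NOTE segment for the domain builder to find it.

#include <stdint.h>

#define XEN_ELFNOTE_GUEST_OS 6

/* One ELF note: Nhdr fields, then the 4-byte-padded name and desc. */
struct xen_elfnote_guest_os {
    uint32_t namesz, descsz, type;
    char name[4];       /* "Xen" plus NUL, already 4-byte aligned */
    char desc[8];       /* value string, padded to 4 bytes */
};

static const struct xen_elfnote_guest_os guest_os_note
    __attribute__((section(".note.Xen"), used, aligned(4))) = {
    .namesz = 4,
    .descsz = sizeof("FreeBSD"),
    .type   = XEN_ELFNOTE_GUEST_OS,
    .name   = "Xen",
    .desc   = "FreeBSD",
};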
+ */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 + +#endif /* __XEN_PUBLIC_ELFNOTE_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/elfnote.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/libelf.h =================================================================== --- xen/interface/libelf.h (.../stable/6/sys) (revision 0) +++ xen/interface/libelf.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,265 @@ +/****************************************************************************** + * libelf.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#ifndef __XC_LIBELF__ +#define __XC_LIBELF__ 1 + +#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) +#define XEN_ELF_LITTLE_ENDIAN +#else +#error define architectural endianness +#endif + +#undef ELFSIZE +#include "elfnote.h" +#include "elfstructs.h" +#include "features.h" + +/* ------------------------------------------------------------------------ */ + +typedef union { + Elf32_Ehdr e32; + Elf64_Ehdr e64; +} elf_ehdr; + +typedef union { + Elf32_Phdr e32; + Elf64_Phdr e64; +} elf_phdr; + +typedef union { + Elf32_Shdr e32; + Elf64_Shdr e64; +} elf_shdr; + +typedef union { + Elf32_Sym e32; + Elf64_Sym e64; +} elf_sym; + +typedef union { + Elf32_Rel e32; + Elf64_Rel e64; +} elf_rel; + +typedef union { + Elf32_Rela e32; + Elf64_Rela e64; +} elf_rela; + +typedef union { + Elf32_Note e32; + Elf64_Note e64; +} elf_note; + +struct elf_binary { + /* elf binary */ + const char *image; + size_t size; + char class; + char data; + + const elf_ehdr *ehdr; + const char *sec_strtab; + const elf_shdr *sym_tab; + const char *sym_strtab; + + /* loaded to */ + char *dest; + uint64_t pstart; + uint64_t pend; + uint64_t reloc_offset; + + uint64_t bsd_symtab_pstart; + uint64_t bsd_symtab_pend; + +#ifndef __XEN__ + /* misc */ + FILE *log; +#endif + int verbose; +}; + +/* ------------------------------------------------------------------------ */ +/* accessing elf header fields */ + +#ifdef XEN_ELF_BIG_ENDIAN +# define NATIVE_ELFDATA ELFDATA2MSB +#else +# define NATIVE_ELFDATA ELFDATA2LSB +#endif + +#define elf_32bit(elf) (ELFCLASS32 == (elf)->class) +#define elf_64bit(elf) (ELFCLASS64 == (elf)->class) +#define elf_msb(elf) (ELFDATA2MSB == (elf)->data) +#define elf_lsb(elf) (ELFDATA2LSB == (elf)->data) +#define elf_swap(elf) (NATIVE_ELFDATA != (elf)->data) + +#define elf_uval(elf, str, elem) \ + ((ELFCLASS64 == (elf)->class) \ + ? elf_access_unsigned((elf), (str), \ + offsetof(typeof(*(str)),e64.elem), \ + sizeof((str)->e64.elem)) \ + : elf_access_unsigned((elf), (str), \ + offsetof(typeof(*(str)),e32.elem), \ + sizeof((str)->e32.elem))) + +#define elf_sval(elf, str, elem) \ + ((ELFCLASS64 == (elf)->class) \ + ? elf_access_signed((elf), (str), \ + offsetof(typeof(*(str)),e64.elem), \ + sizeof((str)->e64.elem)) \ + : elf_access_signed((elf), (str), \ + offsetof(typeof(*(str)),e32.elem), \ + sizeof((str)->e32.elem))) + +#define elf_size(elf, str) \ + ((ELFCLASS64 == (elf)->class) \ + ? 
sizeof((str)->e64) : sizeof((str)->e32)) + +uint64_t elf_access_unsigned(struct elf_binary *elf, const void *ptr, + uint64_t offset, size_t size); +int64_t elf_access_signed(struct elf_binary *elf, const void *ptr, + uint64_t offset, size_t size); + +uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr); + +/* ------------------------------------------------------------------------ */ +/* xc_libelf_tools.c */ + +int elf_shdr_count(struct elf_binary *elf); +int elf_phdr_count(struct elf_binary *elf); + +const elf_shdr *elf_shdr_by_name(struct elf_binary *elf, const char *name); +const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index); +const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index); + +const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr); +const void *elf_section_start(struct elf_binary *elf, const elf_shdr * shdr); +const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr); + +const void *elf_segment_start(struct elf_binary *elf, const elf_phdr * phdr); +const void *elf_segment_end(struct elf_binary *elf, const elf_phdr * phdr); + +const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol); +const elf_sym *elf_sym_by_index(struct elf_binary *elf, int index); + +const char *elf_note_name(struct elf_binary *elf, const elf_note * note); +const void *elf_note_desc(struct elf_binary *elf, const elf_note * note); +uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note); +const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note); + +int elf_is_elfbinary(const void *image); +int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr); + +/* ------------------------------------------------------------------------ */ +/* xc_libelf_loader.c */ + +int elf_init(struct elf_binary *elf, const char *image, size_t size); +#ifdef __XEN__ +void elf_set_verbose(struct elf_binary *elf); +#else +void elf_set_logfile(struct elf_binary *elf, FILE * log, int verbose); +#endif + +void elf_parse_binary(struct elf_binary *elf); +void elf_load_binary(struct elf_binary *elf); + +void *elf_get_ptr(struct elf_binary *elf, unsigned long addr); +uint64_t elf_lookup_addr(struct elf_binary *elf, const char *symbol); + +void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart); /* private */ + +/* ------------------------------------------------------------------------ */ +/* xc_libelf_relocate.c */ + +int elf_reloc(struct elf_binary *elf); + +/* ------------------------------------------------------------------------ */ +/* xc_libelf_dominfo.c */ + +#define UNSET_ADDR ((uint64_t)-1) + +enum xen_elfnote_type { + XEN_ENT_NONE = 0, + XEN_ENT_LONG = 1, + XEN_ENT_STR = 2 +}; + +struct xen_elfnote { + enum xen_elfnote_type type; + const char *name; + union { + const char *str; + uint64_t num; + } data; +}; + +struct elf_dom_parms { + /* raw */ + const char *guest_info; + const void *elf_note_start; + const void *elf_note_end; + struct xen_elfnote elf_notes[XEN_ELFNOTE_MAX + 1]; + + /* parsed */ + char guest_os[16]; + char guest_ver[16]; + char xen_ver[16]; + char loader[16]; + int pae; + int bsd_symtab; + uint64_t virt_base; + uint64_t virt_entry; + uint64_t virt_hypercall; + uint64_t virt_hv_start_low; + uint64_t elf_paddr_offset; + uint32_t f_supported[XENFEAT_NR_SUBMAPS]; + uint32_t f_required[XENFEAT_NR_SUBMAPS]; + + /* calculated */ + uint64_t virt_offset; + uint64_t virt_kstart; + uint64_t virt_kend; +}; + +static inline void elf_xen_feature_set(int nr, uint32_t * addr) +{ + 
addr[nr >> 5] |= 1 << (nr & 31); +} +static inline int elf_xen_feature_get(int nr, uint32_t * addr) +{ + return !!(addr[nr >> 5] & (1 << (nr & 31))); +} + +int elf_xen_parse_features(const char *features, + uint32_t *supported, + uint32_t *required); +int elf_xen_parse_note(struct elf_binary *elf, + struct elf_dom_parms *parms, + const elf_note *note); +int elf_xen_parse_guest_info(struct elf_binary *elf, + struct elf_dom_parms *parms); +int elf_xen_parse(struct elf_binary *elf, + struct elf_dom_parms *parms); + +#endif /* __XC_LIBELF__ */ Property changes on: xen/interface/libelf.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/trace.h =================================================================== --- xen/interface/trace.h (.../stable/6/sys) (revision 0) +++ xen/interface/trace.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,193 @@ +/****************************************************************************** + * include/public/trace.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Mark Williamson, (C) 2004 Intel Research Cambridge + * Copyright (C) 2005 Bin Ren + */ + +#ifndef __XEN_PUBLIC_TRACE_H__ +#define __XEN_PUBLIC_TRACE_H__ + +#define TRACE_EXTRA_MAX 7 +#define TRACE_EXTRA_SHIFT 28 + +/* Trace classes */ +#define TRC_CLS_SHIFT 16 +#define TRC_GEN 0x0001f000 /* General trace */ +#define TRC_SCHED 0x0002f000 /* Xen Scheduler trace */ +#define TRC_DOM0OP 0x0004f000 /* Xen DOM0 operation trace */ +#define TRC_HVM 0x0008f000 /* Xen HVM trace */ +#define TRC_MEM 0x0010f000 /* Xen memory trace */ +#define TRC_PV 0x0020f000 /* Xen PV traces */ +#define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */ +#define TRC_ALL 0x0ffff000 +#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff) +#define TRC_HD_CYCLE_FLAG (1UL<<31) +#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) ) +#define TRC_HD_EXTRA(x) (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX) + +/* Trace subclasses */ +#define TRC_SUBCLS_SHIFT 12 + +/* trace subclasses for SVM */ +#define TRC_HVM_ENTRYEXIT 0x00081000 /* VMENTRY and #VMEXIT */ +#define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ + +#define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */ +#define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */ + +/* Trace events per class */ +#define TRC_LOST_RECORDS (TRC_GEN + 1) +#define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2) +#define TRC_TRACE_CPU_CHANGE (TRC_GEN + 3) + +#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1) +#define TRC_SCHED_DOM_ADD (TRC_SCHED_VERBOSE + 1) +#define TRC_SCHED_DOM_REM (TRC_SCHED_VERBOSE + 2) +#define TRC_SCHED_SLEEP (TRC_SCHED_VERBOSE + 3) +#define TRC_SCHED_WAKE (TRC_SCHED_VERBOSE + 4) +#define TRC_SCHED_YIELD (TRC_SCHED_VERBOSE + 5) +#define TRC_SCHED_BLOCK (TRC_SCHED_VERBOSE + 6) +#define TRC_SCHED_SHUTDOWN (TRC_SCHED_VERBOSE + 7) +#define TRC_SCHED_CTL (TRC_SCHED_VERBOSE + 8) +#define TRC_SCHED_ADJDOM (TRC_SCHED_VERBOSE + 9) +#define TRC_SCHED_SWITCH (TRC_SCHED_VERBOSE + 10) +#define TRC_SCHED_S_TIMER_FN (TRC_SCHED_VERBOSE + 11) +#define TRC_SCHED_T_TIMER_FN (TRC_SCHED_VERBOSE + 12) +#define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED_VERBOSE + 13) +#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14) +#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15) + +#define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1) +#define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2) +#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3) + +#define TRC_PV_HYPERCALL (TRC_PV + 1) +#define TRC_PV_TRAP (TRC_PV + 3) +#define TRC_PV_PAGE_FAULT (TRC_PV + 4) +#define TRC_PV_FORCED_INVALID_OP (TRC_PV + 5) +#define TRC_PV_EMULATE_PRIVOP (TRC_PV + 6) +#define TRC_PV_EMULATE_4GB (TRC_PV + 7) +#define TRC_PV_MATH_STATE_RESTORE (TRC_PV + 8) +#define TRC_PV_PAGING_FIXUP (TRC_PV + 9) +#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV + 10) +#define TRC_PV_PTWR_EMULATION (TRC_PV + 11) +#define TRC_PV_PTWR_EMULATION_PAE (TRC_PV + 12) + /* Indicates that addresses in trace record are 64 bits */ +#define TRC_64_FLAG (0x100) + +#define TRC_SHADOW_NOT_SHADOW (TRC_SHADOW + 1) +#define TRC_SHADOW_FAST_PROPAGATE (TRC_SHADOW + 2) +#define TRC_SHADOW_FAST_MMIO (TRC_SHADOW + 3) +#define TRC_SHADOW_FALSE_FAST_PATH (TRC_SHADOW + 4) +#define TRC_SHADOW_MMIO (TRC_SHADOW + 5) +#define TRC_SHADOW_FIXUP (TRC_SHADOW + 6) +#define TRC_SHADOW_DOMF_DYING (TRC_SHADOW + 7) +#define TRC_SHADOW_EMULATE (TRC_SHADOW + 8) +#define TRC_SHADOW_EMULATE_UNSHADOW_USER (TRC_SHADOW + 9) +#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ (TRC_SHADOW + 10) +#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11) +#define TRC_SHADOW_WRMAP_BF 
(TRC_SHADOW + 12) +#define TRC_SHADOW_PREALLOC_UNPIN (TRC_SHADOW + 13) +#define TRC_SHADOW_RESYNC_FULL (TRC_SHADOW + 14) +#define TRC_SHADOW_RESYNC_ONLY (TRC_SHADOW + 15) + +/* trace events per subclass */ +#define TRC_HVM_VMENTRY (TRC_HVM_ENTRYEXIT + 0x01) +#define TRC_HVM_VMEXIT (TRC_HVM_ENTRYEXIT + 0x02) +#define TRC_HVM_VMEXIT64 (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02) +#define TRC_HVM_PF_XEN (TRC_HVM_HANDLER + 0x01) +#define TRC_HVM_PF_XEN64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x01) +#define TRC_HVM_PF_INJECT (TRC_HVM_HANDLER + 0x02) +#define TRC_HVM_PF_INJECT64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x02) +#define TRC_HVM_INJ_EXC (TRC_HVM_HANDLER + 0x03) +#define TRC_HVM_INJ_VIRQ (TRC_HVM_HANDLER + 0x04) +#define TRC_HVM_REINJ_VIRQ (TRC_HVM_HANDLER + 0x05) +#define TRC_HVM_IO_READ (TRC_HVM_HANDLER + 0x06) +#define TRC_HVM_IO_WRITE (TRC_HVM_HANDLER + 0x07) +#define TRC_HVM_CR_READ (TRC_HVM_HANDLER + 0x08) +#define TRC_HVM_CR_READ64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x08) +#define TRC_HVM_CR_WRITE (TRC_HVM_HANDLER + 0x09) +#define TRC_HVM_CR_WRITE64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x09) +#define TRC_HVM_DR_READ (TRC_HVM_HANDLER + 0x0A) +#define TRC_HVM_DR_WRITE (TRC_HVM_HANDLER + 0x0B) +#define TRC_HVM_MSR_READ (TRC_HVM_HANDLER + 0x0C) +#define TRC_HVM_MSR_WRITE (TRC_HVM_HANDLER + 0x0D) +#define TRC_HVM_CPUID (TRC_HVM_HANDLER + 0x0E) +#define TRC_HVM_INTR (TRC_HVM_HANDLER + 0x0F) +#define TRC_HVM_NMI (TRC_HVM_HANDLER + 0x10) +#define TRC_HVM_SMI (TRC_HVM_HANDLER + 0x11) +#define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12) +#define TRC_HVM_HLT (TRC_HVM_HANDLER + 0x13) +#define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14) +#define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14) +#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15) +#define TRC_HVM_IO_ASSIST (TRC_HVM_HANDLER + 0x16) +#define TRC_HVM_MMIO_ASSIST (TRC_HVM_HANDLER + 0x17) +#define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18) +#define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19) +#define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19) + +/* This structure represents a single trace buffer record. */ +struct t_rec { + uint32_t event:28; + uint32_t extra_u32:3; /* # entries in trailing extra_u32[] array */ + uint32_t cycles_included:1; /* u.cycles or u.no_cycles? */ + union { + struct { + uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */ + uint32_t extra_u32[7]; /* event data items */ + } cycles; + struct { + uint32_t extra_u32[7]; /* event data items */ + } nocycles; + } u; +}; + +/* + * This structure contains the metadata for a single trace buffer. The head + * field, indexes into an array of struct t_rec's. + */ +struct t_buf { + /* Assume the data buffer size is X. X is generally not a power of 2. + * CONS and PROD are incremented modulo (2*X): + * 0 <= cons < 2*X + * 0 <= prod < 2*X + * This is done because addition modulo X breaks at 2^32 when X is not a + * power of 2: + * (((2^32 - 1) % X) + 1) % X != (2^32) % X + */ + uint32_t cons; /* Offset of next item to be consumed by control tools. */ + uint32_t prod; /* Offset of next item to be produced by Xen. */ + /* Records follow immediately after the meta-data header. 
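The cons/prod counters in struct t_buf are kept modulo 2*X precisely so that a completely full buffer (fill == X) is distinguishable from an empty one even when X is not a power of two. A small consumer-side sketch of the arithmetic follows; it treats the record stream as opaque bytes, whereas a real consumer parses each struct t_rec header to find the record length, and X (the data-area size) is assumed to be known to the caller.

#include <stdint.h>

struct t_buf_min {
    uint32_t cons;      /* next offset to be consumed, modulo 2*X */
    uint32_t prod;      /* next offset to be produced, modulo 2*X */
    /* records follow immediately after this header */
};

/* Bytes currently outstanding.  Because both counters wrap at 2*X, the
 * difference taken modulo 2*X is the true fill level (0..X) even across
 * a wrap, which plain modulo-X counters could not guarantee. */
static uint32_t
trace_avail(const struct t_buf_min *buf, uint32_t x)
{
    return (buf->prod - buf->cons + 2 * x) % (2 * x);
}

/* Translate a modulo-2*X counter into a byte offset in the data area. */
static uint32_t
trace_offset(uint32_t counter, uint32_t x)
{
    return counter % x;
}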
*/ +}; + +#endif /* __XEN_PUBLIC_TRACE_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/trace.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/foreign/mkchecker.py =================================================================== --- xen/interface/foreign/mkchecker.py (.../stable/6/sys) (revision 0) +++ xen/interface/foreign/mkchecker.py (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,58 @@ +#!/usr/bin/python + +import sys; +from structs import structs; + +# command line arguments +arch = sys.argv[1]; +outfile = sys.argv[2]; +archs = sys.argv[3:]; + +f = open(outfile, "w"); +f.write(''' +/* + * sanity checks for generated foreign headers: + * - verify struct sizes + * + * generated by %s -- DO NOT EDIT + */ +#include +#include +#include +#include +#include "../xen.h" +'''); + +for a in archs: + f.write('#include "%s.h"\n' % a); + +f.write('int main(int argc, char *argv[])\n{\n'); + +f.write('\tprintf("\\n");'); +f.write('printf("%-25s |", "structs");\n'); +for a in archs: + f.write('\tprintf("%%8s", "%s");\n' % a); +f.write('\tprintf("\\n");'); + +f.write('\tprintf("\\n");'); +for struct in structs: + f.write('\tprintf("%%-25s |", "%s");\n' % struct); + for a in archs: + if a == arch: + s = struct; # native + else: + s = struct + "_" + a; + f.write('#ifdef %s_has_no_%s\n' % (a, struct)); + f.write('\tprintf("%8s", "-");\n'); + f.write("#else\n"); + f.write('\tprintf("%%8zd", sizeof(struct %s));\n' % s); + f.write("#endif\n"); + + f.write('\tprintf("\\n");\n\n'); + +f.write('\tprintf("\\n");\n'); +f.write('\texit(0);\n'); +f.write('}\n'); + +f.close(); + Property changes on: xen/interface/foreign/mkchecker.py ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/foreign/mkheader.py =================================================================== --- xen/interface/foreign/mkheader.py (.../stable/6/sys) (revision 0) +++ xen/interface/foreign/mkheader.py (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,167 @@ +#!/usr/bin/python + +import sys, re; +from structs import unions, structs, defines; + +# command line arguments +arch = sys.argv[1]; +outfile = sys.argv[2]; +infiles = sys.argv[3:]; + + +########################################################################### +# configuration #2: architecture information + +inttypes = {}; +header = {}; +footer = {}; + +# x86_32 +inttypes["x86_32"] = { + "unsigned long" : "uint32_t", + "long" : "uint32_t", + "xen_pfn_t" : "uint32_t", +}; +header["x86_32"] = """ +#define __i386___X86_32 1 +#pragma pack(4) +"""; +footer["x86_32"] = """ +#pragma pack() +"""; + +# x86_64 +inttypes["x86_64"] = { + "unsigned long" : "__align8__ uint64_t", + "long" : "__align8__ uint64_t", + "xen_pfn_t" : "__align8__ uint64_t", +}; +header["x86_64"] = """ +#ifdef __GNUC__ +# define __DECL_REG(name) union { uint64_t r ## name, e ## name; } +# define __align8__ __attribute__((aligned (8))) +#else +# define __DECL_REG(name) uint64_t r ## name +# define __align8__ FIXME +#endif +#define __x86_64___X86_64 1 +"""; + +# ia64 +inttypes["ia64"] = { + "unsigned long" : "__align8__ uint64_t", + "long" : "__align8__ uint64_t", + "xen_pfn_t" : "__align8__ uint64_t", + "long double" : "__align16__ ldouble_t", +}; +header["ia64"] = """ +#define __align8__ __attribute__((aligned (8))) +#define 
__align16__ __attribute__((aligned (16))) +typedef unsigned char ldouble_t[16]; +"""; + + +########################################################################### +# main + +input = ""; +output = ""; +fileid = re.sub("[-.]", "_", "__FOREIGN_%s__" % outfile.upper()); + +# read input header files +for name in infiles: + f = open(name, "r"); + input += f.read(); + f.close(); + +# add header +output += """ +/* + * public xen defines and struct for %s + * generated by %s -- DO NOT EDIT + */ + +#ifndef %s +#define %s 1 + +""" % (arch, sys.argv[0], fileid, fileid) + +if arch in header: + output += header[arch]; + output += "\n"; + +# add defines to output +for line in re.findall("#define[^\n]+", input): + for define in defines: + regex = "#define\s+%s\\b" % define; + match = re.search(regex, line); + if None == match: + continue; + if define.upper()[0] == define[0]: + replace = define + "_" + arch.upper(); + else: + replace = define + "_" + arch; + regex = "\\b%s\\b" % define; + output += re.sub(regex, replace, line) + "\n"; +output += "\n"; + +# delete defines, comments, empty lines +input = re.sub("#define[^\n]+\n", "", input); +input = re.compile("/\*(.*?)\*/", re.S).sub("", input) +input = re.compile("\n\s*\n", re.S).sub("\n", input); + +# add unions to output +for union in unions: + regex = "union\s+%s\s*\{(.*?)\n\};" % union; + match = re.search(regex, input, re.S) + if None == match: + output += "#define %s_has_no_%s 1\n" % (arch, union); + else: + output += "union %s_%s {%s\n};\n" % (union, arch, match.group(1)); + output += "\n"; + +# add structs to output +for struct in structs: + regex = "struct\s+%s\s*\{(.*?)\n\};" % struct; + match = re.search(regex, input, re.S) + if None == match: + output += "#define %s_has_no_%s 1\n" % (arch, struct); + else: + output += "struct %s_%s {%s\n};\n" % (struct, arch, match.group(1)); + output += "typedef struct %s_%s %s_%s_t;\n" % (struct, arch, struct, arch); + output += "\n"; + +# add footer +if arch in footer: + output += footer[arch]; + output += "\n"; +output += "#endif /* %s */\n" % fileid; + +# replace: defines +for define in defines: + if define.upper()[0] == define[0]: + replace = define + "_" + arch.upper(); + else: + replace = define + "_" + arch; + output = re.sub("\\b%s\\b" % define, replace, output); + +# replace: unions +for union in unions: + output = re.sub("\\b(union\s+%s)\\b" % union, "\\1_%s" % arch, output); + +# replace: structs + struct typedefs +for struct in structs: + output = re.sub("\\b(struct\s+%s)\\b" % struct, "\\1_%s" % arch, output); + output = re.sub("\\b(%s)_t\\b" % struct, "\\1_%s_t" % arch, output); + +# replace: integer types +integers = inttypes[arch].keys(); +integers.sort(lambda a, b: cmp(len(b),len(a))); +for type in integers: + output = re.sub("\\b%s\\b" % type, inttypes[arch][type], output); + +# print results +f = open(outfile, "w"); +f.write(output); +f.close; + Property changes on: xen/interface/foreign/mkheader.py ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/foreign/reference.size =================================================================== --- xen/interface/foreign/reference.size (.../stable/6/sys) (revision 0) +++ xen/interface/foreign/reference.size (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,17 @@ + +structs | x86_32 x86_64 ia64 + +start_info | 1104 1152 1152 +trap_info | 8 16 - +pt_fpreg | - - 16 +cpu_user_regs | 68 200 496 +xen_ia64_boot_param | - - 96 +ia64_tr_entry | - - 32 +vcpu_extra_regs | - - 536 
+vcpu_guest_context | 2800 5168 1056 +arch_vcpu_info | 24 16 0 +vcpu_time_info | 32 32 32 +vcpu_info | 64 64 48 +arch_shared_info | 268 280 272 +shared_info | 2584 3368 4384 + Property changes on: xen/interface/foreign/reference.size ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/foreign/Makefile =================================================================== --- xen/interface/foreign/Makefile (.../stable/6/sys) (revision 0) +++ xen/interface/foreign/Makefile (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,37 @@ +XEN_ROOT=../../../.. +include $(XEN_ROOT)/Config.mk + +architectures := x86_32 x86_64 ia64 +headers := $(patsubst %, %.h, $(architectures)) +scripts := $(wildcard *.py) + +.PHONY: all clean check-headers +all: $(headers) check-headers + +clean: + rm -f $(headers) + rm -f checker checker.c $(XEN_TARGET_ARCH).size + rm -f *.pyc *.o *~ + +ifeq ($(CROSS_COMPILE)$(XEN_TARGET_ARCH),$(XEN_COMPILE_ARCH)) +check-headers: checker + ./checker > $(XEN_TARGET_ARCH).size + diff -u reference.size $(XEN_TARGET_ARCH).size +checker: checker.c $(headers) + $(HOSTCC) $(HOSTCFLAGS) -o $@ $< +else +check-headers: + @echo "cross build: skipping check" +endif + +x86_32.h: ../arch-x86/xen-x86_32.h ../arch-x86/xen.h ../xen.h $(scripts) + python mkheader.py $* $@ $(filter %.h,$^) + +x86_64.h: ../arch-x86/xen-x86_64.h ../arch-x86/xen.h ../xen.h $(scripts) + python mkheader.py $* $@ $(filter %.h,$^) + +ia64.h: ../arch-ia64.h ../xen.h $(scripts) + python mkheader.py $* $@ $(filter %.h,$^) + +checker.c: $(scripts) + python mkchecker.py $(XEN_TARGET_ARCH) $@ $(architectures) Property changes on: xen/interface/foreign/Makefile ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/foreign/structs.py =================================================================== --- xen/interface/foreign/structs.py (.../stable/6/sys) (revision 0) +++ xen/interface/foreign/structs.py (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,58 @@ +# configuration: what needs translation + +unions = [ "vcpu_cr_regs", + "vcpu_ar_regs" ]; + +structs = [ "start_info", + "trap_info", + "pt_fpreg", + "cpu_user_regs", + "xen_ia64_boot_param", + "ia64_tr_entry", + "vcpu_tr_regs", + "vcpu_guest_context_regs", + "vcpu_guest_context", + "arch_vcpu_info", + "vcpu_time_info", + "vcpu_info", + "arch_shared_info", + "shared_info" ]; + +defines = [ "__i386__", + "__x86_64__", + + "FLAT_RING1_CS", + "FLAT_RING1_DS", + "FLAT_RING1_SS", + + "FLAT_RING3_CS64", + "FLAT_RING3_DS64", + "FLAT_RING3_SS64", + "FLAT_KERNEL_CS64", + "FLAT_KERNEL_DS64", + "FLAT_KERNEL_SS64", + + "FLAT_KERNEL_CS", + "FLAT_KERNEL_DS", + "FLAT_KERNEL_SS", + + # x86_{32,64} + "_VGCF_i387_valid", + "VGCF_i387_valid", + "_VGCF_in_kernel", + "VGCF_in_kernel", + "_VGCF_failsafe_disables_events", + "VGCF_failsafe_disables_events", + "_VGCF_syscall_disables_events", + "VGCF_syscall_disables_events", + "_VGCF_online", + "VGCF_online", + + # ia64 + "VGCF_EXTRA_REGS", + + # all archs + "xen_pfn_to_cr3", + "MAX_VIRT_CPUS", + "MAX_GUEST_CMDLINE" ]; + Property changes on: xen/interface/foreign/structs.py ___________________________________________________________________ Added: fbsd:nokeywords + true Property changes on: xen/interface/foreign ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/hvm/hvm_info_table.h 
=================================================================== --- xen/interface/hvm/hvm_info_table.h (.../stable/6/sys) (revision 0) +++ xen/interface/hvm/hvm_info_table.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,41 @@ +/****************************************************************************** + * hvm/hvm_info_table.h + * + * HVM parameter and information table, written into guest memory map. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ +#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ + +#define HVM_INFO_PFN 0x09F +#define HVM_INFO_OFFSET 0x800 +#define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET) + +struct hvm_info_table { + char signature[8]; /* "HVM INFO" */ + uint32_t length; + uint8_t checksum; + uint8_t acpi_enabled; + uint8_t apic_mode; + uint32_t nr_vcpus; +}; + +#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */ Property changes on: xen/interface/hvm/hvm_info_table.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/hvm/e820.h =================================================================== --- xen/interface/hvm/e820.h (.../stable/6/sys) (revision 0) +++ xen/interface/hvm/e820.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,34 @@ + +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
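hvm_info_table lives at a fixed guest-physical address inside the HVM memory map: HVM_INFO_PADDR works out to 0x9F800, just under 640K. A sketch of locating and validating a mapped copy is below; the zero-sum byte checksum mirrors the ACPI-table convention and is an assumption here, since the header does not spell the convention out.

#include <stdint.h>
#include <string.h>

#define HVM_INFO_PFN     0x09F
#define HVM_INFO_OFFSET  0x800
#define HVM_INFO_PADDR   ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET)   /* 0x9F800 */

struct hvm_info_table {
    char     signature[8];      /* "HVM INFO" */
    uint32_t length;
    uint8_t  checksum;
    uint8_t  acpi_enabled;
    uint8_t  apic_mode;
    uint32_t nr_vcpus;
};

/* Validate a mapped copy of the table (caller maps HVM_INFO_PADDR). */
static int
hvm_info_valid(const struct hvm_info_table *t)
{
    const uint8_t *p = (const uint8_t *)t;
    uint8_t sum = 0;
    uint32_t i;

    if (memcmp(t->signature, "HVM INFO", 8) != 0)
        return 0;
    for (i = 0; i < t->length; i++)
        sum = (uint8_t)(sum + p[i]);    /* assumed: bytes must sum to zero */
    return sum == 0;
}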
+ */ + +#ifndef __XEN_PUBLIC_HVM_E820_H__ +#define __XEN_PUBLIC_HVM_E820_H__ + +/* E820 location in HVM virtual address space. */ +#define HVM_E820_PAGE 0x00090000 +#define HVM_E820_NR_OFFSET 0x000001E8 +#define HVM_E820_OFFSET 0x000002D0 + +#define HVM_BELOW_4G_RAM_END 0xF0000000 +#define HVM_BELOW_4G_MMIO_START HVM_BELOW_4G_RAM_END +#define HVM_BELOW_4G_MMIO_LENGTH ((1ULL << 32) - HVM_BELOW_4G_MMIO_START) + +#endif /* __XEN_PUBLIC_HVM_E820_H__ */ Property changes on: xen/interface/hvm/e820.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/hvm/save.h =================================================================== --- xen/interface/hvm/save.h (.../stable/6/sys) (revision 0) +++ xen/interface/hvm/save.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,88 @@ +/* + * hvm/save.h + * + * Structure definitions for HVM state that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * Copyright (c) 2007 XenSource Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_H__ +#define __XEN_PUBLIC_HVM_SAVE_H__ + +/* + * Structures in this header *must* have the same layout in 32bit + * and 64bit environments: this means that all fields must be explicitly + * sized types and aligned to their sizes, and the structs must be + * a multiple of eight bytes long. + * + * Only the state necessary for saving and restoring (i.e. fields + * that are analogous to actual hardware state) should go in this file. + * Internal mechanisms should be kept in Xen-private headers. + */ + +#if !defined(__GNUC__) || defined(__STRICT_ANSI__) +#error "Anonymous structs/unions are a GNU extension." +#endif + +/* + * Each entry is preceded by a descriptor giving its type and length + */ +struct hvm_save_descriptor { + uint16_t typecode; /* Used to demux the various types below */ + uint16_t instance; /* Further demux within a type */ + uint32_t length; /* In bytes, *not* including this descriptor */ +}; + + +/* + * Each entry has a datatype associated with it: for example, the CPU state + * is saved as a HVM_SAVE_TYPE(CPU), which has HVM_SAVE_LENGTH(CPU), + * and is identified by a descriptor with typecode HVM_SAVE_CODE(CPU). + * DECLARE_HVM_SAVE_TYPE binds these things together with some type-system + * ugliness. 
*/ + +#define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; } + +#define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t) +#define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x))) +#define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c)) + + +/* + * The series of save records is terminated by a zero-type, zero-length + * descriptor. + */ + +struct hvm_save_end {}; +DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end); + +#if defined(__i386__) || defined(__x86_64__) +#include "../arch-x86/hvm/save.h" +#elif defined(__ia64__) +#include "../arch-ia64/hvm/save.h" +#else +#error "unsupported architecture" +#endif + +#endif /* __XEN_PUBLIC_HVM_SAVE_H__ */ Property changes on: xen/interface/hvm/save.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/hvm/ioreq.h =================================================================== --- xen/interface/hvm/ioreq.h (.../stable/6/sys) (revision 0) +++ xen/interface/hvm/ioreq.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,127 @@ +/* + * ioreq.h: I/O request definitions for device models + * Copyright (c) 2004, Intel Corporation. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef _IOREQ_H_ +#define _IOREQ_H_ + +#define IOREQ_READ 1 +#define IOREQ_WRITE 0 + +#define STATE_IOREQ_NONE 0 +#define STATE_IOREQ_READY 1 +#define STATE_IOREQ_INPROCESS 2 +#define STATE_IORESP_READY 3 + +#define IOREQ_TYPE_PIO 0 /* pio */ +#define IOREQ_TYPE_COPY 1 /* mmio ops */ +#define IOREQ_TYPE_TIMEOFFSET 7 +#define IOREQ_TYPE_INVALIDATE 8 /* mapcache */ + +/* + * VMExit dispatcher should cooperate with instruction decoder to + * prepare this structure and notify service OS and DM by sending + * virq + */ +struct ioreq { + uint64_t addr; /* physical address */ + uint64_t size; /* size in bytes */ + uint64_t count; /* for rep prefixes */ + uint64_t data; /* data (or paddr of data) */ + uint8_t state:4; + uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr + * of the real data to use. */ + uint8_t dir:1; /* 1=read, 0=write */ + uint8_t df:1; + uint8_t pad:1; + uint8_t type; /* I/O type */ + uint8_t _pad0[6]; + uint64_t io_count; /* How many IO done on a vcpu */ +}; +typedef struct ioreq ioreq_t; + +struct vcpu_iodata { + struct ioreq vp_ioreq; + /* Event channel port, used for notifications to/from the device model.
*/ + uint32_t vp_eport; + uint32_t _pad0; +}; +typedef struct vcpu_iodata vcpu_iodata_t; + +struct shared_iopage { + struct vcpu_iodata vcpu_iodata[1]; +}; +typedef struct shared_iopage shared_iopage_t; + +struct buf_ioreq { + uint8_t type; /* I/O type */ + uint8_t pad:1; + uint8_t dir:1; /* 1=read, 0=write */ + uint8_t size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */ + uint32_t addr:20;/* physical address */ + uint32_t data; /* data */ +}; +typedef struct buf_ioreq buf_ioreq_t; + +#define IOREQ_BUFFER_SLOT_NUM 511 /* 8 bytes each, plus 2 4-byte indexes */ +struct buffered_iopage { + unsigned int read_pointer; + unsigned int write_pointer; + buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM]; +}; /* NB. Size of this structure must be no greater than one page. */ +typedef struct buffered_iopage buffered_iopage_t; + +#if defined(__ia64__) +struct pio_buffer { + uint32_t page_offset; + uint32_t pointer; + uint32_t data_end; + uint32_t buf_size; + void *opaque; +}; + +#define PIO_BUFFER_IDE_PRIMARY 0 /* I/O port = 0x1F0 */ +#define PIO_BUFFER_IDE_SECONDARY 1 /* I/O port = 0x170 */ +#define PIO_BUFFER_ENTRY_NUM 2 +struct buffered_piopage { + struct pio_buffer pio[PIO_BUFFER_ENTRY_NUM]; + uint8_t buffer[1]; +}; +#endif /* defined(__ia64__) */ + +#define ACPI_PM1A_EVT_BLK_ADDRESS 0x0000000000001f40 +#define ACPI_PM1A_CNT_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08) +#define ACPI_GPE0_BLK_ADDRESS (ACPI_PM_TMR_BLK_ADDRESS + 0x20) +#define ACPI_GPE0_BLK_LEN 0x08 + +#endif /* _IOREQ_H_ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/hvm/ioreq.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/hvm/vmx_assist.h =================================================================== --- xen/interface/hvm/vmx_assist.h (.../stable/6/sys) (revision 0) +++ xen/interface/hvm/vmx_assist.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,122 @@ +/* + * vmx_assist.h: Context definitions for the VMXASSIST world switch. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Leendert van Doorn, leendert@watson.ibm.com + * Copyright (c) 2005, International Business Machines Corporation. 
+ */ + +#ifndef _VMX_ASSIST_H_ +#define _VMX_ASSIST_H_ + +#define VMXASSIST_BASE 0xD0000 +#define VMXASSIST_MAGIC 0x17101966 +#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8) + +#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12) +#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4) + +#ifndef __ASSEMBLY__ + +#define NR_EXCEPTION_HANDLER 32 +#define NR_INTERRUPT_HANDLERS 16 +#define NR_TRAPS (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS) + +union vmcs_arbytes { + struct arbyte_fields { + unsigned int seg_type : 4, + s : 1, + dpl : 2, + p : 1, + reserved0 : 4, + avl : 1, + reserved1 : 1, + default_ops_size: 1, + g : 1, + null_bit : 1, + reserved2 : 15; + } fields; + unsigned int bytes; +}; + +/* + * World switch state + */ +struct vmx_assist_context { + uint32_t eip; /* execution pointer */ + uint32_t esp; /* stack pointer */ + uint32_t eflags; /* flags register */ + uint32_t cr0; + uint32_t cr3; /* page table directory */ + uint32_t cr4; + uint32_t idtr_limit; /* idt */ + uint32_t idtr_base; + uint32_t gdtr_limit; /* gdt */ + uint32_t gdtr_base; + uint32_t cs_sel; /* cs selector */ + uint32_t cs_limit; + uint32_t cs_base; + union vmcs_arbytes cs_arbytes; + uint32_t ds_sel; /* ds selector */ + uint32_t ds_limit; + uint32_t ds_base; + union vmcs_arbytes ds_arbytes; + uint32_t es_sel; /* es selector */ + uint32_t es_limit; + uint32_t es_base; + union vmcs_arbytes es_arbytes; + uint32_t ss_sel; /* ss selector */ + uint32_t ss_limit; + uint32_t ss_base; + union vmcs_arbytes ss_arbytes; + uint32_t fs_sel; /* fs selector */ + uint32_t fs_limit; + uint32_t fs_base; + union vmcs_arbytes fs_arbytes; + uint32_t gs_sel; /* gs selector */ + uint32_t gs_limit; + uint32_t gs_base; + union vmcs_arbytes gs_arbytes; + uint32_t tr_sel; /* task selector */ + uint32_t tr_limit; + uint32_t tr_base; + union vmcs_arbytes tr_arbytes; + uint32_t ldtr_sel; /* ldtr selector */ + uint32_t ldtr_limit; + uint32_t ldtr_base; + union vmcs_arbytes ldtr_arbytes; + + unsigned char rm_irqbase[2]; +}; +typedef struct vmx_assist_context vmx_assist_context_t; + +#endif /* __ASSEMBLY__ */ + +#endif /* _VMX_ASSIST_H_ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/hvm/vmx_assist.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/hvm/params.h =================================================================== --- xen/interface/hvm/params.h (.../stable/6/sys) (revision 0) +++ xen/interface/hvm/params.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,98 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ +#define __XEN_PUBLIC_HVM_PARAMS_H__ + +#include + +/* + * Parameter space for HVMOP_{set,get}_param. + */ + +/* + * How should CPU0 event-channel notifications be delivered? + * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt). + * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: + * Domain = val[47:32], Bus = val[31:16], + * DevFn = val[15: 8], IntX = val[ 1: 0] + * If val == 0 then CPU0 event-channel notifications are not delivered. + */ +#define HVM_PARAM_CALLBACK_IRQ 0 + +/* + * These are not used by Xen. They are here for convenience of HVM-guest + * xenbus implementations. + */ +#define HVM_PARAM_STORE_PFN 1 +#define HVM_PARAM_STORE_EVTCHN 2 + +#define HVM_PARAM_PAE_ENABLED 4 + +#define HVM_PARAM_IOREQ_PFN 5 + +#define HVM_PARAM_BUFIOREQ_PFN 6 + +#ifdef __ia64__ +#define HVM_PARAM_NVRAM_FD 7 +#define HVM_PARAM_VHPT_SIZE 8 +#define HVM_PARAM_BUFPIOREQ_PFN 9 +#endif + +/* + * Set mode for virtual timers (currently x86 only): + * delay_for_missed_ticks (default): + * Do not advance a vcpu's time beyond the correct delivery time for + * interrupts that have been missed due to preemption. Deliver missed + * interrupts when the vcpu is rescheduled and advance the vcpu's virtual + * time stepwise for each one. + * no_delay_for_missed_ticks: + * As above, missed interrupts are delivered, but guest time always tracks + * wallclock (i.e., real) time while doing so. + * no_missed_ticks_pending: + * No missed interrupts are held pending. Instead, to ensure ticks are + * delivered at some non-zero rate, if we detect missed ticks then the + * internal tick alarm is not disabled if the VCPU is preempted during the + * next tick period. + * one_missed_tick_pending: + * Missed interrupts are collapsed together and delivered as one 'late tick'. + * Guest time always tracks wallclock (i.e., real) time. + */ +#define HVM_PARAM_TIMER_MODE 10 +#define HVMPTM_delay_for_missed_ticks 0 +#define HVMPTM_no_delay_for_missed_ticks 1 +#define HVMPTM_no_missed_ticks_pending 2 +#define HVMPTM_one_missed_tick_pending 3 + +/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ +#define HVM_PARAM_HPET_ENABLED 11 + +/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ +#define HVM_PARAM_IDENT_PT 12 + +/* Device Model domain, defaults to 0. */ +#define HVM_PARAM_DM_DOMAIN 13 + +/* ACPI S state: currently support S0 and S3 on x86. 
*/ +#define HVM_PARAM_ACPI_S_STATE 14 + +#define HVM_NR_PARAMS 15 + +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ Property changes on: xen/interface/hvm/params.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/hvm/hvm_op.h =================================================================== --- xen/interface/hvm/hvm_op.h (.../stable/6/sys) (revision 0) +++ xen/interface/hvm/hvm_op.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,131 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ +#define __XEN_PUBLIC_HVM_HVM_OP_H__ + +/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */ +#define HVMOP_set_param 0 +#define HVMOP_get_param 1 +struct xen_hvm_param { + domid_t domid; /* IN */ + uint32_t index; /* IN */ + uint64_t value; /* IN/OUT */ +}; +typedef struct xen_hvm_param xen_hvm_param_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); + +/* Set the logical level of one of a domain's PCI INTx wires. */ +#define HVMOP_set_pci_intx_level 2 +struct xen_hvm_set_pci_intx_level { + /* Domain to be updated. */ + domid_t domid; + /* PCI INTx identification in PCI topology (domain:bus:device:intx). */ + uint8_t domain, bus, device, intx; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t); + +/* Set the logical level of one of a domain's ISA IRQ wires. */ +#define HVMOP_set_isa_irq_level 3 +struct xen_hvm_set_isa_irq_level { + /* Domain to be updated. */ + domid_t domid; + /* ISA device identification, by ISA IRQ (0-15). */ + uint8_t isa_irq; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t); + +#define HVMOP_set_pci_link_route 4 +struct xen_hvm_set_pci_link_route { + /* Domain to be updated. */ + domid_t domid; + /* PCI link identifier (0-3). */ + uint8_t link; + /* ISA IRQ (1-15), or 0 (disable link). */ + uint8_t isa_irq; +}; +typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); + +/* Flushes all VCPU TLBs: @arg must be NULL. */ +#define HVMOP_flush_tlbs 5 + +/* Following tools-only interfaces may change in future. 
*/ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +/* Track dirty VRAM. */ +#define HVMOP_track_dirty_vram 6 +struct xen_hvm_track_dirty_vram { + /* Domain to be tracked. */ + domid_t domid; + /* First pfn to track. */ + uint64_aligned_t first_pfn; + /* Number of pages to track. */ + uint64_aligned_t nr; + /* OUT variable. */ + /* Dirty bitmap buffer. */ + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; +}; +typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t); + +/* Notify that some pages got modified by the Device Model. */ +#define HVMOP_modified_memory 7 +struct xen_hvm_modified_memory { + /* Domain to be updated. */ + domid_t domid; + /* First pfn. */ + uint64_aligned_t first_pfn; + /* Number of pages. */ + uint64_aligned_t nr; +}; +typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); + +#define HVMOP_set_mem_type 8 +typedef enum { + HVMMEM_ram_rw, /* Normal read/write guest RAM */ + HVMMEM_ram_ro, /* Read-only; writes are discarded */ + HVMMEM_mmio_dm, /* Reads and write go to the device model */ +} hvmmem_type_t; +/* Notify that a region of memory is to be treated in a specific way. */ +struct xen_hvm_set_mem_type { + /* Domain to be updated. */ + domid_t domid; + /* Memory type */ + hvmmem_type_t hvmmem_type; + /* First pfn. */ + uint64_aligned_t first_pfn; + /* Number of pages. */ + uint64_aligned_t nr; +}; +typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t); + + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ Property changes on: xen/interface/hvm/hvm_op.h ___________________________________________________________________ Added: fbsd:nokeywords + true Property changes on: xen/interface/hvm ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/arch-x86_64.h =================================================================== --- xen/interface/arch-x86_64.h (.../stable/6/sys) (revision 0) +++ xen/interface/arch-x86_64.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,27 @@ +/****************************************************************************** + * arch-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include "arch-x86/xen.h" Property changes on: xen/interface/arch-x86_64.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/arch-x86/xen.h =================================================================== --- xen/interface/arch-x86/xen.h (.../stable/6/sys) (revision 0) +++ xen/interface/arch-x86/xen.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,199 @@ +/****************************************************************************** + * arch-x86/xen.h + * + * Guest OS interface to x86 Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_H__ + +/* Structural guest handles introduced in 0x00030201. */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030201 +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } __guest_handle_ ## name +#else +#error "using old handle" +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef type * __guest_handle_ ## name +#endif + +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + ___DEFINE_XEN_GUEST_HANDLE(name, type); \ + ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) +#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif + +#if defined(__i386__) +#include +#elif defined(__x86_64__) +#include +#endif + +#ifndef __ASSEMBLY__ +typedef unsigned long xen_pfn_t; +#define PRI_xen_pfn "lx" +#endif + +/* + * SEGMENT DESCRIPTOR TABLES + */ +/* + * A number of GDT entries are reserved by Xen. These are not situated at the + * start of the GDT because some stupid OSes export hard-coded selector values + * in their ABI. These hard-coded values are always near the start of the GDT, + * so Xen places itself out of the way, at the far end of the GDT. + */ +#define FIRST_RESERVED_GDT_PAGE 14 +#define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) +#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) + +/* Maximum number of virtual CPUs in multi-processor guests. 
*/ +#define MAX_VIRT_CPUS 32 + +#ifndef __ASSEMBLY__ + +typedef unsigned long xen_ulong_t; + +/* + * Send an array of these to HYPERVISOR_set_trap_table(). + * The privilege level specifies which modes may enter a trap via a software + * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate + * privilege levels as follows: + * Level == 0: Noone may enter + * Level == 1: Kernel may enter + * Level == 2: Kernel may enter + * Level == 3: Everyone may enter + */ +#define TI_GET_DPL(_ti) ((_ti)->flags & 3) +#define TI_GET_IF(_ti) ((_ti)->flags & 4) +#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl)) +#define TI_SET_IF(_ti,_if) ((_ti)->flags |= ((!!(_if))<<2)) +struct trap_info { + uint8_t vector; /* exception vector */ + uint8_t flags; /* 0-3: privilege level; 4: clear event enable? */ + uint16_t cs; /* code selector */ + unsigned long address; /* code offset */ +}; +typedef struct trap_info trap_info_t; +DEFINE_XEN_GUEST_HANDLE(trap_info_t); + +typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ + +/* + * The following is all CPU context. Note that the fpu_ctxt block is filled + * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used. + */ +struct vcpu_guest_context { + /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */ + struct { char x[512]; } fpu_ctxt; /* User-level FPU registers */ +#define VGCF_I387_VALID (1<<0) +#define VGCF_IN_KERNEL (1<<2) +#define _VGCF_i387_valid 0 +#define VGCF_i387_valid (1<<_VGCF_i387_valid) +#define _VGCF_in_kernel 2 +#define VGCF_in_kernel (1<<_VGCF_in_kernel) +#define _VGCF_failsafe_disables_events 3 +#define VGCF_failsafe_disables_events (1<<_VGCF_failsafe_disables_events) +#define _VGCF_syscall_disables_events 4 +#define VGCF_syscall_disables_events (1<<_VGCF_syscall_disables_events) +#define _VGCF_online 5 +#define VGCF_online (1<<_VGCF_online) + unsigned long flags; /* VGCF_* flags */ + struct cpu_user_regs user_regs; /* User-level CPU registers */ + struct trap_info trap_ctxt[256]; /* Virtual IDT */ + unsigned long ldt_base, ldt_ents; /* LDT (linear address, # ents) */ + unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */ + unsigned long kernel_ss, kernel_sp; /* Virtual TSS (only SS1/SP1) */ + /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */ + unsigned long ctrlreg[8]; /* CR0-CR7 (control registers) */ + unsigned long debugreg[8]; /* DB0-DB7 (debug registers) */ +#ifdef __i386__ + unsigned long event_callback_cs; /* CS:EIP of event callback */ + unsigned long event_callback_eip; + unsigned long failsafe_callback_cs; /* CS:EIP of failsafe callback */ + unsigned long failsafe_callback_eip; +#else + unsigned long event_callback_eip; + unsigned long failsafe_callback_eip; +#ifdef __XEN__ + union { + unsigned long syscall_callback_eip; + struct { + unsigned int event_callback_cs; /* compat CS of event cb */ + unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */ + }; + } u; +#else + unsigned long syscall_callback_eip; +#endif +#endif + unsigned long vm_assist; /* VMASST_TYPE_* bitmap */ +#ifdef __x86_64__ + /* Segment base addresses. */ + uint64_t fs_base; + uint64_t gs_base_kernel; + uint64_t gs_base_user; +#endif +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); + +struct arch_shared_info { + unsigned long max_pfn; /* max pfn that appears in table */ + /* Frame containing list of mfns containing list of mfns containing p2m. 
*/ + xen_pfn_t pfn_to_mfn_frame_list_list; + unsigned long nmi_reason; + uint64_t pad[32]; +}; +typedef struct arch_shared_info arch_shared_info_t; + +#endif /* !__ASSEMBLY__ */ + +/* + * Prefix forces emulation of some non-trapping instructions. + * Currently only CPUID. + */ +#ifdef __ASSEMBLY__ +#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ; +#define XEN_CPUID XEN_EMULATE_PREFIX cpuid +#else +#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; " +#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid" +#endif + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/arch-x86/xen.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/arch-x86/xen-x86_32.h =================================================================== --- xen/interface/arch-x86/xen-x86_32.h (.../stable/6/sys) (revision 0) +++ xen/interface/arch-x86/xen-x86_32.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,183 @@ +/****************************************************************************** + * xen-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2007, K A Fraser + */ + +#include +#include + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ + +/* + * Hypercall interface: + * Input: %ebx, %ecx, %edx, %esi, %edi (arguments 1-5) + * Output: %eax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) + */ + +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +/* + * Legacy hypercall interface: + * As above, except the entry sequence to the hypervisor is: + * mov $hypercall-number*32,%eax ; int $0x82 + */ +#define TRAP_INSTR "int $0x82" +#endif + +/* + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. 
+ */ +#define FLAT_RING1_CS 0xe019 /* GDT index 259 */ +#define FLAT_RING1_DS 0xe021 /* GDT index 260 */ +#define FLAT_RING1_SS 0xe021 /* GDT index 260 */ +#define FLAT_RING3_CS 0xe02b /* GDT index 261 */ +#define FLAT_RING3_DS 0xe033 /* GDT index 262 */ +#define FLAT_RING3_SS 0xe033 /* GDT index 262 */ + +#define FLAT_KERNEL_CS FLAT_RING1_CS +#define FLAT_KERNEL_DS FLAT_RING1_DS +#define FLAT_KERNEL_SS FLAT_RING1_SS +#define FLAT_USER_CS FLAT_RING3_CS +#define FLAT_USER_DS FLAT_RING3_DS +#define FLAT_USER_SS FLAT_RING3_SS + +#define __HYPERVISOR_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_START_PAE 0xF5800000 +#define __MACH2PHYS_VIRT_END_PAE 0xF6800000 +#define HYPERVISOR_VIRT_START_PAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE) +#define MACH2PHYS_VIRT_START_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE) +#define MACH2PHYS_VIRT_END_PAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE) + +/* Non-PAE bounds are obsolete. */ +#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_START_NONPAE 0xFC000000 +#define __MACH2PHYS_VIRT_END_NONPAE 0xFC400000 +#define HYPERVISOR_VIRT_START_NONPAE \ + mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_START_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE) +#define MACH2PHYS_VIRT_END_NONPAE \ + mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE) + +#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE +#define __MACH2PHYS_VIRT_START __MACH2PHYS_VIRT_START_PAE +#define __MACH2PHYS_VIRT_END __MACH2PHYS_VIRT_END_PAE + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START) +#endif + +/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#undef ___DEFINE_XEN_GUEST_HANDLE +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } \ + __guest_handle_ ## name; \ + typedef struct { union { type *p; uint64_aligned_t q; }; } \ + __guest_handle_64_ ## name +#undef set_xen_guest_handle +#define set_xen_guest_handle(hnd, val) \ + do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ + (hnd).p = val; \ + } while ( 0 ) +#define uint64_aligned_t uint64_t __attribute__((aligned(8))) +#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name +#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name) +#endif + +#ifndef __ASSEMBLY__ + +struct cpu_user_regs { + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint32_t esi; + uint32_t edi; + uint32_t ebp; + uint32_t eax; + uint16_t error_code; /* private */ + uint16_t entry_vector; /* private */ + uint32_t eip; + uint16_t cs; + uint8_t saved_upcall_mask; + uint8_t _pad0; + uint32_t eflags; /* eflags.IF == !saved_upcall_mask */ + uint32_t esp; + uint16_t ss, _pad1; + uint16_t es, _pad2; + uint16_t ds, _pad3; + uint16_t fs, _pad4; + uint16_t gs, _pad5; +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +/* + * Page-directory addresses above 4GB do not fit into architectural %cr3. + * When accessing %cr3, or equivalent field in vcpu_guest_context, guests + * must use the following accessor macros to pack/unpack valid MFNs. 
+ */ +#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) +#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +struct xen_callback { + unsigned long cs; + unsigned long eip; +}; +typedef struct xen_callback xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/arch-x86/xen-x86_32.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/arch-x86/cpuid.h =================================================================== --- xen/interface/arch-x86/cpuid.h (.../stable/6/sys) (revision 0) +++ xen/interface/arch-x86/cpuid.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,68 @@ +/****************************************************************************** + * arch-x86/cpuid.h + * + * CPUID interface to Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2007 Citrix Systems, Inc. + * + * Authors: + * Keir Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__ +#define __XEN_PUBLIC_ARCH_X86_CPUID_H__ + +/* Xen identification leaves start at 0x40000000. */ +#define XEN_CPUID_FIRST_LEAF 0x40000000 +#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i)) + +/* + * Leaf 1 (0x40000000) + * EAX: Largest Xen-information leaf. All leaves up to and including @EAX + * are supported by the Xen host. + * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification + * of a Xen host. + */ +#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ +#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ +#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ + +/* + * Leaf 2 (0x40000001) + * EAX[31:16]: Xen major version. + * EAX[15: 0]: Xen minor version. + * EBX-EDX: Reserved (currently all zeroes). + */ + +/* + * Leaf 3 (0x40000002) + * EAX: Number of hypercall transfer pages. This register is always guaranteed + * to specify one hypercall page. + * EBX: Base address of Xen-specific MSRs. + * ECX: Features 1. Unused bits are set to zero. + * EDX: Features 2. Unused bits are set to zero.
+ */ + +/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ +#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 +#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) + +#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ Property changes on: xen/interface/arch-x86/cpuid.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: fbsd:nokeywords + true Added: svn:eol-style + native Index: xen/interface/arch-x86/hvm/save.h =================================================================== --- xen/interface/arch-x86/hvm/save.h (.../stable/6/sys) (revision 0) +++ xen/interface/arch-x86/hvm/save.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,429 @@ +/* + * Structure definitions for HVM state that is held by Xen and must + * be saved along with the domain's memory and device-model state. + * + * Copyright (c) 2007 XenSource Ltd. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_SAVE_X86_H__ +#define __XEN_PUBLIC_HVM_SAVE_X86_H__ + +/* + * Save/restore header: general info about the save file. 
+ */ + +#define HVM_FILE_MAGIC 0x54381286 +#define HVM_FILE_VERSION 0x00000001 + +struct hvm_save_header { + uint32_t magic; /* Must be HVM_FILE_MAGIC */ + uint32_t version; /* File format version */ + uint64_t changeset; /* Version of Xen that saved this file */ + uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ + uint32_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); + + +/* + * Processor + */ + +struct hvm_hw_cpu { + uint8_t fpu_regs[512]; + + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; + + uint32_t cs_sel; + uint32_t ds_sel; + uint32_t es_sel; + uint32_t fs_sel; + uint32_t gs_sel; + uint32_t ss_sel; + uint32_t tr_sel; + uint32_t ldtr_sel; + + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t es_limit; + uint32_t fs_limit; + uint32_t gs_limit; + uint32_t ss_limit; + uint32_t tr_limit; + uint32_t ldtr_limit; + uint32_t idtr_limit; + uint32_t gdtr_limit; + + uint64_t cs_base; + uint64_t ds_base; + uint64_t es_base; + uint64_t fs_base; + uint64_t gs_base; + uint64_t ss_base; + uint64_t tr_base; + uint64_t ldtr_base; + uint64_t idtr_base; + uint64_t gdtr_base; + + uint32_t cs_arbytes; + uint32_t ds_arbytes; + uint32_t es_arbytes; + uint32_t fs_arbytes; + uint32_t gs_arbytes; + uint32_t ss_arbytes; + uint32_t tr_arbytes; + uint32_t ldtr_arbytes; + + uint32_t sysenter_cs; + uint32_t padding0; + + uint64_t sysenter_esp; + uint64_t sysenter_eip; + + /* msr for em64t */ + uint64_t shadow_gs; + + /* msr content saved/restored. */ + uint64_t msr_flags; + uint64_t msr_lstar; + uint64_t msr_star; + uint64_t msr_cstar; + uint64_t msr_syscall_mask; + uint64_t msr_efer; + + /* guest's idea of what rdtsc() would return */ + uint64_t tsc; + + /* pending event, if any */ + union { + uint32_t pending_event; + struct { + uint8_t pending_vector:8; + uint8_t pending_type:3; + uint8_t pending_error_valid:1; + uint32_t pending_reserved:19; + uint8_t pending_valid:1; + }; + }; + /* error code for pending event */ + uint32_t error_code; +}; + +DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu); + + +/* + * PIC + */ + +struct hvm_hw_vpic { + /* IR line bitmasks. */ + uint8_t irr; + uint8_t imr; + uint8_t isr; + + /* Line IRx maps to IRQ irq_base+x */ + uint8_t irq_base; + + /* + * Where are we in ICW2-4 initialisation (0 means no init in progress)? + * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1). + * Bit 2: ICW1.IC4 (1 == ICW4 included in init sequence) + * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence) + */ + uint8_t init_state:4; + + /* IR line with highest priority. */ + uint8_t priority_add:4; + + /* Reads from A=0 obtain ISR or IRR? */ + uint8_t readsel_isr:1; + + /* Reads perform a polling read? */ + uint8_t poll:1; + + /* Automatically clear IRQs from the ISR during INTA? */ + uint8_t auto_eoi:1; + + /* Automatically rotate IRQ priorities during AEOI? */ + uint8_t rotate_on_auto_eoi:1; + + /* Exclude slave inputs when considering in-service IRQs? */ + uint8_t special_fully_nested_mode:1; + + /* Special mask mode excludes masked IRs from AEOI and priority checks. */ + uint8_t special_mask_mode:1; + + /* Is this a master PIC or slave PIC? (NB. 
This is not programmable.) */ + uint8_t is_master:1; + + /* Edge/trigger selection. */ + uint8_t elcr; + + /* Virtual INT output. */ + uint8_t int_output; +}; + +DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic); + + +/* + * IO-APIC + */ + +#ifdef __ia64__ +#define VIOAPIC_IS_IOSAPIC 1 +#define VIOAPIC_NUM_PINS 24 +#else +#define VIOAPIC_NUM_PINS 48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */ +#endif + +struct hvm_hw_vioapic { + uint64_t base_address; + uint32_t ioregsel; + uint32_t id; + union vioapic_redir_entry + { + uint64_t bits; + struct { + uint8_t vector; + uint8_t delivery_mode:3; + uint8_t dest_mode:1; + uint8_t delivery_status:1; + uint8_t polarity:1; + uint8_t remote_irr:1; + uint8_t trig_mode:1; + uint8_t mask:1; + uint8_t reserve:7; +#if !VIOAPIC_IS_IOSAPIC + uint8_t reserved[4]; + uint8_t dest_id; +#else + uint8_t reserved[3]; + uint16_t dest_id; +#endif + } fields; + } redirtbl[VIOAPIC_NUM_PINS]; +}; + +DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic); + + +/* + * LAPIC + */ + +struct hvm_hw_lapic { + uint64_t apic_base_msr; + uint32_t disabled; /* VLAPIC_xx_DISABLED */ + uint32_t timer_divisor; +}; + +DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic); + +struct hvm_hw_lapic_regs { + uint8_t data[1024]; +}; + +DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs); + + +/* + * IRQs + */ + +struct hvm_hw_pci_irqs { + /* + * Virtual interrupt wires for a single PCI bus. + * Indexed by: device*4 + INTx#. + */ + union { + DECLARE_BITMAP(i, 32*4); + uint64_t pad[2]; + }; +}; + +DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs); + +struct hvm_hw_isa_irqs { + /* + * Virtual interrupt wires for ISA devices. + * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). + */ + union { + DECLARE_BITMAP(i, 16); + uint64_t pad[1]; + }; +}; + +DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs); + +struct hvm_hw_pci_link { + /* + * PCI-ISA interrupt router. + * Each PCI is 'wire-ORed' into one of four links using + * the traditional 'barber's pole' mapping ((device + INTx#) & 3). + * The router provides a programmable mapping from each link to a GSI. 
+ */ + uint8_t route[4]; + uint8_t pad0[4]; +}; + +DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link); + +/* + * PIT + */ + +struct hvm_hw_pit { + struct hvm_hw_pit_channel { + uint32_t count; /* can be 65536 */ + uint16_t latched_count; + uint8_t count_latched; + uint8_t status_latched; + uint8_t status; + uint8_t read_state; + uint8_t write_state; + uint8_t write_latch; + uint8_t rw_mode; + uint8_t mode; + uint8_t bcd; /* not supported */ + uint8_t gate; /* timer start */ + } channels[3]; /* 3 x 16 bytes */ + uint32_t speaker_data_on; + uint32_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit); + + +/* + * RTC + */ + +#define RTC_CMOS_SIZE 14 +struct hvm_hw_rtc { + /* CMOS bytes */ + uint8_t cmos_data[RTC_CMOS_SIZE]; + /* Index register for 2-part operations */ + uint8_t cmos_index; + uint8_t pad0; +}; + +DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc); + + +/* + * HPET + */ + +#define HPET_TIMER_NUM 3 /* 3 timers supported now */ +struct hvm_hw_hpet { + /* Memory-mapped, software visible registers */ + uint64_t capability; /* capabilities */ + uint64_t res0; /* reserved */ + uint64_t config; /* configuration */ + uint64_t res1; /* reserved */ + uint64_t isr; /* interrupt status reg */ + uint64_t res2[25]; /* reserved */ + uint64_t mc64; /* main counter */ + uint64_t res3; /* reserved */ + struct { /* timers */ + uint64_t config; /* configuration/cap */ + uint64_t cmp; /* comparator */ + uint64_t fsb; /* FSB route, not supported now */ + uint64_t res4; /* reserved */ + } timers[HPET_TIMER_NUM]; + uint64_t res5[4*(24-HPET_TIMER_NUM)]; /* reserved, up to 0x3ff */ + + /* Hidden register state */ + uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */ +}; + +DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet); + + +/* + * PM timer + */ + +struct hvm_hw_pmtimer { + uint32_t tmr_val; /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */ + uint16_t pm1a_sts; /* PM1a_EVT_BLK.PM1a_STS: status register */ + uint16_t pm1a_en; /* PM1a_EVT_BLK.PM1a_EN: enable register */ +}; + +DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer); + +/* + * MTRR MSRs + */ + +struct hvm_hw_mtrr { +#define MTRR_VCNT 8 +#define NUM_FIXED_MSR 11 + uint64_t msr_pat_cr; + /* mtrr physbase & physmask msr pair*/ + uint64_t msr_mtrr_var[MTRR_VCNT*2]; + uint64_t msr_mtrr_fixed[NUM_FIXED_MSR]; + uint64_t msr_mtrr_cap; + uint64_t msr_mtrr_def_type; +}; + +DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr); + +/* + * Largest type-code in use + */ +#define HVM_SAVE_CODE_MAX 14 + +#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ Property changes on: xen/interface/arch-x86/hvm/save.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: fbsd:nokeywords + true Added: svn:eol-style + native Index: xen/interface/arch-x86/xen-x86_64.h =================================================================== --- xen/interface/arch-x86/xen-x86_64.h (.../stable/6/sys) (revision 0) +++ xen/interface/arch-x86/xen-x86_64.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,212 @@ +/****************************************************************************** + * xen-x86_64.h + * + * Guest OS interface to x86 64-bit Xen. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ +#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ + +/* + * Hypercall interface: + * Input: %rdi, %rsi, %rdx, %r10, %r8 (arguments 1-5) + * Output: %rax + * Access is via hypercall page (set up by guest loader or via a Xen MSR): + * call hypercall_page + hypercall-number * 32 + * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) + */ + +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +/* + * Legacy hypercall interface: + * As above, except the entry sequence to the hypervisor is: + * mov $hypercall-number*32,%eax ; syscall + * Clobbered: %rcx, %r11, argument registers (as above) + */ +#define TRAP_INSTR "syscall" +#endif + +/* + * 64-bit segment selectors + * These flat segments are in the Xen-private section of every GDT. Since these + * are also present in the initial GDT, many OSes will be able to avoid + * installing their own GDT. 
+ */ + +#define FLAT_RING3_CS32 0xe023 /* GDT index 260 */ +#define FLAT_RING3_CS64 0xe033 /* GDT index 261 */ +#define FLAT_RING3_DS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_DS64 0x0000 /* NULL selector */ +#define FLAT_RING3_SS32 0xe02b /* GDT index 262 */ +#define FLAT_RING3_SS64 0xe02b /* GDT index 262 */ + +#define FLAT_KERNEL_DS64 FLAT_RING3_DS64 +#define FLAT_KERNEL_DS32 FLAT_RING3_DS32 +#define FLAT_KERNEL_DS FLAT_KERNEL_DS64 +#define FLAT_KERNEL_CS64 FLAT_RING3_CS64 +#define FLAT_KERNEL_CS32 FLAT_RING3_CS32 +#define FLAT_KERNEL_CS FLAT_KERNEL_CS64 +#define FLAT_KERNEL_SS64 FLAT_RING3_SS64 +#define FLAT_KERNEL_SS32 FLAT_RING3_SS32 +#define FLAT_KERNEL_SS FLAT_KERNEL_SS64 + +#define FLAT_USER_DS64 FLAT_RING3_DS64 +#define FLAT_USER_DS32 FLAT_RING3_DS32 +#define FLAT_USER_DS FLAT_USER_DS64 +#define FLAT_USER_CS64 FLAT_RING3_CS64 +#define FLAT_USER_CS32 FLAT_RING3_CS32 +#define FLAT_USER_CS FLAT_USER_CS64 +#define FLAT_USER_SS64 FLAT_RING3_SS64 +#define FLAT_USER_SS32 FLAT_RING3_SS32 +#define FLAT_USER_SS FLAT_USER_SS64 + +#define __HYPERVISOR_VIRT_START 0xFFFF800000000000 +#define __HYPERVISOR_VIRT_END 0xFFFF880000000000 +#define __MACH2PHYS_VIRT_START 0xFFFF800000000000 +#define __MACH2PHYS_VIRT_END 0xFFFF804000000000 + +#ifndef HYPERVISOR_VIRT_START +#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START) +#define HYPERVISOR_VIRT_END mk_unsigned_long(__HYPERVISOR_VIRT_END) +#endif + +#define MACH2PHYS_VIRT_START mk_unsigned_long(__MACH2PHYS_VIRT_START) +#define MACH2PHYS_VIRT_END mk_unsigned_long(__MACH2PHYS_VIRT_END) +#define MACH2PHYS_NR_ENTRIES ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3) +#ifndef machine_to_phys_mapping +#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START) +#endif + +/* + * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base) + * @which == SEGBASE_* ; @base == 64-bit base address + * Returns 0 on success. + */ +#define SEGBASE_FS 0 +#define SEGBASE_GS_USER 1 +#define SEGBASE_GS_KERNEL 2 +#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */ + +/* + * int HYPERVISOR_iret(void) + * All arguments are on the kernel stack, in the following format. + * Never returns if successful. Current kernel context is lost. + * The saved CS is mapped as follows: + * RING0 -> RING3 kernel mode. + * RING1 -> RING3 kernel mode. + * RING2 -> RING3 kernel mode. + * RING3 -> RING3 user mode. + * However RING0 indicates that the guest kernel should return to itself + * directly with + * orb $3,1*8(%rsp) + * iretq + * If flags contains VGCF_in_syscall: + * Restore RAX, RIP, RFLAGS, RSP. + * Discard R11, RCX, CS, SS. + * Otherwise: + * Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP. + * All other registers are saved on hypercall entry and restored to user. + */ +/* Guest exited in SYSCALL context? Return to guest with SYSRET? */ +#define _VGCF_in_syscall 8 +#define VGCF_in_syscall (1<<_VGCF_in_syscall) +#define VGCF_IN_SYSCALL VGCF_in_syscall + +#ifndef __ASSEMBLY__ + +struct iret_context { + /* Top of stack (%rsp at point of hypercall). */ + uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss; + /* Bottom of iret stack frame. */ +}; + +#if defined(__GNUC__) && !defined(__STRICT_ANSI__) +/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */ +#define __DECL_REG(name) union { \ + uint64_t r ## name, e ## name; \ + uint32_t _e ## name; \ +} +#else +/* Non-gcc sources must always use the proper 64-bit name (e.g., rax).
*/ +#define __DECL_REG(name) uint64_t r ## name +#endif + +struct cpu_user_regs { + uint64_t r15; + uint64_t r14; + uint64_t r13; + uint64_t r12; + __DECL_REG(bp); + __DECL_REG(bx); + uint64_t r11; + uint64_t r10; + uint64_t r9; + uint64_t r8; + __DECL_REG(ax); + __DECL_REG(cx); + __DECL_REG(dx); + __DECL_REG(si); + __DECL_REG(di); + uint32_t error_code; /* private */ + uint32_t entry_vector; /* private */ + __DECL_REG(ip); + uint16_t cs, _pad0[1]; + uint8_t saved_upcall_mask; + uint8_t _pad1[3]; + __DECL_REG(flags); /* rflags.IF == !saved_upcall_mask */ + __DECL_REG(sp); + uint16_t ss, _pad2[3]; + uint16_t es, _pad3[3]; + uint16_t ds, _pad4[3]; + uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base. */ + uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */ +}; +typedef struct cpu_user_regs cpu_user_regs_t; +DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t); + +#undef __DECL_REG + +#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12) +#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12) + +struct arch_vcpu_info { + unsigned long cr2; + unsigned long pad; /* sizeof(vcpu_info_t) == 64 */ +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +typedef unsigned long xen_callback_t; + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/arch-x86/xen-x86_64.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/arch-x86/xen-mca.h =================================================================== --- xen/interface/arch-x86/xen-mca.h (.../stable/6/sys) (revision 0) +++ xen/interface/arch-x86/xen-mca.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,279 @@ +/****************************************************************************** + * arch-x86/mca.h + * + * Contributed by Advanced Micro Devices, Inc. + * Author: Christoph Egger + * + * Guest OS machine check interface to x86 Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* Full MCA functionality has the following Usecases from the guest side: + * + * Must have's: + * 1. Dom0 and DomU register machine check trap callback handlers + * (already done via "set_trap_table" hypercall) + * 2. Dom0 registers machine check event callback handler + * (doable via EVTCHNOP_bind_virq) + * 3. 
Dom0 and DomU fetches machine check data + * 4. Dom0 wants Xen to notify a DomU + * 5. Dom0 gets DomU ID from physical address + * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy") + * + * Nice to have's: + * 7. Dom0 wants Xen to deactivate a physical CPU + * This is better done as separate task, physical CPU hotplugging, + * and hypercall(s) should be sysctl's + * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to + * move a DomU (or Dom0 itself) away from a malicious page + * producing correctable errors. + * 9. offlining physical page: + * Xen free's and never re-uses a certain physical page. + * 10. Testfacility: Allow Dom0 to write values into machine check MSR's + * and tell Xen to trigger a machine check + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__ +#define __XEN_PUBLIC_ARCH_X86_MCA_H__ + +/* Hypercall */ +#define __HYPERVISOR_mca __HYPERVISOR_arch_0 + +#define XEN_MCA_INTERFACE_VERSION 0x03000001 + +/* IN: Dom0 calls hypercall from MC event handler. */ +#define XEN_MC_CORRECTABLE 0x0 +/* IN: Dom0/DomU calls hypercall from MC trap handler. */ +#define XEN_MC_TRAP 0x1 +/* XEN_MC_CORRECTABLE and XEN_MC_TRAP are mutually exclusive. */ + +/* OUT: All is ok */ +#define XEN_MC_OK 0x0 +/* OUT: Domain could not fetch data. */ +#define XEN_MC_FETCHFAILED 0x1 +/* OUT: There was no machine check data to fetch. */ +#define XEN_MC_NODATA 0x2 +/* OUT: Between notification time and this hypercall an other + * (most likely) correctable error happened. The fetched data, + * does not match the original machine check data. */ +#define XEN_MC_NOMATCH 0x4 + +/* OUT: DomU did not register MC NMI handler. Try something else. */ +#define XEN_MC_CANNOTHANDLE 0x8 +/* OUT: Notifying DomU failed. Retry later or try something else. */ +#define XEN_MC_NOTDELIVERED 0x10 +/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */ + + +#ifndef __ASSEMBLY__ + +#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */ + +/* + * Machine Check Architecure: + * structs are read-only and used to report all kinds of + * correctable and uncorrectable errors detected by the HW. + * Dom0 and DomU: register a handler to get notified. + * Dom0 only: Correctable errors are reported via VIRQ_MCA + * Dom0 and DomU: Uncorrectable errors are reported via nmi handlers + */ +#define MC_TYPE_GLOBAL 0 +#define MC_TYPE_BANK 1 +#define MC_TYPE_EXTENDED 2 + +struct mcinfo_common { + uint16_t type; /* structure type */ + uint16_t size; /* size of this struct in bytes */ +}; + + +#define MC_FLAG_CORRECTABLE (1 << 0) +#define MC_FLAG_UNCORRECTABLE (1 << 1) + +/* contains global x86 mc information */ +struct mcinfo_global { + struct mcinfo_common common; + + /* running domain at the time in error (most likely the impacted one) */ + uint16_t mc_domid; + uint32_t mc_socketid; /* physical socket of the physical core */ + uint16_t mc_coreid; /* physical impacted core */ + uint16_t mc_core_threadid; /* core thread of physical core */ + uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ + uint64_t mc_gstatus; /* global status */ + uint32_t mc_flags; +}; + +/* contains bank local x86 mc information */ +struct mcinfo_bank { + struct mcinfo_common common; + + uint16_t mc_bank; /* bank nr */ + uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0 + * and if mc_addr is valid. Never valid on DomU. 
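struct mc_info (defined a little further down in this header) carries a packed sequence of these variable-size records, and the common type/size pair is what lets a guest walk it; the x86_mcinfo_* helper macros defined below exist for the same purpose. A hedged sketch of such a walk over the bank records, for illustration only:

/* Illustrative: walk a fetched mc_info buffer and report each bank record.
 * 'mi' is assumed to have been filled in by the XEN_MC_fetch hypercall. */
static void
dump_mc_banks(struct mc_info *mi)
{
    struct mcinfo_common *mic;
    uint32_t i;

    mic = (struct mcinfo_common *)mi->mi_data;
    for (i = 0; i < mi->mi_nentries; i++) {
        if (mic->type == MC_TYPE_BANK) {
            struct mcinfo_bank *b = (struct mcinfo_bank *)mic;
            printf("bank %u status 0x%jx\n", b->mc_bank,
                (uintmax_t)b->mc_status);
        }
        mic = (struct mcinfo_common *)((uint8_t *)mic + mic->size);
    }
}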
*/ + uint64_t mc_status; /* bank status */ + uint64_t mc_addr; /* bank address, only valid + * if addr bit is set in mc_status */ + uint64_t mc_misc; +}; + + +struct mcinfo_msr { + uint64_t reg; /* MSR */ + uint64_t value; /* MSR value */ +}; + +/* contains mc information from other + * or additional mc MSRs */ +struct mcinfo_extended { + struct mcinfo_common common; + + /* You can fill up to five registers. + * If you need more, then use this structure + * multiple times. */ + + uint32_t mc_msrs; /* Number of msr with valid values. */ + struct mcinfo_msr mc_msr[5]; +}; + +#define MCINFO_HYPERCALLSIZE 1024 +#define MCINFO_MAXSIZE 768 + +struct mc_info { + /* Number of mcinfo_* entries in mi_data */ + uint32_t mi_nentries; + + uint8_t mi_data[MCINFO_MAXSIZE - sizeof(uint32_t)]; +}; +typedef struct mc_info mc_info_t; + + + +/* + * OS's should use these instead of writing their own lookup function + * each with its own bugs and drawbacks. + * We use macros instead of static inline functions to allow guests + * to include this header in assembly files (*.S). + */ +/* Prototype: + * uint32_t x86_mcinfo_nentries(struct mc_info *mi); + */ +#define x86_mcinfo_nentries(_mi) \ + (_mi)->mi_nentries +/* Prototype: + * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); + */ +#define x86_mcinfo_first(_mi) \ + (struct mcinfo_common *)((_mi)->mi_data) +/* Prototype: + * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); + */ +#define x86_mcinfo_next(_mic) \ + (struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size) + +/* Prototype: + * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); + */ +#define x86_mcinfo_lookup(_ret, _mi, _type) \ + do { \ + uint32_t found, i; \ + struct mcinfo_common *_mic; \ + \ + found = 0; \ + (_ret) = NULL; \ + if (_mi == NULL) break; \ + _mic = x86_mcinfo_first(_mi); \ + for (i = 0; i < x86_mcinfo_nentries(_mi); i++) { \ + if (_mic->type == (_type)) { \ + found = 1; \ + break; \ + } \ + _mic = x86_mcinfo_next(_mic); \ + } \ + (_ret) = found ? _mic : NULL; \ + } while (0) + + +/* Usecase 1 + * Register machine check trap callback handler + * (already done via "set_trap_table" hypercall) + */ + +/* Usecase 2 + * Dom0 registers machine check event callback handler + * done by EVTCHNOP_bind_virq + */ + +/* Usecase 3 + * Fetch machine check data from hypervisor. + * Note, this hypercall is special, because both Dom0 and DomU must use this. + */ +#define XEN_MC_fetch 1 +struct xen_mc_fetch { + /* IN/OUT variables. */ + uint32_t flags; + +/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */ +/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA, XEN_MC_NOMATCH */ + + /* OUT variables. */ + uint32_t fetch_idx; /* only useful for Dom0 for the notify hypercall */ + struct mc_info mc_info; +}; +typedef struct xen_mc_fetch xen_mc_fetch_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t); + + +/* Usecase 4 + * This tells the hypervisor to notify a DomU about the machine check error + */ +#define XEN_MC_notifydomain 2 +struct xen_mc_notifydomain { + /* IN variables. */ + uint16_t mc_domid; /* The unprivileged domain to notify. */ + uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify. + * Usually echo'd value from the fetch hypercall. */ + uint32_t fetch_idx; /* echo'd value from the fetch hypercall. */ + + /* IN/OUT variables. 
*/ + uint32_t flags; + +/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */ +/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */ +}; +typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); + + +struct xen_mc { + uint32_t cmd; + uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ + union { + struct xen_mc_fetch mc_fetch; + struct xen_mc_notifydomain mc_notifydomain; + uint8_t pad[MCINFO_HYPERCALLSIZE]; + } u; +}; +typedef struct xen_mc xen_mc_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_t); + +#endif /* __ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */ Property changes on: xen/interface/arch-x86/xen-mca.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: fbsd:nokeywords + true Added: svn:eol-style + native Property changes on: xen/interface/arch-x86 ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/vcpu.h =================================================================== --- xen/interface/vcpu.h (.../stable/6/sys) (revision 0) +++ xen/interface/vcpu.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,213 @@ +/****************************************************************************** + * vcpu.h + * + * VCPU initialisation, query, and hotplug. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_VCPU_H__ +#define __XEN_PUBLIC_VCPU_H__ + +/* + * Prototype for this hypercall is: + * int vcpu_op(int cmd, int vcpuid, void *extra_args) + * @cmd == VCPUOP_??? (VCPU operation). + * @vcpuid == VCPU to operate on. + * @extra_args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Initialise a VCPU. Each VCPU can be initialised only once. A + * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. + * + * @extra_arg == pointer to vcpu_guest_context structure containing initial + * state for the VCPU. + */ +#define VCPUOP_initialise 0 + +/* + * Bring up a VCPU. This makes the VCPU runnable. This operation will fail + * if the VCPU has not been initialised (VCPUOP_initialise). + */ +#define VCPUOP_up 1 + +/* + * Bring down a VCPU (i.e., make it non-runnable). + * There are a few caveats that callers should observe: + * 1. This operation may return, and VCPU_is_up may return false, before the + * VCPU stops running (i.e., the command is asynchronous). 
It is a good + * idea to ensure that the VCPU has entered a non-critical loop before + * bringing it down. Alternatively, this operation is guaranteed + * synchronous if invoked by the VCPU itself. + * 2. After a VCPU is initialised, there is currently no way to drop all its + * references to domain memory. Even a VCPU that is down still holds + * memory references via its pagetable base pointer and GDT. It is good + * practise to move a VCPU onto an 'idle' or default page table, LDT and + * GDT before bringing it down. + */ +#define VCPUOP_down 2 + +/* Returns 1 if the given VCPU is up. */ +#define VCPUOP_is_up 3 + +/* + * Return information about the state and running time of a VCPU. + * @extra_arg == pointer to vcpu_runstate_info structure. + */ +#define VCPUOP_get_runstate_info 4 +struct vcpu_runstate_info { + /* VCPU's current state (RUNSTATE_*). */ + int state; + /* When was current state entered (system time, ns)? */ + uint64_t state_entry_time; + /* + * Time spent in each RUNSTATE_* (ns). The sum of these times is + * guaranteed not to drift from system time. + */ + uint64_t time[4]; +}; +typedef struct vcpu_runstate_info vcpu_runstate_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t); + +/* VCPU is currently running on a physical CPU. */ +#define RUNSTATE_running 0 + +/* VCPU is runnable, but not currently scheduled on any physical CPU. */ +#define RUNSTATE_runnable 1 + +/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */ +#define RUNSTATE_blocked 2 + +/* + * VCPU is not runnable, but it is not blocked. + * This is a 'catch all' state for things like hotplug and pauses by the + * system administrator (or for critical sections in the hypervisor). + * RUNSTATE_blocked dominates this state (it is the preferred state). + */ +#define RUNSTATE_offline 3 + +/* + * Register a shared memory area from which the guest may obtain its own + * runstate information without needing to execute a hypercall. + * Notes: + * 1. The registered address may be virtual or physical or guest handle, + * depending on the platform. Virtual address or guest handle should be + * registered on x86 systems. + * 2. Only one shared area may be registered per VCPU. The shared area is + * updated by the hypervisor each time the VCPU is scheduled. Thus + * runstate.state will always be RUNSTATE_running and + * runstate.state_entry_time will indicate the system time at which the + * VCPU was last scheduled to run. + * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. + */ +#define VCPUOP_register_runstate_memory_area 5 +struct vcpu_register_runstate_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_runstate_info_t) h; + struct vcpu_runstate_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t); + +/* + * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer + * which can be set via these commands. Periods smaller than one millisecond + * may not be supported. + */ +#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ +#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ +struct vcpu_set_periodic_timer { + uint64_t period_ns; +}; +typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t); + +/* + * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot + * timer which can be set via these commands. 
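Both timer operations above, like the rest of this header, are reached through the vcpu_op hypercall whose prototype is documented at the top of the file. A minimal sketch of arming the periodic timer; the HYPERVISOR_vcpu_op() wrapper name is an assumption for illustration, matching the documented prototype rather than any particular implementation:

/* Illustrative: ask Xen for a periodic timer event every 10 ms on 'vcpuid'.
 * HYPERVISOR_vcpu_op() is assumed to follow the documented
 * "int vcpu_op(int cmd, int vcpuid, void *extra_args)" prototype. */
extern int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);

static int
start_periodic_timer(int vcpuid)
{
    struct vcpu_set_periodic_timer t;

    t.period_ns = 10 * 1000 * 1000;     /* 10 ms in nanoseconds */
    return (HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, vcpuid, &t));
}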
+ */ +#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ +#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */ +struct vcpu_set_singleshot_timer { + uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */ + uint32_t flags; /* VCPU_SSHOTTMR_??? */ +}; +typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t); + +/* Flags to VCPUOP_set_singleshot_timer. */ + /* Require the timeout to be in the future (return -ETIME if it's passed). */ +#define _VCPU_SSHOTTMR_future (0) +#define VCPU_SSHOTTMR_future (1U << _VCPU_SSHOTTMR_future) + +/* + * Register a memory location in the guest address space for the + * vcpu_info structure. This allows the guest to place the vcpu_info + * structure in a convenient place, such as in a per-cpu data area. + * The pointer need not be page aligned, but the structure must not + * cross a page boundary. + * + * This may be called only once per vcpu. + */ +#define VCPUOP_register_vcpu_info 10 /* arg == vcpu_register_vcpu_info_t */ +struct vcpu_register_vcpu_info { + uint64_t mfn; /* mfn of page to place vcpu_info */ + uint32_t offset; /* offset within page */ + uint32_t rsvd; /* unused */ +}; +typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t); + +/* Send an NMI to the specified VCPU. @extra_arg == NULL. */ +#define VCPUOP_send_nmi 11 + +/* + * Get the physical ID information for a pinned vcpu's underlying physical + * processor. The physical ID informmation is architecture-specific. + * On x86: id[31:0]=apic_id, id[63:32]=acpi_id, and all values 0xff and + * greater are reserved. + * This command returns -EINVAL if it is not a valid operation for this VCPU. + */ +#define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ +struct vcpu_get_physid { + uint64_t phys_id; +}; +typedef struct vcpu_get_physid vcpu_get_physid_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t); +#define xen_vcpu_physid_to_x86_apicid(physid) \ + ((((uint32_t)(physid)) >= 0xff) ? 0xff : ((uint8_t)(physid))) +#define xen_vcpu_physid_to_x86_acpiid(physid) \ + ((((uint32_t)((physid)>>32)) >= 0xff) ? 0xff : ((uint8_t)((physid)>>32))) + +#endif /* __XEN_PUBLIC_VCPU_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/vcpu.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/features.h =================================================================== --- xen/interface/features.h (.../stable/6/sys) (revision 0) +++ xen/interface/features.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,74 @@ +/****************************************************************************** + * features.h + * + * Feature flags, reported by XENVER_get_features. 
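VCPUOP_register_vcpu_info is how a guest moves a VCPU's vcpu_info block out of the shared-info page into memory of its own choosing; the mfn/offset pair names the machine page and the byte offset of the structure within it, and the structure must not cross a page boundary. A hedged sketch, reusing the assumed HYPERVISOR_vcpu_op() wrapper shown earlier; the caller supplies the machine frame number of the page holding the structure:

/* Illustrative: register a per-CPU vcpu_info location with Xen.
 * 'vi' points at the structure (struct vcpu_info, from xen.h) and 'mfn'
 * is the machine frame of the page containing it. */
extern int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args);

static int
relocate_vcpu_info(int vcpuid, void *vi, unsigned long mfn)
{
    struct vcpu_register_vcpu_info info;

    info.mfn    = mfn;                               /* page holding 'vi' */
    info.offset = (uintptr_t)vi & (PAGE_SIZE - 1);   /* offset within it */
    info.rsvd   = 0;
    return (HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, vcpuid, &info));
}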
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_FEATURES_H__ +#define __XEN_PUBLIC_FEATURES_H__ + +/* + * If set, the guest does not need to write-protect its pagetables, and can + * update them via direct writes. + */ +#define XENFEAT_writable_page_tables 0 + +/* + * If set, the guest does not need to write-protect its segment descriptor + * tables, and can update them via direct writes. + */ +#define XENFEAT_writable_descriptor_tables 1 + +/* + * If set, translation between the guest's 'pseudo-physical' address space + * and the host's machine address space are handled by the hypervisor. In this + * mode the guest does not need to perform phys-to/from-machine translations + * when performing page table operations. + */ +#define XENFEAT_auto_translated_physmap 2 + +/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */ +#define XENFEAT_supervisor_mode_kernel 3 + +/* + * If set, the guest does not need to allocate x86 PAE page directories + * below 4GB. This flag is usually implied by auto_translated_physmap. + */ +#define XENFEAT_pae_pgdir_above_4gb 4 + +/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ +#define XENFEAT_mmu_pt_update_preserve_ad 5 + +#define XENFEAT_NR_SUBMAPS 1 + +#endif /* __XEN_PUBLIC_FEATURES_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/features.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/physdev.h =================================================================== --- xen/interface/physdev.h (.../stable/6/sys) (revision 0) +++ xen/interface/physdev.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,219 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
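The XENFEAT_* values above are bit positions within the feature bitmaps that XENVER_get_features returns, one 32-bit submap at a time (XENFEAT_NR_SUBMAPS of them). A minimal sketch of the usual caching and testing pattern; the xen_features[] array and xen_feature() names here are illustrative, not the patch's actual representation:

/* Illustrative: cache of the XENVER_get_features submaps and a bit test. */
static uint32_t xen_features[XENFEAT_NR_SUBMAPS];   /* filled once at boot */

static inline int
xen_feature(unsigned int flag)
{
    return ((xen_features[flag / 32] >> (flag % 32)) & 1);
}

/* e.g. PV pagetable code: if (xen_feature(XENFEAT_writable_page_tables)) ... */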
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_PHYSDEV_H__ +#define __XEN_PUBLIC_PHYSDEV_H__ + +/* + * Prototype for this hypercall is: + * int physdev_op(int cmd, void *args) + * @cmd == PHYSDEVOP_??? (physdev operation). + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Notify end-of-interrupt (EOI) for the specified IRQ. + * @arg == pointer to physdev_eoi structure. + */ +#define PHYSDEVOP_eoi 12 +struct physdev_eoi { + /* IN */ + uint32_t irq; +}; +typedef struct physdev_eoi physdev_eoi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); + +/* + * Query the status of an IRQ line. + * @arg == pointer to physdev_irq_status_query structure. + */ +#define PHYSDEVOP_irq_status_query 5 +struct physdev_irq_status_query { + /* IN */ + uint32_t irq; + /* OUT */ + uint32_t flags; /* XENIRQSTAT_* */ +}; +typedef struct physdev_irq_status_query physdev_irq_status_query_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t); + +/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */ +#define _XENIRQSTAT_needs_eoi (0) +#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi) + +/* IRQ shared by multiple guests? */ +#define _XENIRQSTAT_shared (1) +#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared) + +/* + * Set the current VCPU's I/O privilege level. + * @arg == pointer to physdev_set_iopl structure. + */ +#define PHYSDEVOP_set_iopl 6 +struct physdev_set_iopl { + /* IN */ + uint32_t iopl; +}; +typedef struct physdev_set_iopl physdev_set_iopl_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t); + +/* + * Set the current VCPU's I/O-port permissions bitmap. + * @arg == pointer to physdev_set_iobitmap structure. + */ +#define PHYSDEVOP_set_iobitmap 7 +struct physdev_set_iobitmap { + /* IN */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(uint8) bitmap; +#else + uint8_t *bitmap; +#endif + uint32_t nr_ports; +}; +typedef struct physdev_set_iobitmap physdev_set_iobitmap_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t); + +/* + * Read or write an IO-APIC register. + * @arg == pointer to physdev_apic structure. + */ +#define PHYSDEVOP_apic_read 8 +#define PHYSDEVOP_apic_write 9 +struct physdev_apic { + /* IN */ + unsigned long apic_physbase; + uint32_t reg; + /* IN or OUT */ + uint32_t value; +}; +typedef struct physdev_apic physdev_apic_t; +DEFINE_XEN_GUEST_HANDLE(physdev_apic_t); + +/* + * Allocate or free a physical upcall vector for the specified IRQ line. + * @arg == pointer to physdev_irq structure. 
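Everything in this header goes through the single physdev_op hypercall whose prototype is given at the top. A hedged sketch of the end-of-interrupt path, where the guest first asks whether the line needs an explicit EOI and only then issues PHYSDEVOP_eoi; the HYPERVISOR_physdev_op() wrapper name is an assumption matching the documented prototype:

/* Illustrative: acknowledge 'irq', but only if Xen says the line needs an
 * explicit end-of-interrupt notification. */
extern int HYPERVISOR_physdev_op(int cmd, void *args);

static int
maybe_eoi(uint32_t irq)
{
    struct physdev_irq_status_query q;
    struct physdev_eoi eoi;
    int error;

    q.irq = irq;
    error = HYPERVISOR_physdev_op(PHYSDEVOP_irq_status_query, &q);
    if (error != 0 || (q.flags & XENIRQSTAT_needs_eoi) == 0)
        return (error);
    eoi.irq = irq;
    return (HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi));
}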
+ */ +#define PHYSDEVOP_alloc_irq_vector 10 +#define PHYSDEVOP_free_irq_vector 11 +struct physdev_irq { + /* IN */ + uint32_t irq; + /* IN or OUT */ + uint32_t vector; +}; +typedef struct physdev_irq physdev_irq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_t); + +#define MAP_PIRQ_TYPE_MSI 0x0 +#define MAP_PIRQ_TYPE_GSI 0x1 +#define MAP_PIRQ_TYPE_UNKNOWN 0x2 + +#define PHYSDEVOP_map_pirq 13 +struct physdev_map_pirq { + domid_t domid; + /* IN */ + int type; + /* IN */ + int index; + /* IN or OUT */ + int pirq; + /* IN */ + int bus; + /* IN */ + int devfn; + /* IN */ + int entry_nr; + /* IN */ + uint64_t table_base; +}; +typedef struct physdev_map_pirq physdev_map_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t); + +#define PHYSDEVOP_unmap_pirq 14 +struct physdev_unmap_pirq { + domid_t domid; + /* IN */ + int pirq; +}; + +typedef struct physdev_unmap_pirq physdev_unmap_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t); + +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +typedef struct physdev_manage_pci physdev_manage_pci_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); + +/* + * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() + * hypercall since 0x00030202. + */ +struct physdev_op { + uint32_t cmd; + union { + struct physdev_irq_status_query irq_status_query; + struct physdev_set_iopl set_iopl; + struct physdev_set_iobitmap set_iobitmap; + struct physdev_apic apic_op; + struct physdev_irq irq_op; + } u; +}; +typedef struct physdev_op physdev_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_op_t); + +/* + * Notify that some PIRQ-bound event channels have been unmasked. + * ** This command is obsolete since interface version 0x00030202 and is ** + * ** unsupported by newer versions of Xen. ** + */ +#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 + +/* + * These all-capitals physdev operation names are superceded by the new names + * (defined above) since interface version 0x00030202. + */ +#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query +#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl +#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap +#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read +#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write +#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector +#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector +#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi +#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared + +#endif /* __XEN_PUBLIC_PHYSDEV_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/physdev.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/grant_table.h =================================================================== --- xen/interface/grant_table.h (.../stable/6/sys) (revision 0) +++ xen/interface/grant_table.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,431 @@ +/****************************************************************************** + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + + +/*********************************** + * GRANT TABLE REPRESENTATION + */ + +/* Some rough guidelines on accessing and updating grant-table entries + * in a concurrency-safe manner. For more information, Linux contains a + * reference implementation for guest OSes (arch/xen/kernel/grant_table.c). + * + * NB. WMB is a no-op on current-generation x86 processors. However, a + * compiler barrier will still be required. + * + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + * + * Invalidating an unused GTF_permit_access entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * + * Invalidating an in-use GTF_permit_access entry: + * This cannot be done directly. Request assistance from the domain controller + * which can set a timeout on the use of a grant entry and take necessary + * action. (NB. This is not yet implemented!). + * + * Invalidating an unused GTF_accept_transfer entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & GTF_transfer_committed). [*] + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. + * The guest must /not/ modify the grant entry until the address of the + * transferred frame is written. It is safe for the guest to spin waiting + * for this to occur (detect by observing GTF_transfer_completed in + * ent->flags). + * + * Invalidating a committed GTF_accept_transfer entry: + * 1. Wait for (ent->flags & GTF_transfer_completed). + * + * Changing a GTF_permit_access from writable to read-only: + * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. 
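Those ordering rules map directly onto code: domid and frame must be visible before the flags word that marks the entry valid, hence the write barrier between steps 2 and 4. A minimal sketch for the GTF_permit_access case, using the grant_entry structure and flags defined just below; the barrier shown is only a compiler barrier, which the note above says suffices on current x86 (illustration, not part of the patch):

/* Illustrative: publish a read-write grant of machine frame 'frame' to
 * domain 'domid' in slot 'ref' of the shared grant table, following the
 * update protocol described above. */
#ifndef wmb
#define wmb()   __asm __volatile("" ::: "memory")   /* compiler barrier */
#endif

static void
grant_frame(struct grant_entry *shared, grant_ref_t ref,
    domid_t domid, uint32_t frame)
{
    shared[ref].domid = domid;              /* step 1 */
    shared[ref].frame = frame;              /* step 2 */
    wmb();                                  /* step 3 */
    shared[ref].flags = GTF_permit_access;  /* step 4: entry now valid */
}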
+ * + * Changing a GTF_permit_access from read-only to writable: + * Use SMP-safe bit-setting instruction. + */ + +/* + * A grant table comprises a packed array of grant entries in one or more + * page frames shared between Xen and a guest. + * [XEN]: This field is written by Xen and read by the sharing guest. + * [GST]: This field is written by the guest and read by Xen. + */ +struct grant_entry { + /* GTF_xxx: various type and flag information. [XEN,GST] */ + uint16_t flags; + /* The domain being granted foreign privileges. [GST] */ + domid_t domid; + /* + * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + */ + uint32_t frame; +}; +typedef struct grant_entry grant_entry_t; + +/* + * Type of grant entry. + * GTF_invalid: This grant entry grants no privileges. + * GTF_permit_access: Allow @domid to map/access @frame. + * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame + * to this guest. Xen writes the page number to @frame. + */ +#define GTF_invalid (0U<<0) +#define GTF_permit_access (1U<<0) +#define GTF_accept_transfer (2U<<0) +#define GTF_type_mask (3U<<0) + +/* + * Subflags for GTF_permit_access. + * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] + * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] + * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] + */ +#define _GTF_readonly (2) +#define GTF_readonly (1U<<_GTF_readonly) +#define _GTF_reading (3) +#define GTF_reading (1U<<_GTF_reading) +#define _GTF_writing (4) +#define GTF_writing (1U<<_GTF_writing) +#define _GTF_PWT (5) +#define GTF_PWT (1U<<_GTF_PWT) +#define _GTF_PCD (6) +#define GTF_PCD (1U<<_GTF_PCD) +#define _GTF_PAT (7) +#define GTF_PAT (1U<<_GTF_PAT) + +/* + * Subflags for GTF_accept_transfer: + * GTF_transfer_committed: Xen sets this flag to indicate that it is committed + * to transferring ownership of a page frame. When a guest sees this flag + * it must /not/ modify the grant entry until GTF_transfer_completed is + * set by Xen. + * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag + * after reading GTF_transfer_committed. Xen will always write the frame + * address, followed by ORing this flag, in a timely manner. + */ +#define _GTF_transfer_committed (2) +#define GTF_transfer_committed (1U<<_GTF_transfer_committed) +#define _GTF_transfer_completed (3) +#define GTF_transfer_completed (1U<<_GTF_transfer_completed) + + +/*********************************** + * GRANT TABLE QUERIES AND USES + */ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +/* + * Handle to track a mapping created via a grant reference. + */ +typedef uint32_t grant_handle_t; + +/* + * GNTTABOP_map_grant_ref: Map the grant entry (,) for access + * by devices and/or host CPUs. If successful, is a tracking number + * that must be presented later to destroy the mapping(s). On error, + * is a negative status code. + * NOTES: + * 1. If GNTMAP_device_map is specified then is the address + * via which I/O devices may access the granted frame. + * 2. If GNTMAP_host_map is specified then a mapping will be added at + * either a host virtual address in the current address space, or at + * a PTE at the specified machine address. 
The type of mapping to + * perform is selected through the GNTMAP_contains_pte flag, and the + * address is specified in . + * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a + * host mapping is destroyed by other means then it is *NOT* guaranteed + * to be accounted to the correct grant reference! + */ +#define GNTTABOP_map_grant_ref 0 +struct gnttab_map_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint32_t flags; /* GNTMAP_* */ + grant_ref_t ref; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + grant_handle_t handle; + uint64_t dev_bus_addr; +}; +typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t); + +/* + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings + * tracked by . If or is zero, that + * field is ignored. If non-zero, they must refer to a device/host mapping + * that is tracked by + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by . + * 3. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +#define GNTTABOP_unmap_grant_ref 1 +struct gnttab_unmap_grant_ref { + /* IN parameters. */ + uint64_t host_addr; + uint64_t dev_bus_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); + +/* + * GNTTABOP_setup_table: Set up a grant table for comprising at least + * pages. The frame addresses are written to the . + * Only addresses are written, even if the table is larger. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. + * 3. Xen may not support more than a single grant-table page per domain. + */ +#define GNTTABOP_setup_table 2 +struct gnttab_setup_table { + /* IN parameters. */ + domid_t dom; + uint32_t nr_frames; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + XEN_GUEST_HANDLE(ulong) frame_list; +}; +typedef struct gnttab_setup_table gnttab_setup_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t); + +/* + * GNTTABOP_dump_table: Dump the contents of the grant table to the + * xen console. Debugging use only. + */ +#define GNTTABOP_dump_table 3 +struct gnttab_dump_table { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +typedef struct gnttab_dump_table gnttab_dump_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); + +/* + * GNTTABOP_transfer_grant_ref: Transfer to a foreign domain. The + * foreign domain has previously registered its interest in the transfer via + * . + * + * Note that, even if the transfer fails, the specified page no longer belongs + * to the calling domain *unless* the error is GNTST_bad_page. + */ +#define GNTTABOP_transfer 4 +struct gnttab_transfer { + /* IN parameters. */ + xen_pfn_t mfn; + domid_t domid; + grant_ref_t ref; + /* OUT parameters. */ + int16_t status; +}; +typedef struct gnttab_transfer gnttab_transfer_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t); + + +/* + * GNTTABOP_copy: Hypervisor based copy + * source and destinations can be eithers MFNs or, for foreign domains, + * grant references. the foreign domain has to grant read/write access + * in its grant table. + * + * The flags specify what type source and destinations are (either MFN + * or grant reference). 
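GNTTABOP_setup_table is typically the first of these operations a guest issues: it returns the machine frames backing the guest's own grant table so they can be mapped and the grant_entry array populated. A hedged sketch; the HYPERVISOR_grant_table_op() wrapper (cmd, uop, count) and the set_xen_guest_handle() helper are assumptions used here for illustration:

/* Illustrative: ask Xen for the machine frames backing an 'nr_frames'-page
 * grant table.  'frames' must have room for nr_frames entries. */
extern int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop,
    unsigned int count);

static int
get_gnttab_frames(unsigned long *frames, uint32_t nr_frames)
{
    struct gnttab_setup_table setup;
    int error;

    setup.dom = DOMID_SELF;
    setup.nr_frames = nr_frames;
    set_xen_guest_handle(setup.frame_list, frames);  /* XEN_GUEST_HANDLE(ulong) */
    error = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
    if (error != 0)
        return (error);
    return (setup.status);   /* GNTST_okay (0) or a negative GNTST_* code */
}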
+ * + * Note that this can also be used to copy data between two domains + * via a third party if the source and destination domains had previously + * grant appropriate access to their pages to the third party. + * + * source_offset specifies an offset in the source frame, dest_offset + * the offset in the target frame and len specifies the number of + * bytes to be copied. + */ + +#define _GNTCOPY_source_gref (0) +#define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) +#define _GNTCOPY_dest_gref (1) +#define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) + +#define GNTTABOP_copy 5 +typedef struct gnttab_copy { + /* IN parameters. */ + struct { + union { + grant_ref_t ref; + xen_pfn_t gmfn; + } u; + domid_t domid; + uint16_t offset; + } source, dest; + uint16_t len; + uint16_t flags; /* GNTCOPY_* */ + /* OUT parameters. */ + int16_t status; +} gnttab_copy_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); + +/* + * GNTTABOP_query_size: Query the current and maximum sizes of the shared + * grant table. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. + */ +#define GNTTABOP_query_size 6 +struct gnttab_query_size { + /* IN parameters. */ + domid_t dom; + /* OUT parameters. */ + uint32_t nr_frames; + uint32_t max_nr_frames; + int16_t status; /* GNTST_* */ +}; +typedef struct gnttab_query_size gnttab_query_size_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); + +/* + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings + * tracked by but atomically replace the page table entry with one + * pointing to the machine address under . will be + * redirected to the null entry. + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by . + * 2. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +#define GNTTABOP_unmap_and_replace 7 +struct gnttab_unmap_and_replace { + /* IN parameters. */ + uint64_t host_addr; + uint64_t new_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); + + +/* + * Bitfield values for update_pin_status.flags. + */ + /* Map the grant entry for access by I/O devices. */ +#define _GNTMAP_device_map (0) +#define GNTMAP_device_map (1<<_GNTMAP_device_map) + /* Map the grant entry for access by host CPUs. */ +#define _GNTMAP_host_map (1) +#define GNTMAP_host_map (1<<_GNTMAP_host_map) + /* Accesses to the granted frame will be restricted to read-only access. */ +#define _GNTMAP_readonly (2) +#define GNTMAP_readonly (1<<_GNTMAP_readonly) + /* + * GNTMAP_host_map subflag: + * 0 => The host mapping is usable only by the guest OS. + * 1 => The host mapping is usable by guest OS + current application. + */ +#define _GNTMAP_application_map (3) +#define GNTMAP_application_map (1<<_GNTMAP_application_map) + + /* + * GNTMAP_contains_pte subflag: + * 0 => This map request contains a host virtual address. + * 1 => This map request contains the machine addess of the PTE to update. + */ +#define _GNTMAP_contains_pte (4) +#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) + +/* + * Values for error status returns. All errors are -ve. + */ +#define GNTST_okay (0) /* Normal return. */ +#define GNTST_general_error (-1) /* General undefined error. */ +#define GNTST_bad_domain (-2) /* Unrecognsed domain id. 
*/ +#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */ +#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */ +#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */ +#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/ +#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ +#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ +#define GNTST_bad_page (-9) /* Specified page was invalid for op. */ +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ +#define GNTST_address_too_big (-11) /* transfer page address too large. */ + +#define GNTTABOP_error_msgs { \ + "okay", \ + "undefined error", \ + "unrecognised domain id", \ + "invalid grant reference", \ + "invalid mapping handle", \ + "invalid virtual address", \ + "invalid device address", \ + "no spare translation slot in the I/O MMU", \ + "permission denied", \ + "bad page", \ + "copy arguments cross page boundary", \ + "page address size too large" \ +} + +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/grant_table.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/COPYING =================================================================== --- xen/interface/COPYING (.../stable/6/sys) (revision 0) +++ xen/interface/COPYING (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,38 @@ +XEN NOTICE +========== + +This copyright applies to all files within this subdirectory and its +subdirectories: + include/public/*.h + include/public/hvm/*.h + include/public/io/*.h + +The intention is that these files can be freely copied into the source +tree of an operating system when porting that OS to run on Xen. Doing +so does *not* cause the OS to become subject to the terms of the GPL. + +All other files in the Xen source distribution are covered by version +2 of the GNU General Public License except where explicitly stated +otherwise within individual source files. + + -- Keir Fraser (on behalf of the Xen team) + +===================================================================== + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
Property changes on: xen/interface/COPYING ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/platform.h =================================================================== --- xen/interface/platform.h (.../stable/6/sys) (revision 0) +++ xen/interface/platform.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,346 @@ +/****************************************************************************** + * platform.h + * + * Hardware platform operations. Intended for use by domain-0 kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_PLATFORM_H__ +#define __XEN_PUBLIC_PLATFORM_H__ + +#include "xen.h" + +#define XENPF_INTERFACE_VERSION 0x03000001 + +/* + * Set clock such that it would read after 00:00:00 UTC, + * 1 January, 1970 if the current system time was . + */ +#define XENPF_settime 17 +struct xenpf_settime { + /* IN variables. */ + uint32_t secs; + uint32_t nsecs; + uint64_t system_time; +}; +typedef struct xenpf_settime xenpf_settime_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t); + +/* + * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type. + * On x86, @type is an architecture-defined MTRR memory type. + * On success, returns the MTRR that was used (@reg) and a handle that can + * be passed to XENPF_DEL_MEMTYPE to accurately tear down the new setting. + * (x86-specific). + */ +#define XENPF_add_memtype 31 +struct xenpf_add_memtype { + /* IN variables. */ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; + /* OUT variables. */ + uint32_t handle; + uint32_t reg; +}; +typedef struct xenpf_add_memtype xenpf_add_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t); + +/* + * Tear down an existing memory-range type. If @handle is remembered then it + * should be passed in to accurately tear down the correct setting (in case + * of overlapping memory regions with differing types). If it is not known + * then @handle should be set to zero. In all cases @reg must be set. + * (x86-specific). + */ +#define XENPF_del_memtype 32 +struct xenpf_del_memtype { + /* IN variables. */ + uint32_t handle; + uint32_t reg; +}; +typedef struct xenpf_del_memtype xenpf_del_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t); + +/* Read current type of an MTRR (x86-specific). */ +#define XENPF_read_memtype 33 +struct xenpf_read_memtype { + /* IN variables. */ + uint32_t reg; + /* OUT variables. 
*/ + xen_pfn_t mfn; + uint64_t nr_mfns; + uint32_t type; +}; +typedef struct xenpf_read_memtype xenpf_read_memtype_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t); + +#define XENPF_microcode_update 35 +struct xenpf_microcode_update { + /* IN variables. */ + XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */ + uint32_t length; /* Length of microcode data. */ +}; +typedef struct xenpf_microcode_update xenpf_microcode_update_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t); + +#define XENPF_platform_quirk 39 +#define QUIRK_NOIRQBALANCING 1 /* Do not restrict IO-APIC RTE targets */ +#define QUIRK_IOAPIC_BAD_REGSEL 2 /* IO-APIC REGSEL forgets its value */ +#define QUIRK_IOAPIC_GOOD_REGSEL 3 /* IO-APIC REGSEL behaves properly */ +struct xenpf_platform_quirk { + /* IN variables. */ + uint32_t quirk_id; +}; +typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); + +#define XENPF_firmware_info 50 +#define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ +#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ +#define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ +struct xenpf_firmware_info { + /* IN variables. */ + uint32_t type; + uint32_t index; + /* OUT variables. */ + union { + struct { + /* Int13, Fn48: Check Extensions Present. */ + uint8_t device; /* %dl: bios device number */ + uint8_t version; /* %ah: major version */ + uint16_t interface_support; /* %cx: support bitmap */ + /* Int13, Fn08: Legacy Get Device Parameters. */ + uint16_t legacy_max_cylinder; /* %cl[7:6]:%ch: max cyl # */ + uint8_t legacy_max_head; /* %dh: max head # */ + uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector # */ + /* Int13, Fn41: Get Device Parameters (as filled into %ds:%esi). */ + /* NB. First uint16_t of buffer must be set to buffer size. */ + XEN_GUEST_HANDLE(void) edd_params; + } disk_info; /* XEN_FW_DISK_INFO */ + struct { + uint8_t device; /* bios device number */ + uint32_t mbr_signature; /* offset 0x1b8 in mbr */ + } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */ + struct { + /* Int10, AX=4F15: Get EDID info. */ + uint8_t capabilities; + uint8_t edid_transfer_time; + /* must refer to 128-byte buffer */ + XEN_GUEST_HANDLE(uint8) edid; + } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + } u; +}; +typedef struct xenpf_firmware_info xenpf_firmware_info_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t); + +#define XENPF_enter_acpi_sleep 51 +struct xenpf_enter_acpi_sleep { + /* IN variables */ + uint16_t pm1a_cnt_val; /* PM1a control value. */ + uint16_t pm1b_cnt_val; /* PM1b control value. */ + uint32_t sleep_state; /* Which state to enter (Sn). */ + uint32_t flags; /* Must be zero. */ +}; +typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t); + +#define XENPF_change_freq 52 +struct xenpf_change_freq { + /* IN variables */ + uint32_t flags; /* Must be zero. */ + uint32_t cpu; /* Physical cpu. */ + uint64_t freq; /* New frequency (Hz). */ +}; +typedef struct xenpf_change_freq xenpf_change_freq_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t); + +/* + * Get idle times (nanoseconds since boot) for physical CPUs specified in the + * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is + * indexed by CPU number; only entries with the corresponding @cpumap_bitmap + * bit set are written to. On return, @cpumap_bitmap is modified so that any + * non-existent CPUs are cleared. Such CPUs have their @idletime array entry + * cleared. 
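All of these commands travel in a struct xen_platform_op (defined at the end of this header) tagged with cmd and interface_version. A minimal sketch using the simplest of them, XENPF_platform_quirk; HYPERVISOR_platform_op() is a hypothetical wrapper name for the hypercall, used here only for illustration:

/* Illustrative: tell Xen not to restrict IO-APIC RTE targets (dom0 only). */
extern int HYPERVISOR_platform_op(struct xen_platform_op *op);

static int
set_noirqbalance_quirk(void)
{
    struct xen_platform_op op;

    op.cmd = XENPF_platform_quirk;
    op.interface_version = XENPF_INTERFACE_VERSION;
    op.u.platform_quirk.quirk_id = QUIRK_NOIRQBALANCING;
    return (HYPERVISOR_platform_op(&op));
}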
+ */ +#define XENPF_getidletime 53 +struct xenpf_getidletime { + /* IN/OUT variables */ + /* IN: CPUs to interrogate; OUT: subset of IN which are present */ + XEN_GUEST_HANDLE(uint8) cpumap_bitmap; + /* IN variables */ + /* Size of cpumap bitmap. */ + uint32_t cpumap_nr_cpus; + /* Must be indexable for every cpu in cpumap_bitmap. */ + XEN_GUEST_HANDLE(uint64) idletime; + /* OUT variables */ + /* System time when the idletime snapshots were taken. */ + uint64_t now; +}; +typedef struct xenpf_getidletime xenpf_getidletime_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t); + +#define XENPF_set_processor_pminfo 54 + +/* ability bits */ +#define XEN_PROCESSOR_PM_CX 1 +#define XEN_PROCESSOR_PM_PX 2 +#define XEN_PROCESSOR_PM_TX 4 + +/* cmd type */ +#define XEN_PM_CX 0 +#define XEN_PM_PX 1 +#define XEN_PM_TX 2 + +/* Px sub info type */ +#define XEN_PX_PCT 1 +#define XEN_PX_PSS 2 +#define XEN_PX_PPC 4 +#define XEN_PX_PSD 8 + +struct xen_power_register { + uint32_t space_id; + uint32_t bit_width; + uint32_t bit_offset; + uint32_t access_size; + uint64_t address; +}; + +struct xen_processor_csd { + uint32_t domain; /* domain number of one dependent group */ + uint32_t coord_type; /* coordination type */ + uint32_t num; /* number of processors in same domain */ +}; +typedef struct xen_processor_csd xen_processor_csd_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t); + +struct xen_processor_cx { + struct xen_power_register reg; /* GAS for Cx trigger register */ + uint8_t type; /* cstate value, c0: 0, c1: 1, ... */ + uint32_t latency; /* worst latency (ms) to enter/exit this cstate */ + uint32_t power; /* average power consumption(mW) */ + uint32_t dpcnt; /* number of dependency entries */ + XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */ +}; +typedef struct xen_processor_cx xen_processor_cx_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t); + +struct xen_processor_flags { + uint32_t bm_control:1; + uint32_t bm_check:1; + uint32_t has_cst:1; + uint32_t power_setup_done:1; + uint32_t bm_rld_set:1; +}; + +struct xen_processor_power { + uint32_t count; /* number of C state entries in array below */ + struct xen_processor_flags flags; /* global flags of this processor */ + XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */ +}; + +struct xen_pct_register { + uint8_t descriptor; + uint16_t length; + uint8_t space_id; + uint8_t bit_width; + uint8_t bit_offset; + uint8_t reserved; + uint64_t address; +}; + +struct xen_processor_px { + uint64_t core_frequency; /* megahertz */ + uint64_t power; /* milliWatts */ + uint64_t transition_latency; /* microseconds */ + uint64_t bus_master_latency; /* microseconds */ + uint64_t control; /* control value */ + uint64_t status; /* success indicator */ +}; +typedef struct xen_processor_px xen_processor_px_t; +DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t); + +struct xen_psd_package { + uint64_t num_entries; + uint64_t revision; + uint64_t domain; + uint64_t coord_type; + uint64_t num_processors; +}; + +struct xen_processor_performance { + uint32_t flags; /* flag for Px sub info type */ + uint32_t platform_limit; /* Platform limitation on freq usage */ + struct xen_pct_register control_register; + struct xen_pct_register status_register; + uint32_t state_count; /* total available performance states */ + XEN_GUEST_HANDLE(xen_processor_px_t) states; + struct xen_psd_package domain_info; + uint32_t shared_type; /* coordination type of this processor */ +}; +typedef struct xen_processor_performance xen_processor_performance_t; 
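XENPF_getidletime pairs two guest handles (the CPU bitmap and the per-CPU idle-time array) with a count, so both buffers must be sized for the same number of CPUs before the call. A hedged sketch, reusing the hypothetical HYPERVISOR_platform_op() wrapper and set_xen_guest_handle() helper from the earlier sketches:

/* Illustrative: fetch idle times for CPUs 0..ncpus-1.  'bitmap' holds one
 * bit per CPU of interest; 'idle' has ncpus uint64_t slots. */
static int
get_idle_times(uint8_t *bitmap, uint64_t *idle, uint32_t ncpus, uint64_t *now)
{
    struct xen_platform_op op;
    int error;

    op.cmd = XENPF_getidletime;
    op.interface_version = XENPF_INTERFACE_VERSION;
    set_xen_guest_handle(op.u.getidletime.cpumap_bitmap, bitmap);
    op.u.getidletime.cpumap_nr_cpus = ncpus;
    set_xen_guest_handle(op.u.getidletime.idletime, idle);
    error = HYPERVISOR_platform_op(&op);
    if (error == 0)
        *now = op.u.getidletime.now;
    return (error);
}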
+DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t); + +struct xenpf_set_processor_pminfo { + /* IN variables */ + uint32_t id; /* ACPI CPU ID */ + uint32_t type; /* {XEN_PM_CX, XEN_PM_PX} */ + union { + struct xen_processor_power power;/* Cx: _CST/_CSD */ + struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ + } u; +}; +typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t); + +struct xen_platform_op { + uint32_t cmd; + uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ + union { + struct xenpf_settime settime; + struct xenpf_add_memtype add_memtype; + struct xenpf_del_memtype del_memtype; + struct xenpf_read_memtype read_memtype; + struct xenpf_microcode_update microcode; + struct xenpf_platform_quirk platform_quirk; + struct xenpf_firmware_info firmware_info; + struct xenpf_enter_acpi_sleep enter_acpi_sleep; + struct xenpf_change_freq change_freq; + struct xenpf_getidletime getidletime; + struct xenpf_set_processor_pminfo set_pminfo; + uint8_t pad[128]; + } u; +}; +typedef struct xen_platform_op xen_platform_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t); + +#endif /* __XEN_PUBLIC_PLATFORM_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/platform.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/sched.h =================================================================== --- xen/interface/sched.h (.../stable/6/sys) (revision 0) +++ xen/interface/sched.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,121 @@ +/****************************************************************************** + * sched.h + * + * Scheduler state interactions + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_SCHED_H__ +#define __XEN_PUBLIC_SCHED_H__ + +#include "event_channel.h" + +/* + * The prototype for this hypercall is: + * long sched_op(int cmd, void *arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == Operation-specific extra argument(s), as described below. + * + * Versions of Xen prior to 3.0.2 provided only the following legacy version + * of this hypercall, supporting only the commands yield, block and shutdown: + * long sched_op(int cmd, unsigned long arg) + * @cmd == SCHEDOP_??? 
(scheduler operation). + * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) + * == SHUTDOWN_* code (SCHEDOP_shutdown) + * This legacy version is available to new guests as sched_op_compat(). + */ + +/* + * Voluntarily yield the CPU. + * @arg == NULL. + */ +#define SCHEDOP_yield 0 + +/* + * Block execution of this VCPU until an event is received for processing. + * If called with event upcalls masked, this operation will atomically + * reenable event delivery and check for pending events before blocking the + * VCPU. This avoids a "wakeup waiting" race. + * @arg == NULL. + */ +#define SCHEDOP_block 1 + +/* + * Halt execution of this domain (all VCPUs) and notify the system controller. + * @arg == pointer to sched_shutdown structure. + */ +#define SCHEDOP_shutdown 2 +struct sched_shutdown { + unsigned int reason; /* SHUTDOWN_* */ +}; +typedef struct sched_shutdown sched_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t); + +/* + * Poll a set of event-channel ports. Return when one or more are pending. An + * optional timeout may be specified. + * @arg == pointer to sched_poll structure. + */ +#define SCHEDOP_poll 3 +struct sched_poll { + XEN_GUEST_HANDLE(evtchn_port_t) ports; + unsigned int nr_ports; + uint64_t timeout; +}; +typedef struct sched_poll sched_poll_t; +DEFINE_XEN_GUEST_HANDLE(sched_poll_t); + +/* + * Declare a shutdown for another domain. The main use of this function is + * in interpreting shutdown requests and reasons for fully-virtualized + * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. + * @arg == pointer to sched_remote_shutdown structure. + */ +#define SCHEDOP_remote_shutdown 4 +struct sched_remote_shutdown { + domid_t domain_id; /* Remote domain ID */ + unsigned int reason; /* SHUTDOWN_xxx reason */ +}; +typedef struct sched_remote_shutdown sched_remote_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); + +/* + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control + * software to determine the appropriate action. For the most part, Xen does + * not care about the shutdown code. + */ +#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */ +#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ +#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ +#define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ + +#endif /* __XEN_PUBLIC_SCHED_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/sched.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/elfstructs.h =================================================================== --- xen/interface/elfstructs.h (.../stable/6/sys) (revision 0) +++ xen/interface/elfstructs.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,527 @@ +#ifndef __XEN_PUBLIC_ELFSTRUCTS_H__ +#define __XEN_PUBLIC_ELFSTRUCTS_H__ 1 +/* + * Copyright (c) 1995, 1996 Erik Theisen. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +typedef uint8_t Elf_Byte; + +typedef uint32_t Elf32_Addr; /* Unsigned program address */ +typedef uint32_t Elf32_Off; /* Unsigned file offset */ +typedef int32_t Elf32_Sword; /* Signed large integer */ +typedef uint32_t Elf32_Word; /* Unsigned large integer */ +typedef uint16_t Elf32_Half; /* Unsigned medium integer */ + +typedef uint64_t Elf64_Addr; +typedef uint64_t Elf64_Off; +typedef int32_t Elf64_Shalf; + +typedef int32_t Elf64_Sword; +typedef uint32_t Elf64_Word; + +typedef int64_t Elf64_Sxword; +typedef uint64_t Elf64_Xword; + +typedef uint32_t Elf64_Half; +typedef uint16_t Elf64_Quarter; + +/* + * e_ident[] identification indexes + * See http://www.caldera.com/developers/gabi/2000-07-17/ch4.eheader.html + */ +#define EI_MAG0 0 /* file ID */ +#define EI_MAG1 1 /* file ID */ +#define EI_MAG2 2 /* file ID */ +#define EI_MAG3 3 /* file ID */ +#define EI_CLASS 4 /* file class */ +#define EI_DATA 5 /* data encoding */ +#define EI_VERSION 6 /* ELF header version */ +#define EI_OSABI 7 /* OS/ABI ID */ +#define EI_ABIVERSION 8 /* ABI version */ +#define EI_PAD 9 /* start of pad bytes */ +#define EI_NIDENT 16 /* Size of e_ident[] */ + +/* e_ident[] magic number */ +#define ELFMAG0 0x7f /* e_ident[EI_MAG0] */ +#define ELFMAG1 'E' /* e_ident[EI_MAG1] */ +#define ELFMAG2 'L' /* e_ident[EI_MAG2] */ +#define ELFMAG3 'F' /* e_ident[EI_MAG3] */ +#define ELFMAG "\177ELF" /* magic */ +#define SELFMAG 4 /* size of magic */ + +/* e_ident[] file class */ +#define ELFCLASSNONE 0 /* invalid */ +#define ELFCLASS32 1 /* 32-bit objs */ +#define ELFCLASS64 2 /* 64-bit objs */ +#define ELFCLASSNUM 3 /* number of classes */ + +/* e_ident[] data encoding */ +#define ELFDATANONE 0 /* invalid */ +#define ELFDATA2LSB 1 /* Little-Endian */ +#define ELFDATA2MSB 2 /* Big-Endian */ +#define ELFDATANUM 3 /* number of data encode defines */ + +/* e_ident[] Operating System/ABI */ +#define ELFOSABI_SYSV 0 /* UNIX System V ABI */ +#define ELFOSABI_HPUX 1 /* HP-UX operating system */ +#define ELFOSABI_NETBSD 2 /* NetBSD */ +#define ELFOSABI_LINUX 3 /* GNU/Linux */ +#define ELFOSABI_HURD 4 /* GNU/Hurd */ +#define ELFOSABI_86OPEN 5 /* 86Open common IA32 ABI */ +#define ELFOSABI_SOLARIS 6 /* Solaris */ +#define ELFOSABI_MONTEREY 7 /* Monterey */ +#define ELFOSABI_IRIX 8 /* IRIX */ +#define ELFOSABI_FREEBSD 9 /* FreeBSD */ +#define ELFOSABI_TRU64 10 /* TRU64 UNIX */ +#define ELFOSABI_MODESTO 11 /* Novell Modesto */ +#define 
ELFOSABI_OPENBSD 12 /* OpenBSD */ +#define ELFOSABI_ARM 97 /* ARM */ +#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */ + +/* e_ident */ +#define IS_ELF(ehdr) ((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \ + (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \ + (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \ + (ehdr).e_ident[EI_MAG3] == ELFMAG3) + +/* ELF Header */ +typedef struct elfhdr { + unsigned char e_ident[EI_NIDENT]; /* ELF Identification */ + Elf32_Half e_type; /* object file type */ + Elf32_Half e_machine; /* machine */ + Elf32_Word e_version; /* object file version */ + Elf32_Addr e_entry; /* virtual entry point */ + Elf32_Off e_phoff; /* program header table offset */ + Elf32_Off e_shoff; /* section header table offset */ + Elf32_Word e_flags; /* processor-specific flags */ + Elf32_Half e_ehsize; /* ELF header size */ + Elf32_Half e_phentsize; /* program header entry size */ + Elf32_Half e_phnum; /* number of program header entries */ + Elf32_Half e_shentsize; /* section header entry size */ + Elf32_Half e_shnum; /* number of section header entries */ + Elf32_Half e_shstrndx; /* section header table's "section + header string table" entry offset */ +} Elf32_Ehdr; + +typedef struct { + unsigned char e_ident[EI_NIDENT]; /* Id bytes */ + Elf64_Quarter e_type; /* file type */ + Elf64_Quarter e_machine; /* machine type */ + Elf64_Half e_version; /* version number */ + Elf64_Addr e_entry; /* entry point */ + Elf64_Off e_phoff; /* Program hdr offset */ + Elf64_Off e_shoff; /* Section hdr offset */ + Elf64_Half e_flags; /* Processor flags */ + Elf64_Quarter e_ehsize; /* sizeof ehdr */ + Elf64_Quarter e_phentsize; /* Program header entry size */ + Elf64_Quarter e_phnum; /* Number of program headers */ + Elf64_Quarter e_shentsize; /* Section header entry size */ + Elf64_Quarter e_shnum; /* Number of section headers */ + Elf64_Quarter e_shstrndx; /* String table index */ +} Elf64_Ehdr; + +/* e_type */ +#define ET_NONE 0 /* No file type */ +#define ET_REL 1 /* relocatable file */ +#define ET_EXEC 2 /* executable file */ +#define ET_DYN 3 /* shared object file */ +#define ET_CORE 4 /* core file */ +#define ET_NUM 5 /* number of types */ +#define ET_LOPROC 0xff00 /* reserved range for processor */ +#define ET_HIPROC 0xffff /* specific e_type */ + +/* e_machine */ +#define EM_NONE 0 /* No Machine */ +#define EM_M32 1 /* AT&T WE 32100 */ +#define EM_SPARC 2 /* SPARC */ +#define EM_386 3 /* Intel 80386 */ +#define EM_68K 4 /* Motorola 68000 */ +#define EM_88K 5 /* Motorola 88000 */ +#define EM_486 6 /* Intel 80486 - unused? 
*/ +#define EM_860 7 /* Intel 80860 */ +#define EM_MIPS 8 /* MIPS R3000 Big-Endian only */ +/* + * Don't know if EM_MIPS_RS4_BE, + * EM_SPARC64, EM_PARISC, + * or EM_PPC are ABI compliant + */ +#define EM_MIPS_RS4_BE 10 /* MIPS R4000 Big-Endian */ +#define EM_SPARC64 11 /* SPARC v9 64-bit unoffical */ +#define EM_PARISC 15 /* HPPA */ +#define EM_SPARC32PLUS 18 /* Enhanced instruction set SPARC */ +#define EM_PPC 20 /* PowerPC */ +#define EM_PPC64 21 /* PowerPC 64-bit */ +#define EM_ARM 40 /* Advanced RISC Machines ARM */ +#define EM_ALPHA 41 /* DEC ALPHA */ +#define EM_SPARCV9 43 /* SPARC version 9 */ +#define EM_ALPHA_EXP 0x9026 /* DEC ALPHA */ +#define EM_IA_64 50 /* Intel Merced */ +#define EM_X86_64 62 /* AMD x86-64 architecture */ +#define EM_VAX 75 /* DEC VAX */ + +/* Version */ +#define EV_NONE 0 /* Invalid */ +#define EV_CURRENT 1 /* Current */ +#define EV_NUM 2 /* number of versions */ + +/* Section Header */ +typedef struct { + Elf32_Word sh_name; /* name - index into section header + string table section */ + Elf32_Word sh_type; /* type */ + Elf32_Word sh_flags; /* flags */ + Elf32_Addr sh_addr; /* address */ + Elf32_Off sh_offset; /* file offset */ + Elf32_Word sh_size; /* section size */ + Elf32_Word sh_link; /* section header table index link */ + Elf32_Word sh_info; /* extra information */ + Elf32_Word sh_addralign; /* address alignment */ + Elf32_Word sh_entsize; /* section entry size */ +} Elf32_Shdr; + +typedef struct { + Elf64_Half sh_name; /* section name */ + Elf64_Half sh_type; /* section type */ + Elf64_Xword sh_flags; /* section flags */ + Elf64_Addr sh_addr; /* virtual address */ + Elf64_Off sh_offset; /* file offset */ + Elf64_Xword sh_size; /* section size */ + Elf64_Half sh_link; /* link to another */ + Elf64_Half sh_info; /* misc info */ + Elf64_Xword sh_addralign; /* memory alignment */ + Elf64_Xword sh_entsize; /* table entry size */ +} Elf64_Shdr; + +/* Special Section Indexes */ +#define SHN_UNDEF 0 /* undefined */ +#define SHN_LORESERVE 0xff00 /* lower bounds of reserved indexes */ +#define SHN_LOPROC 0xff00 /* reserved range for processor */ +#define SHN_HIPROC 0xff1f /* specific section indexes */ +#define SHN_ABS 0xfff1 /* absolute value */ +#define SHN_COMMON 0xfff2 /* common symbol */ +#define SHN_HIRESERVE 0xffff /* upper bounds of reserved indexes */ + +/* sh_type */ +#define SHT_NULL 0 /* inactive */ +#define SHT_PROGBITS 1 /* program defined information */ +#define SHT_SYMTAB 2 /* symbol table section */ +#define SHT_STRTAB 3 /* string table section */ +#define SHT_RELA 4 /* relocation section with addends*/ +#define SHT_HASH 5 /* symbol hash table section */ +#define SHT_DYNAMIC 6 /* dynamic section */ +#define SHT_NOTE 7 /* note section */ +#define SHT_NOBITS 8 /* no space section */ +#define SHT_REL 9 /* relation section without addends */ +#define SHT_SHLIB 10 /* reserved - purpose unknown */ +#define SHT_DYNSYM 11 /* dynamic symbol table section */ +#define SHT_NUM 12 /* number of section types */ +#define SHT_LOPROC 0x70000000 /* reserved range for processor */ +#define SHT_HIPROC 0x7fffffff /* specific section header types */ +#define SHT_LOUSER 0x80000000 /* reserved range for application */ +#define SHT_HIUSER 0xffffffff /* specific indexes */ + +/* Section names */ +#define ELF_BSS ".bss" /* uninitialized data */ +#define ELF_DATA ".data" /* initialized data */ +#define ELF_DEBUG ".debug" /* debug */ +#define ELF_DYNAMIC ".dynamic" /* dynamic linking information */ +#define ELF_DYNSTR ".dynstr" /* dynamic string table */ +#define 
ELF_DYNSYM ".dynsym" /* dynamic symbol table */ +#define ELF_FINI ".fini" /* termination code */ +#define ELF_GOT ".got" /* global offset table */ +#define ELF_HASH ".hash" /* symbol hash table */ +#define ELF_INIT ".init" /* initialization code */ +#define ELF_REL_DATA ".rel.data" /* relocation data */ +#define ELF_REL_FINI ".rel.fini" /* relocation termination code */ +#define ELF_REL_INIT ".rel.init" /* relocation initialization code */ +#define ELF_REL_DYN ".rel.dyn" /* relocaltion dynamic link info */ +#define ELF_REL_RODATA ".rel.rodata" /* relocation read-only data */ +#define ELF_REL_TEXT ".rel.text" /* relocation code */ +#define ELF_RODATA ".rodata" /* read-only data */ +#define ELF_SHSTRTAB ".shstrtab" /* section header string table */ +#define ELF_STRTAB ".strtab" /* string table */ +#define ELF_SYMTAB ".symtab" /* symbol table */ +#define ELF_TEXT ".text" /* code */ + + +/* Section Attribute Flags - sh_flags */ +#define SHF_WRITE 0x1 /* Writable */ +#define SHF_ALLOC 0x2 /* occupies memory */ +#define SHF_EXECINSTR 0x4 /* executable */ +#define SHF_MASKPROC 0xf0000000 /* reserved bits for processor */ + /* specific section attributes */ + +/* Symbol Table Entry */ +typedef struct elf32_sym { + Elf32_Word st_name; /* name - index into string table */ + Elf32_Addr st_value; /* symbol value */ + Elf32_Word st_size; /* symbol size */ + unsigned char st_info; /* type and binding */ + unsigned char st_other; /* 0 - no defined meaning */ + Elf32_Half st_shndx; /* section header index */ +} Elf32_Sym; + +typedef struct { + Elf64_Half st_name; /* Symbol name index in str table */ + Elf_Byte st_info; /* type / binding attrs */ + Elf_Byte st_other; /* unused */ + Elf64_Quarter st_shndx; /* section index of symbol */ + Elf64_Xword st_value; /* value of symbol */ + Elf64_Xword st_size; /* size of symbol */ +} Elf64_Sym; + +/* Symbol table index */ +#define STN_UNDEF 0 /* undefined */ + +/* Extract symbol info - st_info */ +#define ELF32_ST_BIND(x) ((x) >> 4) +#define ELF32_ST_TYPE(x) (((unsigned int) x) & 0xf) +#define ELF32_ST_INFO(b,t) (((b) << 4) + ((t) & 0xf)) + +#define ELF64_ST_BIND(x) ((x) >> 4) +#define ELF64_ST_TYPE(x) (((unsigned int) x) & 0xf) +#define ELF64_ST_INFO(b,t) (((b) << 4) + ((t) & 0xf)) + +/* Symbol Binding - ELF32_ST_BIND - st_info */ +#define STB_LOCAL 0 /* Local symbol */ +#define STB_GLOBAL 1 /* Global symbol */ +#define STB_WEAK 2 /* like global - lower precedence */ +#define STB_NUM 3 /* number of symbol bindings */ +#define STB_LOPROC 13 /* reserved range for processor */ +#define STB_HIPROC 15 /* specific symbol bindings */ + +/* Symbol type - ELF32_ST_TYPE - st_info */ +#define STT_NOTYPE 0 /* not specified */ +#define STT_OBJECT 1 /* data object */ +#define STT_FUNC 2 /* function */ +#define STT_SECTION 3 /* section */ +#define STT_FILE 4 /* file */ +#define STT_NUM 5 /* number of symbol types */ +#define STT_LOPROC 13 /* reserved range for processor */ +#define STT_HIPROC 15 /* specific symbol types */ + +/* Relocation entry with implicit addend */ +typedef struct { + Elf32_Addr r_offset; /* offset of relocation */ + Elf32_Word r_info; /* symbol table index and type */ +} Elf32_Rel; + +/* Relocation entry with explicit addend */ +typedef struct { + Elf32_Addr r_offset; /* offset of relocation */ + Elf32_Word r_info; /* symbol table index and type */ + Elf32_Sword r_addend; +} Elf32_Rela; + +/* Extract relocation info - r_info */ +#define ELF32_R_SYM(i) ((i) >> 8) +#define ELF32_R_TYPE(i) ((unsigned char) (i)) +#define ELF32_R_INFO(s,t) (((s) << 8) + 
(unsigned char)(t)) + +typedef struct { + Elf64_Xword r_offset; /* where to do it */ + Elf64_Xword r_info; /* index & type of relocation */ +} Elf64_Rel; + +typedef struct { + Elf64_Xword r_offset; /* where to do it */ + Elf64_Xword r_info; /* index & type of relocation */ + Elf64_Sxword r_addend; /* adjustment value */ +} Elf64_Rela; + +#define ELF64_R_SYM(info) ((info) >> 32) +#define ELF64_R_TYPE(info) ((info) & 0xFFFFFFFF) +#define ELF64_R_INFO(s,t) (((s) << 32) + (u_int32_t)(t)) + +/* Program Header */ +typedef struct { + Elf32_Word p_type; /* segment type */ + Elf32_Off p_offset; /* segment offset */ + Elf32_Addr p_vaddr; /* virtual address of segment */ + Elf32_Addr p_paddr; /* physical address - ignored? */ + Elf32_Word p_filesz; /* number of bytes in file for seg. */ + Elf32_Word p_memsz; /* number of bytes in mem. for seg. */ + Elf32_Word p_flags; /* flags */ + Elf32_Word p_align; /* memory alignment */ +} Elf32_Phdr; + +typedef struct { + Elf64_Half p_type; /* entry type */ + Elf64_Half p_flags; /* flags */ + Elf64_Off p_offset; /* offset */ + Elf64_Addr p_vaddr; /* virtual address */ + Elf64_Addr p_paddr; /* physical address */ + Elf64_Xword p_filesz; /* file size */ + Elf64_Xword p_memsz; /* memory size */ + Elf64_Xword p_align; /* memory & file alignment */ +} Elf64_Phdr; + +/* Segment types - p_type */ +#define PT_NULL 0 /* unused */ +#define PT_LOAD 1 /* loadable segment */ +#define PT_DYNAMIC 2 /* dynamic linking section */ +#define PT_INTERP 3 /* the RTLD */ +#define PT_NOTE 4 /* auxiliary information */ +#define PT_SHLIB 5 /* reserved - purpose undefined */ +#define PT_PHDR 6 /* program header */ +#define PT_NUM 7 /* Number of segment types */ +#define PT_LOPROC 0x70000000 /* reserved range for processor */ +#define PT_HIPROC 0x7fffffff /* specific segment types */ + +/* Segment flags - p_flags */ +#define PF_X 0x1 /* Executable */ +#define PF_W 0x2 /* Writable */ +#define PF_R 0x4 /* Readable */ +#define PF_MASKPROC 0xf0000000 /* reserved bits for processor */ + /* specific segment flags */ + +/* Dynamic structure */ +typedef struct { + Elf32_Sword d_tag; /* controls meaning of d_val */ + union { + Elf32_Word d_val; /* Multiple meanings - see d_tag */ + Elf32_Addr d_ptr; /* program virtual address */ + } d_un; +} Elf32_Dyn; + +typedef struct { + Elf64_Xword d_tag; /* controls meaning of d_val */ + union { + Elf64_Addr d_ptr; + Elf64_Xword d_val; + } d_un; +} Elf64_Dyn; + +/* Dynamic Array Tags - d_tag */ +#define DT_NULL 0 /* marks end of _DYNAMIC array */ +#define DT_NEEDED 1 /* string table offset of needed lib */ +#define DT_PLTRELSZ 2 /* size of relocation entries in PLT */ +#define DT_PLTGOT 3 /* address PLT/GOT */ +#define DT_HASH 4 /* address of symbol hash table */ +#define DT_STRTAB 5 /* address of string table */ +#define DT_SYMTAB 6 /* address of symbol table */ +#define DT_RELA 7 /* address of relocation table */ +#define DT_RELASZ 8 /* size of relocation table */ +#define DT_RELAENT 9 /* size of relocation entry */ +#define DT_STRSZ 10 /* size of string table */ +#define DT_SYMENT 11 /* size of symbol table entry */ +#define DT_INIT 12 /* address of initialization func. */ +#define DT_FINI 13 /* address of termination function */ +#define DT_SONAME 14 /* string table offset of shared obj */ +#define DT_RPATH 15 /* string table offset of library + search path */ +#define DT_SYMBOLIC 16 /* start sym search in shared obj. */ +#define DT_REL 17 /* address of rel. tbl. 
w addends */ +#define DT_RELSZ 18 /* size of DT_REL relocation table */ +#define DT_RELENT 19 /* size of DT_REL relocation entry */ +#define DT_PLTREL 20 /* PLT referenced relocation entry */ +#define DT_DEBUG 21 /* bugger */ +#define DT_TEXTREL 22 /* Allow rel. mod. to unwritable seg */ +#define DT_JMPREL 23 /* add. of PLT's relocation entries */ +#define DT_BIND_NOW 24 /* Bind now regardless of env setting */ +#define DT_NUM 25 /* Number used. */ +#define DT_LOPROC 0x70000000 /* reserved range for processor */ +#define DT_HIPROC 0x7fffffff /* specific dynamic array tags */ + +/* Standard ELF hashing function */ +unsigned int elf_hash(const unsigned char *name); + +/* + * Note Definitions + */ +typedef struct { + Elf32_Word namesz; + Elf32_Word descsz; + Elf32_Word type; +} Elf32_Note; + +typedef struct { + Elf64_Half namesz; + Elf64_Half descsz; + Elf64_Half type; +} Elf64_Note; + + +#if defined(ELFSIZE) +#define CONCAT(x,y) __CONCAT(x,y) +#define ELFNAME(x) CONCAT(elf,CONCAT(ELFSIZE,CONCAT(_,x))) +#define ELFNAME2(x,y) CONCAT(x,CONCAT(_elf,CONCAT(ELFSIZE,CONCAT(_,y)))) +#define ELFNAMEEND(x) CONCAT(x,CONCAT(_elf,ELFSIZE)) +#define ELFDEFNNAME(x) CONCAT(ELF,CONCAT(ELFSIZE,CONCAT(_,x))) +#endif + +#if defined(ELFSIZE) && (ELFSIZE == 32) +#define Elf_Ehdr Elf32_Ehdr +#define Elf_Phdr Elf32_Phdr +#define Elf_Shdr Elf32_Shdr +#define Elf_Sym Elf32_Sym +#define Elf_Rel Elf32_Rel +#define Elf_RelA Elf32_Rela +#define Elf_Dyn Elf32_Dyn +#define Elf_Word Elf32_Word +#define Elf_Sword Elf32_Sword +#define Elf_Addr Elf32_Addr +#define Elf_Off Elf32_Off +#define Elf_Nhdr Elf32_Nhdr +#define Elf_Note Elf32_Note + +#define ELF_R_SYM ELF32_R_SYM +#define ELF_R_TYPE ELF32_R_TYPE +#define ELF_R_INFO ELF32_R_INFO +#define ELFCLASS ELFCLASS32 + +#define ELF_ST_BIND ELF32_ST_BIND +#define ELF_ST_TYPE ELF32_ST_TYPE +#define ELF_ST_INFO ELF32_ST_INFO + +#define AuxInfo Aux32Info +#elif defined(ELFSIZE) && (ELFSIZE == 64) +#define Elf_Ehdr Elf64_Ehdr +#define Elf_Phdr Elf64_Phdr +#define Elf_Shdr Elf64_Shdr +#define Elf_Sym Elf64_Sym +#define Elf_Rel Elf64_Rel +#define Elf_RelA Elf64_Rela +#define Elf_Dyn Elf64_Dyn +#define Elf_Word Elf64_Word +#define Elf_Sword Elf64_Sword +#define Elf_Addr Elf64_Addr +#define Elf_Off Elf64_Off +#define Elf_Nhdr Elf64_Nhdr +#define Elf_Note Elf64_Note + +#define ELF_R_SYM ELF64_R_SYM +#define ELF_R_TYPE ELF64_R_TYPE +#define ELF_R_INFO ELF64_R_INFO +#define ELFCLASS ELFCLASS64 + +#define ELF_ST_BIND ELF64_ST_BIND +#define ELF_ST_TYPE ELF64_ST_TYPE +#define ELF_ST_INFO ELF64_ST_INFO + +#define AuxInfo Aux64Info +#endif + +#endif /* __XEN_PUBLIC_ELFSTRUCTS_H__ */ Property changes on: xen/interface/elfstructs.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/kexec.h =================================================================== --- xen/interface/kexec.h (.../stable/6/sys) (revision 0) +++ xen/interface/kexec.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,189 @@ +/****************************************************************************** + * kexec.h - Public portion + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, 
subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Xen port written by: + * - Simon 'Horms' Horman + * - Magnus Damm + */ + +#ifndef _XEN_PUBLIC_KEXEC_H +#define _XEN_PUBLIC_KEXEC_H + + +/* This file describes the Kexec / Kdump hypercall interface for Xen. + * + * Kexec under vanilla Linux allows a user to reboot the physical machine + * into a new user-specified kernel. The Xen port extends this idea + * to allow rebooting of the machine from dom0. When kexec for dom0 + * is used to reboot, both the hypervisor and the domains get replaced + * with some other kernel. It is possible to kexec between vanilla + * Linux and Xen and back again. Xen to Xen works well too. + * + * The hypercall interface for kexec can be divided into three main + * types of hypercall operations: + * + * 1) Range information: + * This is used by the dom0 kernel to ask the hypervisor about various + * address information. This information is needed to allow kexec-tools + * to fill in the ELF headers for /proc/vmcore properly. + * + * 2) Load and unload of images: + * There are no big surprises here, the kexec binary from kexec-tools + * runs in userspace in dom0. The tool loads/unloads data into the + * dom0 kernel such as new kernel, initramfs and hypervisor. When + * loaded the dom0 kernel performs a load hypercall operation, and + * before releasing all page references the dom0 kernel calls unload. + * + * 3) Kexec operation: + * This is used to start a previously loaded kernel. + */ + +#include "xen.h" + +#if defined(__i386__) || defined(__x86_64__) +#define KEXEC_XEN_NO_PAGES 17 +#endif + +/* + * Prototype for this hypercall is: + * int kexec_op(int cmd, void *args) + * @cmd == KEXEC_CMD_... + * KEXEC operation to perform + * @args == Operation-specific extra arguments (NULL if none). + */ + +/* + * Kexec supports two types of operation: + * - kexec into a regular kernel, very similar to a standard reboot + * - KEXEC_TYPE_DEFAULT is used to specify this type + * - kexec into a special "crash kernel", aka kexec-on-panic + * - KEXEC_TYPE_CRASH is used to specify this type + * - parts of our system may be broken at kexec-on-panic time + * - the code should be kept as simple and self-contained as possible + */ + +#define KEXEC_TYPE_DEFAULT 0 +#define KEXEC_TYPE_CRASH 1 + + +/* The kexec implementation for Xen allows the user to load two + * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH. + * All data needed for a kexec reboot is kept in one xen_kexec_image_t + * per "instance". The data mainly consists of machine address lists to pages + * together with destination addresses. The data in xen_kexec_image_t + * is passed to the "code page" which is one page of code that performs + * the final relocations before jumping to the new kernel. 
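As an illustration only (not part of the patch), a kernel that needs to know where the crash window lives can issue a range query using the xen_kexec_range structure and KEXEC_CMD_kexec_get_range defined further down; the HYPERVISOR_kexec_op() wrapper name is an assumption here, following the prototype shown above.

static int
get_crash_area(unsigned long *startp, unsigned long *sizep)
{
        xen_kexec_range_t range;
        int error;

        range.range = KEXEC_RANGE_MA_CRASH;     /* ask about the crash region */
        range.nr = 0;                           /* only meaningful for per-CPU ranges */
        error = HYPERVISOR_kexec_op(KEXEC_CMD_kexec_get_range, &range);
        if (error == 0) {
                *startp = range.start;          /* machine address of the window */
                *sizep = range.size;            /* bytes reserved for it */
        }
        return (error);
}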
+ */ + +typedef struct xen_kexec_image { +#if defined(__i386__) || defined(__x86_64__) + unsigned long page_list[KEXEC_XEN_NO_PAGES]; +#endif +#if defined(__ia64__) + unsigned long reboot_code_buffer; +#endif + unsigned long indirection_page; + unsigned long start_address; +} xen_kexec_image_t; + +/* + * Perform kexec having previously loaded a kexec or kdump kernel + * as appropriate. + * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] + */ +#define KEXEC_CMD_kexec 0 +typedef struct xen_kexec_exec { + int type; +} xen_kexec_exec_t; + +/* + * Load/Unload kernel image for kexec or kdump. + * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in] + * image == relocation information for kexec (ignored for unload) [in] + */ +#define KEXEC_CMD_kexec_load 1 +#define KEXEC_CMD_kexec_unload 2 +typedef struct xen_kexec_load { + int type; + xen_kexec_image_t image; +} xen_kexec_load_t; + +#define KEXEC_RANGE_MA_CRASH 0 /* machine address and size of crash area */ +#define KEXEC_RANGE_MA_XEN 1 /* machine address and size of Xen itself */ +#define KEXEC_RANGE_MA_CPU 2 /* machine address and size of a CPU note */ +#define KEXEC_RANGE_MA_XENHEAP 3 /* machine address and size of xenheap + * Note that although this is adjacent + * to Xen it exists in a separate EFI + * region on ia64, and thus needs to be + * inserted into iomem_machine separately */ +#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* machine address and size of + * the ia64_boot_param */ +#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size of + * of the EFI Memory Map */ +#define KEXEC_RANGE_MA_VMCOREINFO 6 /* machine address and size of vmcoreinfo */ + +/* + * Find the address and size of certain memory areas + * range == KEXEC_RANGE_... [in] + * nr == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in] + * size == number of bytes reserved in window [out] + * start == address of the first byte in the window [out] + */ +#define KEXEC_CMD_kexec_get_range 3 +typedef struct xen_kexec_range { + int range; + int nr; + unsigned long size; + unsigned long start; +} xen_kexec_range_t; + +/* vmcoreinfo stuff */ +#define VMCOREINFO_BYTES (4096) +#define VMCOREINFO_NOTE_NAME "VMCOREINFO_XEN" +void arch_crash_save_vmcoreinfo(void); +void vmcoreinfo_append_str(const char *fmt, ...) 
+ __attribute__ ((format (printf, 1, 2))); +#define VMCOREINFO_PAGESIZE(value) \ + vmcoreinfo_append_str("PAGESIZE=%ld\n", value) +#define VMCOREINFO_SYMBOL(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) +#define VMCOREINFO_SYMBOL_ALIAS(alias, name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #alias, (unsigned long)&name) +#define VMCOREINFO_STRUCT_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%zu\n", #name, sizeof(struct name)) +#define VMCOREINFO_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(struct name, field)) +#define VMCOREINFO_OFFSET_ALIAS(name, field, alias) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #alias, \ + (unsigned long)offsetof(struct name, field)) + +#endif /* _XEN_PUBLIC_KEXEC_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/kexec.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/xenoprof.h =================================================================== --- xen/interface/xenoprof.h (.../stable/6/sys) (revision 0) +++ xen/interface/xenoprof.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,138 @@ +/****************************************************************************** + * xenoprof.h + * + * Interface for enabling system wide profiling based on hardware performance + * counters + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Hewlett-Packard Co. + * Written by Aravind Menon & Jose Renato Santos + */ + +#ifndef __XEN_PUBLIC_XENOPROF_H__ +#define __XEN_PUBLIC_XENOPROF_H__ + +#include "xen.h" + +/* + * Commands to HYPERVISOR_xenoprof_op(). 
+ */ +#define XENOPROF_init 0 +#define XENOPROF_reset_active_list 1 +#define XENOPROF_reset_passive_list 2 +#define XENOPROF_set_active 3 +#define XENOPROF_set_passive 4 +#define XENOPROF_reserve_counters 5 +#define XENOPROF_counter 6 +#define XENOPROF_setup_events 7 +#define XENOPROF_enable_virq 8 +#define XENOPROF_start 9 +#define XENOPROF_stop 10 +#define XENOPROF_disable_virq 11 +#define XENOPROF_release_counters 12 +#define XENOPROF_shutdown 13 +#define XENOPROF_get_buffer 14 +#define XENOPROF_set_backtrace 15 +#define XENOPROF_last_op 15 + +#define MAX_OPROF_EVENTS 32 +#define MAX_OPROF_DOMAINS 25 +#define XENOPROF_CPU_TYPE_SIZE 64 + +/* Xenoprof performance events (not Xen events) */ +struct event_log { + uint64_t eip; + uint8_t mode; + uint8_t event; +}; + +/* PC value that indicates a special code */ +#define XENOPROF_ESCAPE_CODE ~0UL +/* Transient events for the xenoprof->oprofile cpu buf */ +#define XENOPROF_TRACE_BEGIN 1 + +/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */ +struct xenoprof_buf { + uint32_t event_head; + uint32_t event_tail; + uint32_t event_size; + uint32_t vcpu_id; + uint64_t xen_samples; + uint64_t kernel_samples; + uint64_t user_samples; + uint64_t lost_samples; + struct event_log event_log[1]; +}; +#ifndef __XEN__ +typedef struct xenoprof_buf xenoprof_buf_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t); +#endif + +struct xenoprof_init { + int32_t num_events; + int32_t is_primary; + char cpu_type[XENOPROF_CPU_TYPE_SIZE]; +}; +typedef struct xenoprof_init xenoprof_init_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t); + +struct xenoprof_get_buffer { + int32_t max_samples; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_gmaddr; +}; +typedef struct xenoprof_get_buffer xenoprof_get_buffer_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t); + +struct xenoprof_counter { + uint32_t ind; + uint64_t count; + uint32_t enabled; + uint32_t event; + uint32_t hypervisor; + uint32_t kernel; + uint32_t user; + uint64_t unit_mask; +}; +typedef struct xenoprof_counter xenoprof_counter_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t); + +typedef struct xenoprof_passive { + uint16_t domain_id; + int32_t max_samples; + int32_t nbuf; + int32_t bufsize; + uint64_t buf_gmaddr; +} xenoprof_passive_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t); + + +#endif /* __XEN_PUBLIC_XENOPROF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/xenoprof.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/acm.h =================================================================== --- xen/interface/acm.h (.../stable/6/sys) (revision 0) +++ xen/interface/acm.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,228 @@ +/* + * acm.h: Xen access control module interface defintions + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Reiner Sailer + * Copyright (c) 2005, International Business Machines Corporation. + */ + +#ifndef _XEN_PUBLIC_ACM_H +#define _XEN_PUBLIC_ACM_H + +#include "xen.h" + +/* if ACM_DEBUG defined, all hooks should + * print a short trace message (comment it out + * when not in testing mode ) + */ +/* #define ACM_DEBUG */ + +#ifdef ACM_DEBUG +# define printkd(fmt, args...) printk(fmt,## args) +#else +# define printkd(fmt, args...) +#endif + +/* default ssid reference value if not supplied */ +#define ACM_DEFAULT_SSID 0x0 +#define ACM_DEFAULT_LOCAL_SSID 0x0 + +/* Internal ACM ERROR types */ +#define ACM_OK 0 +#define ACM_UNDEF -1 +#define ACM_INIT_SSID_ERROR -2 +#define ACM_INIT_SOID_ERROR -3 +#define ACM_ERROR -4 + +/* External ACCESS DECISIONS */ +#define ACM_ACCESS_PERMITTED 0 +#define ACM_ACCESS_DENIED -111 +#define ACM_NULL_POINTER_ERROR -200 + +/* + Error codes reported in when trying to test for a new policy + These error codes are reported in an array of tuples where + each error code is followed by a parameter describing the error + more closely, such as a domain id. +*/ +#define ACM_EVTCHN_SHARING_VIOLATION 0x100 +#define ACM_GNTTAB_SHARING_VIOLATION 0x101 +#define ACM_DOMAIN_LOOKUP 0x102 +#define ACM_CHWALL_CONFLICT 0x103 +#define ACM_SSIDREF_IN_USE 0x104 + + +/* primary policy in lower 4 bits */ +#define ACM_NULL_POLICY 0 +#define ACM_CHINESE_WALL_POLICY 1 +#define ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY 2 +#define ACM_POLICY_UNDEFINED 15 + +/* combinations have secondary policy component in higher 4bit */ +#define ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY \ + ((ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY << 4) | ACM_CHINESE_WALL_POLICY) + +/* policy: */ +#define ACM_POLICY_NAME(X) \ + ((X) == (ACM_NULL_POLICY)) ? "NULL" : \ + ((X) == (ACM_CHINESE_WALL_POLICY)) ? "CHINESE WALL" : \ + ((X) == (ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "SIMPLE TYPE ENFORCEMENT" : \ + ((X) == (ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "CHINESE WALL AND SIMPLE TYPE ENFORCEMENT" : \ + "UNDEFINED" + +/* the following policy versions must be increased + * whenever the interpretation of the related + * policy's data structure changes + */ +#define ACM_POLICY_VERSION 3 +#define ACM_CHWALL_VERSION 1 +#define ACM_STE_VERSION 1 + +/* defines a ssid reference used by xen */ +typedef uint32_t ssidref_t; + +/* hooks that are known to domains */ +#define ACMHOOK_none 0 +#define ACMHOOK_sharing 1 + +/* -------security policy relevant type definitions-------- */ + +/* type identifier; compares to "equal" or "not equal" */ +typedef uint16_t domaintype_t; + +/* CHINESE WALL POLICY DATA STRUCTURES + * + * current accumulated conflict type set: + * When a domain is started and has a type that is in + * a conflict set, the conflicting types are incremented in + * the aggregate set. When a domain is destroyed, the + * conflicting types to its type are decremented. + * If a domain has multiple types, this procedure works over + * all those types. 
+ * + * conflict_aggregate_set[i] holds the number of + * running domains that have a conflict with type i. + * + * running_types[i] holds the number of running domains + * that include type i in their ssidref-referenced type set + * + * conflict_sets[i][j] is "0" if type j has no conflict + * with type i and is "1" otherwise. + */ +/* high-16 = version, low-16 = check magic */ +#define ACM_MAGIC 0x0001debc + +/* each offset in bytes from start of the struct they + * are part of */ + +/* V3 of the policy buffer aded a version structure */ +struct acm_policy_version +{ + uint32_t major; + uint32_t minor; +}; + + +/* each buffer consists of all policy information for + * the respective policy given in the policy code + * + * acm_policy_buffer, acm_chwall_policy_buffer, + * and acm_ste_policy_buffer need to stay 32-bit aligned + * because we create binary policies also with external + * tools that assume packed representations (e.g. the java tool) + */ +struct acm_policy_buffer { + uint32_t policy_version; /* ACM_POLICY_VERSION */ + uint32_t magic; + uint32_t len; + uint32_t policy_reference_offset; + uint32_t primary_policy_code; + uint32_t primary_buffer_offset; + uint32_t secondary_policy_code; + uint32_t secondary_buffer_offset; + struct acm_policy_version xml_pol_version; /* add in V3 */ +}; + + +struct acm_policy_reference_buffer { + uint32_t len; +}; + +struct acm_chwall_policy_buffer { + uint32_t policy_version; /* ACM_CHWALL_VERSION */ + uint32_t policy_code; + uint32_t chwall_max_types; + uint32_t chwall_max_ssidrefs; + uint32_t chwall_max_conflictsets; + uint32_t chwall_ssid_offset; + uint32_t chwall_conflict_sets_offset; + uint32_t chwall_running_types_offset; + uint32_t chwall_conflict_aggregate_offset; +}; + +struct acm_ste_policy_buffer { + uint32_t policy_version; /* ACM_STE_VERSION */ + uint32_t policy_code; + uint32_t ste_max_types; + uint32_t ste_max_ssidrefs; + uint32_t ste_ssid_offset; +}; + +struct acm_stats_buffer { + uint32_t magic; + uint32_t len; + uint32_t primary_policy_code; + uint32_t primary_stats_offset; + uint32_t secondary_policy_code; + uint32_t secondary_stats_offset; +}; + +struct acm_ste_stats_buffer { + uint32_t ec_eval_count; + uint32_t gt_eval_count; + uint32_t ec_denied_count; + uint32_t gt_denied_count; + uint32_t ec_cachehit_count; + uint32_t gt_cachehit_count; +}; + +struct acm_ssid_buffer { + uint32_t len; + ssidref_t ssidref; + uint32_t policy_reference_offset; + uint32_t primary_policy_code; + uint32_t primary_max_types; + uint32_t primary_types_offset; + uint32_t secondary_policy_code; + uint32_t secondary_max_types; + uint32_t secondary_types_offset; +}; + +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/acm.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/arch-x86_32.h =================================================================== --- xen/interface/arch-x86_32.h (.../stable/6/sys) (revision 0) +++ xen/interface/arch-x86_32.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,27 @@ +/****************************************************************************** + * arch-x86_32.h + * + * Guest OS interface to x86 32-bit Xen. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004-2006, K A Fraser + */ + +#include Property changes on: xen/interface/arch-x86_32.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/xencomm.h =================================================================== --- xen/interface/xencomm.h (.../stable/6/sys) (revision 0) +++ xen/interface/xencomm.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,41 @@ +/* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) IBM Corp. 2006 + */ + +#ifndef _XEN_XENCOMM_H_ +#define _XEN_XENCOMM_H_ + +/* A xencomm descriptor is a scatter/gather list containing physical + * addresses corresponding to a virtually contiguous memory area. The + * hypervisor translates these physical addresses to machine addresses to copy + * to and from the virtually contiguous area. 
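A minimal sketch (not part of the patch) of filling in such a descriptor for a page-aligned, virtually contiguous buffer, using the xencomm_desc layout declared just below; vtophys() and howmany() are the usual FreeBSD helpers, and the caller is assumed to have allocated the descriptor with room for one address[] entry per page.

static void
xencomm_fill_desc(struct xencomm_desc *desc, void *buf, size_t len)
{
        uint32_t i, npages = howmany(len, PAGE_SIZE);

        desc->magic = XENCOMM_MAGIC;
        desc->nr_addrs = npages;                /* entries used in address[] */
        for (i = 0; i < npages; i++)
                desc->address[i] = vtophys((char *)buf + i * PAGE_SIZE);
}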
+ */ + +#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM' */ +#define XENCOMM_INVALID (~0UL) + +struct xencomm_desc { + uint32_t magic; + uint32_t nr_addrs; /* the number of entries in address[] */ + uint64_t address[0]; +}; + +#endif /* _XEN_XENCOMM_H_ */ Property changes on: xen/interface/xencomm.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/memory.h =================================================================== --- xen/interface/memory.h (.../stable/6/sys) (revision 0) +++ xen/interface/memory.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,312 @@ +/****************************************************************************** + * memory.h + * + * Memory reservation and information. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_MEMORY_H__ +#define __XEN_PUBLIC_MEMORY_H__ + +/* + * Increase or decrease the specified domain's memory reservation. Returns the + * number of extents successfully allocated or freed. + * arg == addr of struct xen_memory_reservation. + */ +#define XENMEM_increase_reservation 0 +#define XENMEM_decrease_reservation 1 +#define XENMEM_populate_physmap 6 + +#if __XEN_INTERFACE_VERSION__ >= 0x00030209 +/* + * Maximum # bits addressable by the user of the allocated region (e.g., I/O + * devices often have a 32-bit limitation even in 64-bit systems). If zero + * then the user has no addressing restriction. This field is not used by + * XENMEM_decrease_reservation. + */ +#define XENMEMF_address_bits(x) (x) +#define XENMEMF_get_address_bits(x) ((x) & 0xffu) +/* NUMA node to allocate from. */ +#define XENMEMF_node(x) (((x) + 1) << 8) +#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu) +#endif + +struct xen_memory_reservation { + + /* + * XENMEM_increase_reservation: + * OUT: MFN (*not* GMFN) bases of extents that were allocated + * XENMEM_decrease_reservation: + * IN: GMFN bases of extents to free + * XENMEM_populate_physmap: + * IN: GPFN bases of extents to populate with memory + * OUT: GMFN bases of extents that were allocated + * (NB. This command also updates the mach_to_phys translation table) + */ + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; + + /* Number of extents, and size/alignment of each (2^extent_order pages). */ + xen_ulong_t nr_extents; + unsigned int extent_order; + +#if __XEN_INTERFACE_VERSION__ >= 0x00030209 + /* XENMEMF flags. 
*/ + unsigned int mem_flags; +#else + unsigned int address_bits; +#endif + + /* + * Domain whose reservation is being changed. + * Unprivileged domains can specify only DOMID_SELF. + */ + domid_t domid; +}; +typedef struct xen_memory_reservation xen_memory_reservation_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t); + +/* + * An atomic exchange of memory pages. If return code is zero then + * @out.extent_list provides GMFNs of the newly-allocated memory. + * Returns zero on complete success, otherwise a negative error code. + * On complete success then always @nr_exchanged == @in.nr_extents. + * On partial success @nr_exchanged indicates how much work was done. + */ +#define XENMEM_exchange 11 +struct xen_memory_exchange { + /* + * [IN] Details of memory extents to be exchanged (GMFN bases). + * Note that @in.address_bits is ignored and unused. + */ + struct xen_memory_reservation in; + + /* + * [IN/OUT] Details of new memory extents. + * We require that: + * 1. @in.domid == @out.domid + * 2. @in.nr_extents << @in.extent_order == + * @out.nr_extents << @out.extent_order + * 3. @in.extent_start and @out.extent_start lists must not overlap + * 4. @out.extent_start lists GPFN bases to be populated + * 5. @out.extent_start is overwritten with allocated GMFN bases + */ + struct xen_memory_reservation out; + + /* + * [OUT] Number of input extents that were successfully exchanged: + * 1. The first @nr_exchanged input extents were successfully + * deallocated. + * 2. The corresponding first entries in the output extent list correctly + * indicate the GMFNs that were successfully exchanged. + * 3. All other input and output extents are untouched. + * 4. If not all input exents are exchanged then the return code of this + * command will be non-zero. + * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER! + */ + xen_ulong_t nr_exchanged; +}; +typedef struct xen_memory_exchange xen_memory_exchange_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t); + +/* + * Returns the maximum machine frame number of mapped RAM in this system. + * This command always succeeds (it never returns an error code). + * arg == NULL. + */ +#define XENMEM_maximum_ram_page 2 + +/* + * Returns the current or maximum memory reservation, in pages, of the + * specified domain (may be DOMID_SELF). Returns -ve errcode on failure. + * arg == addr of domid_t. + */ +#define XENMEM_current_reservation 3 +#define XENMEM_maximum_reservation 4 + +/* + * Returns the maximum GPFN in use by the guest, or -ve errcode on failure. + */ +#define XENMEM_maximum_gpfn 14 + +/* + * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys + * mapping table. Architectures which do not have a m2p table do not implement + * this command. + * arg == addr of xen_machphys_mfn_list_t. + */ +#define XENMEM_machphys_mfn_list 5 +struct xen_machphys_mfn_list { + /* + * Size of the 'extent_start' array. Fewer entries will be filled if the + * machphys table is smaller than max_extents * 2MB. + */ + unsigned int max_extents; + + /* + * Pointer to buffer to fill with list of extent starts. If there are + * any large discontiguities in the machine address space, 2MB gaps in + * the machphys table will be represented by an MFN base of zero. + */ + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; + + /* + * Number of extents written to the above array. This will be smaller + * than 'max_extents' if the machphys table is smaller than max_e * 2MB. 
+ */ + unsigned int nr_extents; +}; +typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t); + +/* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */ + xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */ +}; +typedef struct xen_machphys_mapping xen_machphys_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t); + +/* + * Sets the GPFN at which a particular page appears in the specified guest's + * pseudophysical address space. + * arg == addr of xen_add_to_physmap_t. + */ +#define XENMEM_add_to_physmap 7 +struct xen_add_to_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* Source mapping space. */ +#define XENMAPSPACE_shared_info 0 /* shared info page */ +#define XENMAPSPACE_grant_table 1 /* grant table page */ +#define XENMAPSPACE_mfn 2 /* usual MFN */ + unsigned int space; + + /* Index into source mapping space. */ + xen_ulong_t idx; + + /* GPFN where the source mapping page should appear. */ + xen_pfn_t gpfn; +}; +typedef struct xen_add_to_physmap xen_add_to_physmap_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); + +/* + * Unmaps the page appearing at a particular GPFN from the specified guest's + * pseudophysical address space. + * arg == addr of xen_remove_from_physmap_t. + */ +#define XENMEM_remove_from_physmap 15 +struct xen_remove_from_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* GPFN of the current mapping of the page. */ + xen_pfn_t gpfn; +}; +typedef struct xen_remove_from_physmap xen_remove_from_physmap_t; +DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t); + +/* + * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error + * code on failure. This call only works for auto-translated guests. + */ +#define XENMEM_translate_gpfn_list 8 +struct xen_translate_gpfn_list { + /* Which domain to translate for? */ + domid_t domid; + + /* Length of list. */ + xen_ulong_t nr_gpfns; + + /* List of GPFNs to translate. */ + XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list; + + /* + * Output list to contain MFN translations. May be the same as the input + * list (in which case each input GPFN is overwritten with the output MFN). + */ + XEN_GUEST_HANDLE(xen_pfn_t) mfn_list; +}; +typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t; +DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t); + +/* + * Returns the pseudo-physical memory map as it was when the domain + * was started (specified by XENMEM_set_memory_map). + * arg == addr of xen_memory_map_t. + */ +#define XENMEM_memory_map 9 +struct xen_memory_map { + /* + * On call the number of entries which can be stored in buffer. On + * return the number of entries which have been stored in + * buffer. + */ + unsigned int nr_entries; + + /* + * Entries in the buffer are in the same format as returned by the + * BIOS INT 0x15 EAX=0xE820 call. + */ + XEN_GUEST_HANDLE(void) buffer; +}; +typedef struct xen_memory_map xen_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t); + +/* + * Returns the real physical memory map. Passes the same structure as + * XENMEM_memory_map. + * arg == addr of xen_memory_map_t. 
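A sketch (not part of the patch) of how a guest might fetch its pseudo-physical map with this call; HYPERVISOR_memory_op() and set_xen_guest_handle() are assumed to be provided by the port's hypercall glue, as on other Xen guests.

static int
fetch_pseudophys_map(void *e820_buf, unsigned int max_entries,
    unsigned int *filled)
{
        struct xen_memory_map memmap;
        int error;

        memmap.nr_entries = max_entries;                /* capacity of e820_buf on input */
        set_xen_guest_handle(memmap.buffer, e820_buf);  /* buffer of BIOS E820-format entries */
        error = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
        if (error == 0)
                *filled = memmap.nr_entries;            /* entries actually written */
        return (error);
}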
+ */ +#define XENMEM_machine_memory_map 10 + +/* + * Set the pseudo-physical memory map of a domain, as returned by + * XENMEM_memory_map. + * arg == addr of xen_foreign_memory_map_t. + */ +#define XENMEM_set_memory_map 13 +struct xen_foreign_memory_map { + domid_t domid; + struct xen_memory_map map; +}; +typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); + +#endif /* __XEN_PUBLIC_MEMORY_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/memory.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/event_channel.h =================================================================== --- xen/interface/event_channel.h (.../stable/6/sys) (revision 0) +++ xen/interface/event_channel.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,264 @@ +/****************************************************************************** + * event_channel.h + * + * Event channels between domains. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, K A Fraser. + */ + +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ + +/* + * Prototype for this hypercall is: + * int event_channel_op(int cmd, void *args) + * @cmd == EVTCHNOP_??? (event-channel operation). + * @args == Operation-specific extra arguments (NULL if none). + */ + +typedef uint32_t evtchn_port_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); + +/* + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as + * accepting interdomain bindings from domain <remote_dom>. A fresh port + * is allocated in <dom> and returned as <port>. + * NOTES: + * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. + * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */ +#define EVTCHNOP_alloc_unbound 6 +struct evtchn_alloc_unbound { + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; +}; +typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; + +/* + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between + * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify + * a port that is unbound and marked as accepting bindings from the calling + * domain. A fresh port is allocated in the calling domain and returned as + * <local_port>. + * NOTES: + * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */
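A sketch (not part of the patch) of the usual split-driver handshake built on this operation: the backend allocates an unbound port which the frontend domain later connects to with EVTCHNOP_bind_interdomain. The HYPERVISOR_event_channel_op() wrapper is assumed to follow the prototype shown above.

static int
offer_unbound_port(domid_t frontend_dom, evtchn_port_t *portp)
{
        struct evtchn_alloc_unbound alloc;
        int error;

        alloc.dom = DOMID_SELF;                 /* allocate the port in this domain */
        alloc.remote_dom = frontend_dom;        /* domain allowed to bind to it */
        error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc);
        if (error == 0)
                *portp = alloc.port;            /* typically advertised via xenstore */
        return (error);
}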
+ */ +#define EVTCHNOP_bind_interdomain 0 +struct evtchn_bind_interdomain { + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters. */ + evtchn_port_t local_port; +}; +typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; + +/* + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ on specified + * vcpu. + * NOTES: + * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list + * in xen.h for the classification of each VIRQ. + * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be + * re-bound via EVTCHNOP_bind_vcpu. + * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. + * The allocated event channel is bound to the specified vcpu and the + * binding cannot be changed. + */ +#define EVTCHNOP_bind_virq 1 +struct evtchn_bind_virq { + /* IN parameters. */ + uint32_t virq; + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_virq evtchn_bind_virq_t; + +/* + * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ . + * NOTES: + * 1. A physical IRQ may be bound to at most one event channel per domain. + * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. + */ +#define EVTCHNOP_bind_pirq 2 +struct evtchn_bind_pirq { + /* IN parameters. */ + uint32_t pirq; +#define BIND_PIRQ__WILL_SHARE 1 + uint32_t flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; + +/* + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. + * NOTES: + * 1. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +#define EVTCHNOP_bind_ipi 7 +struct evtchn_bind_ipi { + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; + +/* + * EVTCHNOP_close: Close a local event channel . If the channel is + * interdomain then the remote end is placed in the unbound state + * (EVTCHNSTAT_unbound), awaiting a new connection. + */ +#define EVTCHNOP_close 3 +struct evtchn_close { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_close evtchn_close_t; + +/* + * EVTCHNOP_send: Send an event to the remote end of the channel whose local + * endpoint is . + */ +#define EVTCHNOP_send 4 +struct evtchn_send { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_send evtchn_send_t; + +/* + * EVTCHNOP_status: Get the current status of the communication channel which + * has an endpoint at . + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may obtain the status of an event + * channel for which is not DOMID_SELF. + */ +#define EVTCHNOP_status 5 +struct evtchn_status { + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + uint32_t status; + uint32_t vcpu; /* VCPU to which this channel is bound. 
*/ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ + } u; +}; +typedef struct evtchn_status evtchn_status_t; + +/* + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an + * event is pending. + * NOTES: + * 1. IPI-bound channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 3. All other channels notify vcpu0 by default. This default is set when + * the channel is allocated (a port that is freed and subsequently reused + * has its binding reset to vcpu0). + */ +#define EVTCHNOP_bind_vcpu 8 +struct evtchn_bind_vcpu { + /* IN parameters. */ + evtchn_port_t port; + uint32_t vcpu; +}; +typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; + +/* + * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver + * a notification to the appropriate VCPU if an event is pending. + */ +#define EVTCHNOP_unmask 9 +struct evtchn_unmask { + /* IN parameters. */ + evtchn_port_t port; +}; +typedef struct evtchn_unmask evtchn_unmask_t; + +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + */ +#define EVTCHNOP_reset 10 +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; +typedef struct evtchn_reset evtchn_reset_t; + +/* + * Argument to event_channel_op_compat() hypercall. Superceded by new + * event_channel_op() hypercall since 0x00030202. + */ +struct evtchn_op { + uint32_t cmd; /* EVTCHNOP_* */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; +}; +typedef struct evtchn_op evtchn_op_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); + +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/event_channel.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/xen-compat.h =================================================================== --- xen/interface/xen-compat.h (.../stable/6/sys) (revision 0) +++ xen/interface/xen-compat.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,44 @@ +/****************************************************************************** + * xen-compat.h + * + * Guest OS interface to Xen. Compatibility layer. 
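As a concrete illustration of the operations declared in event_channel.h above, a guest might allocate an unbound port for domain 0 to connect to and later kick the remote end roughly as follows. HYPERVISOR_event_channel_op() stands for whatever wrapper the port provides around the two-argument hypercall described in the prototype comment, and DOMID_SELF comes from xen.h; this is only a sketch, not code from the patch.

extern int HYPERVISOR_event_channel_op(int cmd, void *arg); /* assumed wrapper */

static evtchn_port_t
example_alloc_unbound_port(void)
{
    struct evtchn_alloc_unbound alloc;
    struct evtchn_send send;

    alloc.dom = DOMID_SELF;     /* allocate the port in our own domain */
    alloc.remote_dom = 0;       /* ... and let domain 0 bind to it */
    if (HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc) != 0)
        return (0);

    /* Once the peer has bound, an event is raised by sending to the port. */
    send.port = alloc.port;
    (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);
    return (alloc.port);
}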
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2006, Christian Limpach + */ + +#ifndef __XEN_PUBLIC_XEN_COMPAT_H__ +#define __XEN_PUBLIC_XEN_COMPAT_H__ + +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030209 + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +/* Xen is built with matching headers and implements the latest interface. */ +#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__ +#elif !defined(__XEN_INTERFACE_VERSION__) +/* Guests which do not specify a version get the legacy interface. */ +#define __XEN_INTERFACE_VERSION__ 0x00000000 +#endif + +#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__ +#error "These header files do not support the requested interface version." +#endif + +#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */ Property changes on: xen/interface/xen-compat.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/arch-ia64.h =================================================================== --- xen/interface/arch-ia64.h (.../stable/6/sys) (revision 0) +++ xen/interface/arch-ia64.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,621 @@ +/****************************************************************************** + * arch-ia64/hypervisor-if.h + * + * Guest OS interface to IA64 Xen. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
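The compatibility scheme in xen-compat.h above is purely a preprocessor contract: a consumer that wants a newer ABI (for example the typed guest handles introduced in 0x00030201) defines __XEN_INTERFACE_VERSION__ before any interface header is included, normally via the build system rather than in source. A minimal sketch, with an arbitrarily chosen version:

/* Illustrative only: the value below is a hypothetical choice. */
#define __XEN_INTERFACE_VERSION__ 0x00030205

#include "xen.h"    /* pulls in xen-compat.h, which validates the version */

#if __XEN_INTERFACE_VERSION__ >= 0x00030201
/* DEFINE_XEN_GUEST_HANDLE() now yields struct wrappers rather than bare
 * pointers, so handles must be filled in with set_xen_guest_handle(). */
#endif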
+ * + */ + +#include "xen.h" + +#ifndef __HYPERVISOR_IF_IA64_H__ +#define __HYPERVISOR_IF_IA64_H__ + +#if !defined(__GNUC__) || defined(__STRICT_ANSI__) +#error "Anonymous structs/unions are a GNU extension." +#endif + +/* Structural guest handles introduced in 0x00030201. */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030201 +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef struct { type *p; } __guest_handle_ ## name +#else +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ + typedef type * __guest_handle_ ## name +#endif + +#define __DEFINE_XEN_GUEST_HANDLE(name, type) \ + ___DEFINE_XEN_GUEST_HANDLE(name, type); \ + ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type) + +#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) +#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name +#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) +#define uint64_aligned_t uint64_t +#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) +#ifdef __XEN_TOOLS__ +#define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) +#endif + +#ifndef __ASSEMBLY__ +typedef unsigned long xen_pfn_t; +#define PRI_xen_pfn "lx" +#endif + +/* Arch specific VIRQs definition */ +#define VIRQ_ITC VIRQ_ARCH_0 /* V. Virtual itc timer */ +#define VIRQ_MCA_CMC VIRQ_ARCH_1 /* MCA cmc interrupt */ +#define VIRQ_MCA_CPE VIRQ_ARCH_2 /* MCA cpe interrupt */ + +/* Maximum number of virtual CPUs in multi-processor guests. */ +/* WARNING: before changing this, check that shared_info fits on a page */ +#define MAX_VIRT_CPUS 64 + +/* IO ports location for PV. */ +#define IO_PORTS_PADDR 0x00000ffffc000000UL +#define IO_PORTS_SIZE 0x0000000004000000UL + +#ifndef __ASSEMBLY__ + +typedef unsigned long xen_ulong_t; + +#ifdef __XEN_TOOLS__ +#define XEN_PAGE_SIZE XC_PAGE_SIZE +#else +#define XEN_PAGE_SIZE PAGE_SIZE +#endif + +#define INVALID_MFN (~0UL) + +struct pt_fpreg { + union { + unsigned long bits[2]; + long double __dummy; /* force 16-byte alignment */ + } u; +}; + +union vac { + unsigned long value; + struct { + int a_int:1; + int a_from_int_cr:1; + int a_to_int_cr:1; + int a_from_psr:1; + int a_from_cpuid:1; + int a_cover:1; + int a_bsw:1; + long reserved:57; + }; +}; +typedef union vac vac_t; + +union vdc { + unsigned long value; + struct { + int d_vmsw:1; + int d_extint:1; + int d_ibr_dbr:1; + int d_pmc:1; + int d_to_pmd:1; + int d_itm:1; + long reserved:58; + }; +}; +typedef union vdc vdc_t; + +struct mapped_regs { + union vac vac; + union vdc vdc; + unsigned long virt_env_vaddr; + unsigned long reserved1[29]; + unsigned long vhpi; + unsigned long reserved2[95]; + union { + unsigned long vgr[16]; + unsigned long bank1_regs[16]; // bank1 regs (r16-r31) when bank0 active + }; + union { + unsigned long vbgr[16]; + unsigned long bank0_regs[16]; // bank0 regs (r16-r31) when bank1 active + }; + unsigned long vnat; + unsigned long vbnat; + unsigned long vcpuid[5]; + unsigned long reserved3[11]; + unsigned long vpsr; + unsigned long vpr; + unsigned long reserved4[76]; + union { + unsigned long vcr[128]; + struct { + unsigned long dcr; // CR0 + unsigned long itm; + unsigned long iva; + unsigned long rsv1[5]; + unsigned long pta; // CR8 + unsigned long rsv2[7]; + unsigned long ipsr; // CR16 + unsigned long isr; + unsigned long rsv3; + unsigned long iip; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long ifs; + unsigned long iim; // CR24 + unsigned long iha; + unsigned long rsv4[38]; + unsigned long lid; // CR64 + unsigned long ivr; + unsigned long tpr; + 
unsigned long eoi; + unsigned long irr[4]; + unsigned long itv; // CR72 + unsigned long pmv; + unsigned long cmcv; + unsigned long rsv5[5]; + unsigned long lrr0; // CR80 + unsigned long lrr1; + unsigned long rsv6[46]; + }; + }; + union { + unsigned long reserved5[128]; + struct { + unsigned long precover_ifs; + unsigned long unat; // not sure if this is needed until NaT arch is done + int interrupt_collection_enabled; // virtual psr.ic + /* virtual interrupt deliverable flag is evtchn_upcall_mask in + * shared info area now. interrupt_mask_addr is the address + * of evtchn_upcall_mask for current vcpu + */ + unsigned char *interrupt_mask_addr; + int pending_interruption; + unsigned char vpsr_pp; + unsigned char vpsr_dfh; + unsigned char hpsr_dfh; + unsigned char hpsr_mfh; + unsigned long reserved5_1[4]; + int metaphysical_mode; // 1 = use metaphys mapping, 0 = use virtual + int banknum; // 0 or 1, which virtual register bank is active + unsigned long rrs[8]; // region registers + unsigned long krs[8]; // kernel registers + unsigned long tmp[16]; // temp registers (e.g. for hyperprivops) + }; + }; +}; +typedef struct mapped_regs mapped_regs_t; + +struct vpd { + struct mapped_regs vpd_low; + unsigned long reserved6[3456]; + unsigned long vmm_avail[128]; + unsigned long reserved7[4096]; +}; +typedef struct vpd vpd_t; + +struct arch_vcpu_info { +}; +typedef struct arch_vcpu_info arch_vcpu_info_t; + +/* + * This structure is used for magic page in domain pseudo physical address + * space and the result of XENMEM_machine_memory_map. + * As the XENMEM_machine_memory_map result, + * xen_memory_map::nr_entries indicates the size in bytes + * including struct xen_ia64_memmap_info. Not the number of entries. + */ +struct xen_ia64_memmap_info { + uint64_t efi_memmap_size; /* size of EFI memory map */ + uint64_t efi_memdesc_size; /* size of an EFI memory map descriptor */ + uint32_t efi_memdesc_version; /* memory descriptor version */ + void *memdesc[0]; /* array of efi_memory_desc_t */ +}; +typedef struct xen_ia64_memmap_info xen_ia64_memmap_info_t; + +struct arch_shared_info { + /* PFN of the start_info page. */ + unsigned long start_info_pfn; + + /* Interrupt vector for event channel. */ + int evtchn_vector; + + /* PFN of memmap_info page */ + unsigned int memmap_info_num_pages;/* currently only = 1 case is + supported. 
*/ + unsigned long memmap_info_pfn; + + uint64_t pad[31]; +}; +typedef struct arch_shared_info arch_shared_info_t; + +typedef unsigned long xen_callback_t; + +struct ia64_tr_entry { + unsigned long pte; + unsigned long itir; + unsigned long vadr; + unsigned long rid; +}; +typedef struct ia64_tr_entry ia64_tr_entry_t; +DEFINE_XEN_GUEST_HANDLE(ia64_tr_entry_t); + +struct vcpu_tr_regs { + struct ia64_tr_entry itrs[12]; + struct ia64_tr_entry dtrs[12]; +}; + +union vcpu_ar_regs { + unsigned long ar[128]; + struct { + unsigned long kr[8]; + unsigned long rsv1[8]; + unsigned long rsc; + unsigned long bsp; + unsigned long bspstore; + unsigned long rnat; + unsigned long rsv2; + unsigned long fcr; + unsigned long rsv3[2]; + unsigned long eflag; + unsigned long csd; + unsigned long ssd; + unsigned long cflg; + unsigned long fsr; + unsigned long fir; + unsigned long fdr; + unsigned long rsv4; + unsigned long ccv; /* 32 */ + unsigned long rsv5[3]; + unsigned long unat; + unsigned long rsv6[3]; + unsigned long fpsr; + unsigned long rsv7[3]; + unsigned long itc; + unsigned long rsv8[3]; + unsigned long ign1[16]; + unsigned long pfs; /* 64 */ + unsigned long lc; + unsigned long ec; + unsigned long rsv9[45]; + unsigned long ign2[16]; + }; +}; + +union vcpu_cr_regs { + unsigned long cr[128]; + struct { + unsigned long dcr; // CR0 + unsigned long itm; + unsigned long iva; + unsigned long rsv1[5]; + unsigned long pta; // CR8 + unsigned long rsv2[7]; + unsigned long ipsr; // CR16 + unsigned long isr; + unsigned long rsv3; + unsigned long iip; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long ifs; + unsigned long iim; // CR24 + unsigned long iha; + unsigned long rsv4[38]; + unsigned long lid; // CR64 + unsigned long ivr; + unsigned long tpr; + unsigned long eoi; + unsigned long irr[4]; + unsigned long itv; // CR72 + unsigned long pmv; + unsigned long cmcv; + unsigned long rsv5[5]; + unsigned long lrr0; // CR80 + unsigned long lrr1; + unsigned long rsv6[46]; + }; +}; + +struct vcpu_guest_context_regs { + unsigned long r[32]; + unsigned long b[8]; + unsigned long bank[16]; + unsigned long ip; + unsigned long psr; + unsigned long cfm; + unsigned long pr; + unsigned int nats; /* NaT bits for r1-r31. */ + unsigned int bnats; /* Nat bits for banked registers. */ + union vcpu_ar_regs ar; + union vcpu_cr_regs cr; + struct pt_fpreg f[128]; + unsigned long dbr[8]; + unsigned long ibr[8]; + unsigned long rr[8]; + unsigned long pkr[16]; + + /* FIXME: cpuid,pmd,pmc */ + + unsigned long xip; + unsigned long xpsr; + unsigned long xfs; + unsigned long xr[4]; + + struct vcpu_tr_regs tr; + + /* Physical registers in case of debug event. */ + unsigned long excp_iipa; + unsigned long excp_ifa; + unsigned long excp_isr; + unsigned int excp_vector; + + /* + * The rbs is intended to be the image of the stacked registers still + * in the cpu (not yet stored in memory). It is laid out as if it + * were written in memory at a 512 (64*8) aligned address + offset. + * rbs_voff is (offset / 8). rbs_nat contains NaT bits for the + * remaining rbs registers. rbs_rnat contains NaT bits for in memory + * rbs registers. + * Note: loadrs is 2**14 bytes == 2**11 slots. + */ + unsigned int rbs_voff; + unsigned long rbs[2048]; + unsigned long rbs_rnat; + + /* + * RSE.N_STACKED_PHYS via PAL_RSE_INFO + * Strictly this isn't cpu context, but this value is necessary + * for domain save/restore. So is here. 
+ */ + unsigned long num_phys_stacked; +}; + +struct vcpu_guest_context { +#define VGCF_EXTRA_REGS (1UL << 1) /* Set extra regs. */ +#define VGCF_SET_CR_IRR (1UL << 2) /* Set cr_irr[0:3]. */ +#define VGCF_online (1UL << 3) /* make this vcpu online */ + unsigned long flags; /* VGCF_* flags */ + + struct vcpu_guest_context_regs regs; + + unsigned long event_callback_ip; + + /* xen doesn't share privregs pages with hvm domain so that this member + * doesn't make sense for hvm domain. + * ~0UL is already used for INVALID_P2M_ENTRY. */ +#define VGC_PRIVREGS_HVM (~(-2UL)) + unsigned long privregs_pfn; +}; +typedef struct vcpu_guest_context vcpu_guest_context_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t); + +/* dom0 vp op */ +#define __HYPERVISOR_ia64_dom0vp_op __HYPERVISOR_arch_0 +/* Map io space in machine address to dom0 physical address space. + Currently physical assigned address equals to machine address. */ +#define IA64_DOM0VP_ioremap 0 + +/* Convert a pseudo physical page frame number to the corresponding + machine page frame number. If no page is assigned, INVALID_MFN or + GPFN_INV_MASK is returned depending on domain's non-vti/vti mode. */ +#define IA64_DOM0VP_phystomach 1 + +/* Convert a machine page frame number to the corresponding pseudo physical + page frame number of the caller domain. */ +#define IA64_DOM0VP_machtophys 3 + +/* Reserved for future use. */ +#define IA64_DOM0VP_iounmap 4 + +/* Unmap and free pages contained in the specified pseudo physical region. */ +#define IA64_DOM0VP_zap_physmap 5 + +/* Assign machine page frame to dom0's pseudo physical address space. */ +#define IA64_DOM0VP_add_physmap 6 + +/* expose the p2m table into domain */ +#define IA64_DOM0VP_expose_p2m 7 + +/* xen perfmon */ +#define IA64_DOM0VP_perfmon 8 + +/* gmfn version of IA64_DOM0VP_add_physmap */ +#define IA64_DOM0VP_add_physmap_with_gmfn 9 + +/* get fpswa revision */ +#define IA64_DOM0VP_fpswa_revision 10 + +/* Add an I/O port space range */ +#define IA64_DOM0VP_add_io_space 11 + +/* expose the foreign domain's p2m table into privileged domain */ +#define IA64_DOM0VP_expose_foreign_p2m 12 +#define IA64_DOM0VP_EFP_ALLOC_PTE 0x1 /* allocate p2m table */ + +/* unexpose the foreign domain's p2m table into privileged domain */ +#define IA64_DOM0VP_unexpose_foreign_p2m 13 + +// flags for page assignement to pseudo physical address space +#define _ASSIGN_readonly 0 +#define ASSIGN_readonly (1UL << _ASSIGN_readonly) +#define ASSIGN_writable (0UL << _ASSIGN_readonly) // dummy flag +/* Internal only: memory attribute must be WC/UC/UCE. */ +#define _ASSIGN_nocache 1 +#define ASSIGN_nocache (1UL << _ASSIGN_nocache) +// tlb tracking +#define _ASSIGN_tlb_track 2 +#define ASSIGN_tlb_track (1UL << _ASSIGN_tlb_track) +/* Internal only: associated with PGC_allocated bit */ +#define _ASSIGN_pgc_allocated 3 +#define ASSIGN_pgc_allocated (1UL << _ASSIGN_pgc_allocated) +/* Page is an IO page. */ +#define _ASSIGN_io 4 +#define ASSIGN_io (1UL << _ASSIGN_io) + +/* This structure has the same layout of struct ia64_boot_param, defined in + . It is redefined here to ease use. 
*/ +struct xen_ia64_boot_param { + unsigned long command_line; /* physical address of cmd line args */ + unsigned long efi_systab; /* physical address of EFI system table */ + unsigned long efi_memmap; /* physical address of EFI memory map */ + unsigned long efi_memmap_size; /* size of EFI memory map */ + unsigned long efi_memdesc_size; /* size of an EFI memory map descriptor */ + unsigned int efi_memdesc_version; /* memory descriptor version */ + struct { + unsigned short num_cols; /* number of columns on console. */ + unsigned short num_rows; /* number of rows on console. */ + unsigned short orig_x; /* cursor's x position */ + unsigned short orig_y; /* cursor's y position */ + } console_info; + unsigned long fpswa; /* physical address of the fpswa interface */ + unsigned long initrd_start; + unsigned long initrd_size; + unsigned long domain_start; /* va where the boot time domain begins */ + unsigned long domain_size; /* how big is the boot domain */ +}; + +#endif /* !__ASSEMBLY__ */ + +/* Size of the shared_info area (this is not related to page size). */ +#define XSI_SHIFT 14 +#define XSI_SIZE (1 << XSI_SHIFT) +/* Log size of mapped_regs area (64 KB - only 4KB is used). */ +#define XMAPPEDREGS_SHIFT 12 +#define XMAPPEDREGS_SIZE (1 << XMAPPEDREGS_SHIFT) +/* Offset of XASI (Xen arch shared info) wrt XSI_BASE. */ +#define XMAPPEDREGS_OFS XSI_SIZE + +/* Hyperprivops. */ +#define HYPERPRIVOP_START 0x1 +#define HYPERPRIVOP_RFI (HYPERPRIVOP_START + 0x0) +#define HYPERPRIVOP_RSM_DT (HYPERPRIVOP_START + 0x1) +#define HYPERPRIVOP_SSM_DT (HYPERPRIVOP_START + 0x2) +#define HYPERPRIVOP_COVER (HYPERPRIVOP_START + 0x3) +#define HYPERPRIVOP_ITC_D (HYPERPRIVOP_START + 0x4) +#define HYPERPRIVOP_ITC_I (HYPERPRIVOP_START + 0x5) +#define HYPERPRIVOP_SSM_I (HYPERPRIVOP_START + 0x6) +#define HYPERPRIVOP_GET_IVR (HYPERPRIVOP_START + 0x7) +#define HYPERPRIVOP_GET_TPR (HYPERPRIVOP_START + 0x8) +#define HYPERPRIVOP_SET_TPR (HYPERPRIVOP_START + 0x9) +#define HYPERPRIVOP_EOI (HYPERPRIVOP_START + 0xa) +#define HYPERPRIVOP_SET_ITM (HYPERPRIVOP_START + 0xb) +#define HYPERPRIVOP_THASH (HYPERPRIVOP_START + 0xc) +#define HYPERPRIVOP_PTC_GA (HYPERPRIVOP_START + 0xd) +#define HYPERPRIVOP_ITR_D (HYPERPRIVOP_START + 0xe) +#define HYPERPRIVOP_GET_RR (HYPERPRIVOP_START + 0xf) +#define HYPERPRIVOP_SET_RR (HYPERPRIVOP_START + 0x10) +#define HYPERPRIVOP_SET_KR (HYPERPRIVOP_START + 0x11) +#define HYPERPRIVOP_FC (HYPERPRIVOP_START + 0x12) +#define HYPERPRIVOP_GET_CPUID (HYPERPRIVOP_START + 0x13) +#define HYPERPRIVOP_GET_PMD (HYPERPRIVOP_START + 0x14) +#define HYPERPRIVOP_GET_EFLAG (HYPERPRIVOP_START + 0x15) +#define HYPERPRIVOP_SET_EFLAG (HYPERPRIVOP_START + 0x16) +#define HYPERPRIVOP_RSM_BE (HYPERPRIVOP_START + 0x17) +#define HYPERPRIVOP_GET_PSR (HYPERPRIVOP_START + 0x18) +#define HYPERPRIVOP_SET_RR0_TO_RR4 (HYPERPRIVOP_START + 0x19) +#define HYPERPRIVOP_MAX (0x1a) + +/* Fast and light hypercalls. */ +#define __HYPERVISOR_ia64_fast_eoi __HYPERVISOR_arch_1 + +/* Extra debug features. */ +#define __HYPERVISOR_ia64_debug_op __HYPERVISOR_arch_2 + +/* Xencomm macros. */ +#define XENCOMM_INLINE_MASK 0xf800000000000000UL +#define XENCOMM_INLINE_FLAG 0x8000000000000000UL + +#ifndef __ASSEMBLY__ + +/* + * Optimization features. + * The hypervisor may do some special optimizations for guests. This hypercall + * can be used to switch on/of these special optimizations. 
+ */ +#define __HYPERVISOR_opt_feature 0x700UL + +#define XEN_IA64_OPTF_OFF 0x0 +#define XEN_IA64_OPTF_ON 0x1 + +/* + * If this feature is switched on, the hypervisor inserts the + * tlb entries without calling the guests traphandler. + * This is useful in guests using region 7 for identity mapping + * like the linux kernel does. + */ +#define XEN_IA64_OPTF_IDENT_MAP_REG7 1 + +/* Identity mapping of region 4 addresses in HVM. */ +#define XEN_IA64_OPTF_IDENT_MAP_REG4 2 + +/* Identity mapping of region 5 addresses in HVM. */ +#define XEN_IA64_OPTF_IDENT_MAP_REG5 3 + +#define XEN_IA64_OPTF_IDENT_MAP_NOT_SET (0) + +struct xen_ia64_opt_feature { + unsigned long cmd; /* Which feature */ + unsigned char on; /* Switch feature on/off */ + union { + struct { + /* The page protection bit mask of the pte. + * This will be or'ed with the pte. */ + unsigned long pgprot; + unsigned long key; /* A protection key for itir. */ + }; + }; +}; + +#endif /* __ASSEMBLY__ */ + +/* xen perfmon */ +#ifdef XEN +#ifndef __ASSEMBLY__ +#ifndef _ASM_IA64_PERFMON_H + +#include // asm/perfmon.h requires struct list_head +#include +// for PFM_xxx and pfarg_features_t, pfarg_context_t, pfarg_reg_t, pfarg_load_t + +#endif /* _ASM_IA64_PERFMON_H */ + +DEFINE_XEN_GUEST_HANDLE(pfarg_features_t); +DEFINE_XEN_GUEST_HANDLE(pfarg_context_t); +DEFINE_XEN_GUEST_HANDLE(pfarg_reg_t); +DEFINE_XEN_GUEST_HANDLE(pfarg_load_t); +#endif /* __ASSEMBLY__ */ +#endif /* XEN */ + +#ifndef __ASSEMBLY__ +#include "arch-ia64/hvm/memmap.h" +#endif + +#endif /* __HYPERVISOR_IF_IA64_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/arch-ia64.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/dom0_ops.h =================================================================== --- xen/interface/dom0_ops.h (.../stable/6/sys) (revision 0) +++ xen/interface/dom0_ops.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,120 @@ +/****************************************************************************** + * dom0_ops.h + * + * Process command requests from domain-0 guest OS. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
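Combined with the XENMEM_* commands from memory.h earlier in this patch, the guest-handle helpers above (each architecture header carries equivalent definitions) are used roughly as follows; HYPERVISOR_memory_op() is an assumed hypercall wrapper and parsing of the returned E820-style entries is omitted:

extern int HYPERVISOR_memory_op(int cmd, void *arg);    /* assumed wrapper */

/* Illustrative only: fetch the domain's pseudo-physical memory map. */
static int
example_get_memory_map(void *buf, unsigned int max_entries)
{
    struct xen_memory_map map;

    map.nr_entries = max_entries;           /* IN: room available in buf */
    set_xen_guest_handle(map.buffer, buf);  /* XEN_GUEST_HANDLE(void) */
    if (HYPERVISOR_memory_op(XENMEM_memory_map, &map) != 0)
        return (-1);
    return ((int)map.nr_entries);           /* OUT: entries actually written */
}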
+ * + * Copyright (c) 2002-2003, B Dragovic + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_DOM0_OPS_H__ +#define __XEN_PUBLIC_DOM0_OPS_H__ + +#include "xen.h" +#include "platform.h" + +#if __XEN_INTERFACE_VERSION__ >= 0x00030204 +#error "dom0_ops.h is a compatibility interface only" +#endif + +#define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION + +#define DOM0_SETTIME XENPF_settime +#define dom0_settime xenpf_settime +#define dom0_settime_t xenpf_settime_t + +#define DOM0_ADD_MEMTYPE XENPF_add_memtype +#define dom0_add_memtype xenpf_add_memtype +#define dom0_add_memtype_t xenpf_add_memtype_t + +#define DOM0_DEL_MEMTYPE XENPF_del_memtype +#define dom0_del_memtype xenpf_del_memtype +#define dom0_del_memtype_t xenpf_del_memtype_t + +#define DOM0_READ_MEMTYPE XENPF_read_memtype +#define dom0_read_memtype xenpf_read_memtype +#define dom0_read_memtype_t xenpf_read_memtype_t + +#define DOM0_MICROCODE XENPF_microcode_update +#define dom0_microcode xenpf_microcode_update +#define dom0_microcode_t xenpf_microcode_update_t + +#define DOM0_PLATFORM_QUIRK XENPF_platform_quirk +#define dom0_platform_quirk xenpf_platform_quirk +#define dom0_platform_quirk_t xenpf_platform_quirk_t + +typedef uint64_t cpumap_t; + +/* Unsupported legacy operation -- defined for API compatibility. */ +#define DOM0_MSR 15 +struct dom0_msr { + /* IN variables. */ + uint32_t write; + cpumap_t cpu_mask; + uint32_t msr; + uint32_t in1; + uint32_t in2; + /* OUT variables. */ + uint32_t out1; + uint32_t out2; +}; +typedef struct dom0_msr dom0_msr_t; +DEFINE_XEN_GUEST_HANDLE(dom0_msr_t); + +/* Unsupported legacy operation -- defined for API compatibility. */ +#define DOM0_PHYSICAL_MEMORY_MAP 40 +struct dom0_memory_map_entry { + uint64_t start, end; + uint32_t flags; /* reserved */ + uint8_t is_ram; +}; +typedef struct dom0_memory_map_entry dom0_memory_map_entry_t; +DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t); + +struct dom0_op { + uint32_t cmd; + uint32_t interface_version; /* DOM0_INTERFACE_VERSION */ + union { + struct dom0_msr msr; + struct dom0_settime settime; + struct dom0_add_memtype add_memtype; + struct dom0_del_memtype del_memtype; + struct dom0_read_memtype read_memtype; + struct dom0_microcode microcode; + struct dom0_platform_quirk platform_quirk; + struct dom0_memory_map_entry physical_memory_map; + uint8_t pad[128]; + } u; +}; +typedef struct dom0_op dom0_op_t; +DEFINE_XEN_GUEST_HANDLE(dom0_op_t); + +#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/dom0_ops.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/acm_ops.h =================================================================== --- xen/interface/acm_ops.h (.../stable/6/sys) (revision 0) +++ xen/interface/acm_ops.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,159 @@ +/* + * acm_ops.h: Xen access control module hypervisor commands + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * 
The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Reiner Sailer + * Copyright (c) 2005,2006 International Business Machines Corporation. + */ + +#ifndef __XEN_PUBLIC_ACM_OPS_H__ +#define __XEN_PUBLIC_ACM_OPS_H__ + +#include "xen.h" +#include "acm.h" + +/* + * Make sure you increment the interface version whenever you modify this file! + * This makes sure that old versions of acm tools will stop working in a + * well-defined way (rather than crashing the machine, for instance). + */ +#define ACM_INTERFACE_VERSION 0xAAAA000A + +/************************************************************************/ + +/* + * Prototype for this hypercall is: + * int acm_op(int cmd, void *args) + * @cmd == ACMOP_??? (access control module operation). + * @args == Operation-specific extra arguments (NULL if none). + */ + + +#define ACMOP_setpolicy 1 +struct acm_setpolicy { + /* IN */ + XEN_GUEST_HANDLE_64(void) pushcache; + uint32_t pushcache_size; +}; + + +#define ACMOP_getpolicy 2 +struct acm_getpolicy { + /* IN */ + XEN_GUEST_HANDLE_64(void) pullcache; + uint32_t pullcache_size; +}; + + +#define ACMOP_dumpstats 3 +struct acm_dumpstats { + /* IN */ + XEN_GUEST_HANDLE_64(void) pullcache; + uint32_t pullcache_size; +}; + + +#define ACMOP_getssid 4 +#define ACM_GETBY_ssidref 1 +#define ACM_GETBY_domainid 2 +struct acm_getssid { + /* IN */ + uint32_t get_ssid_by; /* ACM_GETBY_* */ + union { + domaintype_t domainid; + ssidref_t ssidref; + } id; + XEN_GUEST_HANDLE_64(void) ssidbuf; + uint32_t ssidbuf_size; +}; + +#define ACMOP_getdecision 5 +struct acm_getdecision { + /* IN */ + uint32_t get_decision_by1; /* ACM_GETBY_* */ + uint32_t get_decision_by2; /* ACM_GETBY_* */ + union { + domaintype_t domainid; + ssidref_t ssidref; + } id1; + union { + domaintype_t domainid; + ssidref_t ssidref; + } id2; + uint32_t hook; + /* OUT */ + uint32_t acm_decision; +}; + + +#define ACMOP_chgpolicy 6 +struct acm_change_policy { + /* IN */ + XEN_GUEST_HANDLE_64(void) policy_pushcache; + uint32_t policy_pushcache_size; + XEN_GUEST_HANDLE_64(void) del_array; + uint32_t delarray_size; + XEN_GUEST_HANDLE_64(void) chg_array; + uint32_t chgarray_size; + /* OUT */ + /* array with error code */ + XEN_GUEST_HANDLE_64(void) err_array; + uint32_t errarray_size; +}; + +#define ACMOP_relabeldoms 7 +struct acm_relabel_doms { + /* IN */ + XEN_GUEST_HANDLE_64(void) relabel_map; + uint32_t relabel_map_size; + /* OUT */ + XEN_GUEST_HANDLE_64(void) err_array; + uint32_t errarray_size; +}; + +/* future interface to Xen */ +struct xen_acmctl { + uint32_t cmd; + uint32_t interface_version; + union { + struct acm_setpolicy setpolicy; + struct acm_getpolicy getpolicy; + struct acm_dumpstats dumpstats; + struct acm_getssid getssid; + struct acm_getdecision getdecision; + struct acm_change_policy change_policy; + struct acm_relabel_doms relabel_doms; + } u; +}; + +typedef struct xen_acmctl xen_acmctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_acmctl_t); + +#endif /* __XEN_PUBLIC_ACM_OPS_H__ */ + 
+/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/acm_ops.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/io/pciif.h =================================================================== --- xen/interface/io/pciif.h (.../stable/6/sys) (revision 0) +++ xen/interface/io/pciif.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,101 @@ +/* + * PCI Backend/Frontend Common Data Structures & Macros + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Ryan Wilson + */ +#ifndef __XEN_PCI_COMMON_H__ +#define __XEN_PCI_COMMON_H__ + +/* Be sure to bump this number if you change this file */ +#define XEN_PCI_MAGIC "7" + +/* xen_pci_sharedinfo flags */ +#define _XEN_PCIF_active (0) +#define XEN_PCIF_active (1<<_XEN_PCI_active) + +/* xen_pci_op commands */ +#define XEN_PCI_OP_conf_read (0) +#define XEN_PCI_OP_conf_write (1) +#define XEN_PCI_OP_enable_msi (2) +#define XEN_PCI_OP_disable_msi (3) +#define XEN_PCI_OP_enable_msix (4) +#define XEN_PCI_OP_disable_msix (5) + +/* xen_pci_op error numbers */ +#define XEN_PCI_ERR_success (0) +#define XEN_PCI_ERR_dev_not_found (-1) +#define XEN_PCI_ERR_invalid_offset (-2) +#define XEN_PCI_ERR_access_denied (-3) +#define XEN_PCI_ERR_not_implemented (-4) +/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */ +#define XEN_PCI_ERR_op_failed (-5) + +/* + * it should be PAGE_SIZE-sizeof(struct xen_pci_op))/sizeof(struct msix_entry)) + * Should not exceed 128 + */ +#define SH_INFO_MAX_VEC 128 + +struct xen_msix_entry { + uint16_t vector; + uint16_t entry; +}; +struct xen_pci_op { + /* IN: what action to perform: XEN_PCI_OP_* */ + uint32_t cmd; + + /* OUT: will contain an error number (if any) from errno.h */ + int32_t err; + + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment */ + uint32_t bus; + uint32_t devfn; + + /* IN: which configuration registers to touch */ + int32_t offset; + int32_t size; + + /* IN/OUT: Contains the result after a READ or the value to WRITE */ + uint32_t value; + /* IN: Contains extra infor for this operation */ + uint32_t info; + /*IN: param for msi-x */ + struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC]; +}; + +struct xen_pci_sharedinfo { + /* flags - XEN_PCIF_* */ + uint32_t flags; + struct xen_pci_op op; +}; + +#endif /* __XEN_PCI_COMMON_H__ */ + +/* + * Local 
variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/io/pciif.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/io/kbdif.h =================================================================== --- xen/interface/io/kbdif.h (.../stable/6/sys) (revision 0) +++ xen/interface/io/kbdif.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,132 @@ +/* + * kbdif.h -- Xen virtual keyboard/mouse + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster + */ + +#ifndef __XEN_PUBLIC_IO_KBDIF_H__ +#define __XEN_PUBLIC_IO_KBDIF_H__ + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. + */ + +/* Pointer movement event */ +#define XENKBD_TYPE_MOTION 1 +/* Event type 2 currently not used */ +/* Key event (includes pointer buttons) */ +#define XENKBD_TYPE_KEY 3 +/* + * Pointer position event + * Capable backend sets feature-abs-pointer in xenstore. + * Frontend requests ot instead of XENKBD_TYPE_MOTION by setting + * request-abs-update in xenstore. + */ +#define XENKBD_TYPE_POS 4 + +struct xenkbd_motion +{ + uint8_t type; /* XENKBD_TYPE_MOTION */ + int32_t rel_x; /* relative X motion */ + int32_t rel_y; /* relative Y motion */ + int32_t rel_z; /* relative Z motion (wheel) */ +}; + +struct xenkbd_key +{ + uint8_t type; /* XENKBD_TYPE_KEY */ + uint8_t pressed; /* 1 if pressed; 0 otherwise */ + uint32_t keycode; /* KEY_* from linux/input.h */ +}; + +struct xenkbd_position +{ + uint8_t type; /* XENKBD_TYPE_POS */ + int32_t abs_x; /* absolute X position (in FB pixels) */ + int32_t abs_y; /* absolute Y position (in FB pixels) */ + int32_t rel_z; /* relative Z motion (wheel) */ +}; + +#define XENKBD_IN_EVENT_SIZE 40 + +union xenkbd_in_event +{ + uint8_t type; + struct xenkbd_motion motion; + struct xenkbd_key key; + struct xenkbd_position pos; + char pad[XENKBD_IN_EVENT_SIZE]; +}; + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + * No out events currently defined. 
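Returning to pciif.h above: a frontend performs a configuration-space access by filling the xen_pci_op embedded in the shared page, raising the active flag and waiting for the backend to clear it. The sketch below assumes wmb()/rmb() barrier macros and a cpu_spinwait()-style busy-wait, and builds the flag from _XEN_PCIF_active directly because the XEN_PCIF_active macro above appears to reference a misspelled _XEN_PCI_active:

/* Illustrative only: one synchronous config-space read; the event-channel
 * notification to the backend is left as a comment. */
static int32_t
example_pci_conf_read(struct xen_pci_sharedinfo *info, uint32_t bus,
    uint32_t devfn, int32_t offset, int32_t size, uint32_t *value)
{
    info->op.cmd    = XEN_PCI_OP_conf_read;
    info->op.domain = 0;
    info->op.bus    = bus;
    info->op.devfn  = devfn;
    info->op.offset = offset;
    info->op.size   = size;

    wmb();                                  /* publish the op before the flag */
    info->flags |= (1u << _XEN_PCIF_active);
    /* notify the backend via the frontend's event channel here */
    while (*(volatile uint32_t *)&info->flags & (1u << _XEN_PCIF_active))
        cpu_spinwait();                     /* assumed busy-wait primitive */
    rmb();                                  /* read the result after the flag clears */

    *value = info->op.value;
    return (info->op.err);
}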
+ */ + +#define XENKBD_OUT_EVENT_SIZE 40 + +union xenkbd_out_event +{ + uint8_t type; + char pad[XENKBD_OUT_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENKBD_IN_RING_SIZE 2048 +#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) +#define XENKBD_IN_RING_OFFS 1024 +#define XENKBD_IN_RING(page) \ + ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) +#define XENKBD_IN_RING_REF(page, idx) \ + (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) + +#define XENKBD_OUT_RING_SIZE 1024 +#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) +#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) +#define XENKBD_OUT_RING(page) \ + ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) +#define XENKBD_OUT_RING_REF(page, idx) \ + (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) + +struct xenkbd_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; +}; + +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/io/kbdif.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/io/ring.h =================================================================== --- xen/interface/io/ring.h (.../stable/6/sys) (revision 0) +++ xen/interface/io/ring.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,307 @@ +/****************************************************************************** + * ring.h + * + * Shared producer-consumer ring macros. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Tim Deegan and Andrew Warfield November 2004. + */ + +#ifndef __XEN_PUBLIC_IO_RING_H__ +#define __XEN_PUBLIC_IO_RING_H__ + +#include "../xen-compat.h" + +#if __XEN_INTERFACE_VERSION__ < 0x00030208 +#define xen_mb() mb() +#define xen_rmb() rmb() +#define xen_wmb() wmb() +#endif + +typedef unsigned int RING_IDX; + +/* Round a 32-bit unsigned constant down to the nearest power of two. */ +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) +#define __RD32(_x) (((_x) & 0xffff0000) ? 
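The kbdif.h shared page above pairs each ring with free-running producer/consumer indexes, so a frontend drains pending input events along these lines (rmb()/mb() are assumed barrier macros and the commented-out callbacks are placeholders for driver code):

/* Illustrative only: consume everything the backend has produced. */
static void
example_kbd_poll(struct xenkbd_page *page)
{
    uint32_t cons, prod;

    prod = page->in_prod;
    rmb();              /* read events only after reading in_prod */
    for (cons = page->in_cons; cons != prod; cons++) {
        union xenkbd_in_event *event = &XENKBD_IN_RING_REF(page, cons);

        switch (event->type) {
        case XENKBD_TYPE_KEY:
            /* handle_key(event->key.keycode, event->key.pressed); */
            break;
        case XENKBD_TYPE_MOTION:
            /* handle_motion(event->motion.rel_x, event->motion.rel_y); */
            break;
        default:
            break;      /* frontends must ignore unknown in events */
        }
    }
    mb();               /* finish reading events before releasing the slots */
    page->in_cons = cons;
}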
__RD16((_x)>>16)<<16 : __RD16(_x)) + +/* + * Calculate size of a shared ring, given the total available space for the + * ring and indexes (_sz), and the name tag of the request/response structure. + * A ring contains as many entries as will fit, rounded down to the nearest + * power of two (so we can mask with (size-1) to loop around). + */ +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + +/* + * Macros to make the correct C datatypes for a new kind of ring. + * + * To make a new ring datatype, you need to have two message structures, + * let's say request_t, and response_t already defined. + * + * In a header where you want the ring datatype declared, you then do: + * + * DEFINE_RING_TYPES(mytag, request_t, response_t); + * + * These expand out to give you a set of types, as you can see below. + * The most important of these are: + * + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. + * + * To initialize a ring in your code you need to know the location and size + * of the shared memory area (PAGE_SIZE, for instance). To initialise + * the front half: + * + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + * + * Initializing the back follows similarly (note that only the front + * initializes the shared ring): + * + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); + */ + +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + uint8_t pad[48]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* Syntactic sugar */ \ +typedef struct __name##_sring __name##_sring_t; \ +typedef struct __name##_front_ring __name##_front_ring_t; \ +typedef struct __name##_back_ring __name##_back_ring_t + +/* + * Macros for manipulating rings. + * + * FRONT_RING_whatever works on the "front end" of a ring: here + * requests are pushed on to the ring and responses taken off it. + * + * BACK_RING_whatever works on the "back end" of a ring: here + * requests are taken off the ring and responses put on. + * + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. + * This is OK in 1-for-1 request-response situations where the + * requestor (front end) never has more than RING_SIZE()-1 + * outstanding requests. 
+ */ + +/* Initialising empty rings */ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->pad, 0, sizeof((_s)->pad)); \ +} while(0) + +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ +} while (0) + +/* Initialize to existing shared indexes -- for recovery */ +#define FRONT_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->req_prod_pvt = (_s)->req_prod; \ + (_r)->rsp_cons = (_s)->rsp_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ +} while (0) + +#define BACK_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ + (_r)->req_cons = (_s)->req_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ +} while (0) + +/* How big is this ring? */ +#define RING_SIZE(_r) \ + ((_r)->nr_ents) + +/* Number of free requests (for use on front side only). */ +#define RING_FREE_REQUESTS(_r) \ + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) + +/* Test if there is an empty slot available on the front ring. + * (This is only meaningful from the front. ) + */ +#define RING_FULL(_r) \ + (RING_FREE_REQUESTS(_r) == 0) + +/* Test if there are outstanding messages to be processed on a ring. */ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ + ((_r)->sring->rsp_prod - (_r)->rsp_cons) + +#ifdef __GNUC__ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) +#else +/* Same as above, but without the nice GCC ({ ... }) syntax. */ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ((((_r)->sring->req_prod - (_r)->req_cons) < \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ + ((_r)->sring->req_prod - (_r)->req_cons) : \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) +#endif + +/* Direct access to individual ring elements, by index. */ +#define RING_GET_REQUEST(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) + +#define RING_GET_RESPONSE(_r, _idx) \ + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) + +/* Loop termination condition: Would the specified index overflow the ring? */ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) + +#define RING_PUSH_REQUESTS(_r) do { \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +} while (0) + +#define RING_PUSH_RESPONSES(_r) do { \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +} while (0) + +/* + * Notification hold-off (req_event and rsp_event): + * + * When queueing requests or responses on a shared ring, it may not always be + * necessary to notify the remote end. For example, if requests are in flight + * in a backend, the front may be able to queue further requests without + * notifying the back (if the back checks for new requests when it queues + * responses). + * + * When enqueuing requests or responses: + * + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). 
The second argument + * is a boolean return value. True indicates that the receiver requires an + * asynchronous notification. + * + * After dequeuing requests or responses (before sleeping the connection): + * + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES(). + * The second argument is a boolean return value. True indicates that there + * are pending messages on the ring (i.e., the connection should not be put + * to sleep). + * + * These macros will set the req_event/rsp_event field to trigger a + * notification on the very next message that is enqueued. If you want to + * create batches of work (i.e., only receive a notification after several + * messages have been enqueued) then you will need to create a customised + * version of the FINAL_CHECK macro in your own code, which sets the event + * field appropriately. + */ + +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + xen_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +} while (0) + +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +} while (0) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/io/ring.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/io/tpmif.h =================================================================== --- xen/interface/io/tpmif.h (.../stable/6/sys) (revision 0) +++ xen/interface/io/tpmif.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,77 @@ +/****************************************************************************** + * tpmif.h + * + * TPM I/O interface for Xen guest OSes. 
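To make the ring.h recipe above concrete, here is roughly what a frontend does with a hypothetical mytag request/response pair. The shared page, its size and notify_remote_via_evtchn() are assumptions outside this header:

struct mytag_request  { uint32_t id; };
struct mytag_response { uint32_t id; int16_t status; };
DEFINE_RING_TYPES(mytag, struct mytag_request, struct mytag_response);

/* Illustrative only: initialise the front half and push one request. */
static void
example_submit(void *shared_page, size_t len, unsigned int port)
{
    mytag_front_ring_t ring;
    struct mytag_request *req;
    int notify;

    SHARED_RING_INIT((mytag_sring_t *)shared_page);
    FRONT_RING_INIT(&ring, (mytag_sring_t *)shared_page, len);

    req = RING_GET_REQUEST(&ring, ring.req_prod_pvt);
    req->id = 1;
    ring.req_prod_pvt++;

    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ring, notify);
    if (notify)
        notify_remote_via_evtchn(port);     /* assumed helper */
}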
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, IBM Corporation + * + * Author: Stefan Berger, stefanb@us.ibm.com + * Grant table support: Mahadevan Gomathisankaran + * + * This code has been derived from tools/libxc/xen/io/netif.h + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_TPMIF_H__ +#define __XEN_PUBLIC_IO_TPMIF_H__ + +#include "../grant_table.h" + +struct tpmif_tx_request { + unsigned long addr; /* Machine address of packet. */ + grant_ref_t ref; /* grant table access reference */ + uint16_t unused; + uint16_t size; /* Packet size in bytes. */ +}; +typedef struct tpmif_tx_request tpmif_tx_request_t; + +/* + * The TPMIF_TX_RING_SIZE defines the number of pages the + * front-end and backend can exchange (= size of array). + */ +typedef uint32_t TPMIF_RING_IDX; + +#define TPMIF_TX_RING_SIZE 1 + +/* This structure must fit in a memory page. */ + +struct tpmif_ring { + struct tpmif_tx_request req; +}; +typedef struct tpmif_ring tpmif_ring_t; + +struct tpmif_tx_interface { + struct tpmif_ring ring[TPMIF_TX_RING_SIZE]; +}; +typedef struct tpmif_tx_interface tpmif_tx_interface_t; + +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/io/tpmif.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/io/xs_wire.h =================================================================== --- xen/interface/io/xs_wire.h (.../stable/6/sys) (revision 0) +++ xen/interface/io/xs_wire.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,130 @@ +/* + * Details of the "wire" protocol between Xen Store Daemon and client + * library or guest kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
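tpmif.h above defines a one-slot transmit ring, so queueing a command for the backend amounts to filling that single request; granting the page and signalling the event channel happen elsewhere and are only hinted at here:

/* Illustrative only: publish one TPM command buffer to the backend. */
static void
example_tpm_queue(struct tpmif_tx_interface *tx, grant_ref_t ref,
    unsigned long maddr, uint16_t len)
{
    struct tpmif_tx_request *req = &tx->ring[0].req;   /* TPMIF_TX_RING_SIZE == 1 */

    req->addr = maddr;      /* machine address of the granted packet page */
    req->ref  = ref;        /* grant reference covering that page */
    req->size = len;        /* command length in bytes */
    /* notify the backend over the interface's event channel here */
}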
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Rusty Russell IBM Corporation + */ + +#ifndef _XS_WIRE_H +#define _XS_WIRE_H + +enum xsd_sockmsg_type +{ + XS_DEBUG, + XS_DIRECTORY, + XS_READ, + XS_GET_PERMS, + XS_WATCH, + XS_UNWATCH, + XS_TRANSACTION_START, + XS_TRANSACTION_END, + XS_INTRODUCE, + XS_RELEASE, + XS_GET_DOMAIN_PATH, + XS_WRITE, + XS_MKDIR, + XS_RM, + XS_SET_PERMS, + XS_WATCH_EVENT, + XS_ERROR, + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME, + XS_SET_TARGET +}; + +#define XS_WRITE_NONE "NONE" +#define XS_WRITE_CREATE "CREATE" +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" + +/* We hand errors as strings, for portability. */ +struct xsd_errors +{ + int errnum; + const char *errstring; +}; +#define XSD_ERROR(x) { x, #x } +/* LINTED: static unused */ +static struct xsd_errors xsd_errors[] +#if defined(__GNUC__) +__attribute__((unused)) +#endif + = { + XSD_ERROR(EINVAL), + XSD_ERROR(EACCES), + XSD_ERROR(EEXIST), + XSD_ERROR(EISDIR), + XSD_ERROR(ENOENT), + XSD_ERROR(ENOMEM), + XSD_ERROR(ENOSPC), + XSD_ERROR(EIO), + XSD_ERROR(ENOTEMPTY), + XSD_ERROR(ENOSYS), + XSD_ERROR(EROFS), + XSD_ERROR(EBUSY), + XSD_ERROR(EAGAIN), + XSD_ERROR(EISCONN) +}; + +struct xsd_sockmsg +{ + uint32_t type; /* XS_??? */ + uint32_t req_id;/* Request identifier, echoed in daemon's response. */ + uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */ + uint32_t len; /* Length of data following this. */ + + /* Generally followed by nul-terminated string(s). */ +}; + +enum xs_watch_type +{ + XS_WATCH_PATH = 0, + XS_WATCH_TOKEN +}; + +/* Inter-domain shared memory communications. */ +#define XENSTORE_RING_SIZE 1024 +typedef uint32_t XENSTORE_RING_IDX; +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1)) +struct xenstore_domain_interface { + char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */ + char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */ + XENSTORE_RING_IDX req_cons, req_prod; + XENSTORE_RING_IDX rsp_cons, rsp_prod; +}; + +/* Violating this is very bad. See docs/misc/xenstore.txt. */ +#define XENSTORE_PAYLOAD_MAX 4096 + +/* Violating these just gets you an error back */ +#define XENSTORE_ABS_PATH_MAX 3072 +#define XENSTORE_REL_PATH_MAX 2048 + +#endif /* _XS_WIRE_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/io/xs_wire.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/io/console.h =================================================================== --- xen/interface/io/console.h (.../stable/6/sys) (revision 0) +++ xen/interface/io/console.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,51 @@ +/****************************************************************************** + * console.h + * + * Console I/O interface for Xen guest OSes. 
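
A minimal sketch of pushing one request onto the shared xenstore page defined above; free-space checking, memory barriers and the store event-channel notification that must follow are deliberately omitted, and the request id is an arbitrary example value:

static void
xs_queue_read_request(struct xenstore_domain_interface *intf, const char *path)
{
    struct xsd_sockmsg hdr;
    XENSTORE_RING_IDX prod = intf->req_prod;
    const char *p = (const char *)&hdr;
    uint32_t i;

    hdr.type = XS_READ;
    hdr.req_id = 1;                 /* echoed back in the reply header */
    hdr.tx_id = 0;                  /* not part of a transaction */
    hdr.len = strlen(path) + 1;     /* nul-terminated payload */

    for (i = 0; i < sizeof(hdr); i++)
        intf->req[MASK_XENSTORE_IDX(prod++)] = p[i];
    for (i = 0; i < hdr.len; i++)
        intf->req[MASK_XENSTORE_IDX(prod++)] = path[i];

    /* write barrier goes here, then publish the new producer index */
    intf->req_prod = prod;
}
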
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ +#define __XEN_PUBLIC_IO_CONSOLE_H__ + +typedef uint32_t XENCONS_RING_IDX; + +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1)) + +struct xencons_interface { + char in[1024]; + char out[2048]; + XENCONS_RING_IDX in_cons, in_prod; + XENCONS_RING_IDX out_cons, out_prod; +}; + +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/io/console.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/io/protocols.h =================================================================== --- xen/interface/io/protocols.h (.../stable/6/sys) (revision 0) +++ xen/interface/io/protocols.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,40 @@ +/****************************************************************************** + * protocols.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
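
The console ring above is driven the same way as the xenstore rings; a transmit-side sketch (the write barrier and the console event-channel notification that complete the operation are only noted in comments):

static int
xencons_ring_put(struct xencons_interface *intf, const char *buf, int len)
{
    XENCONS_RING_IDX cons = intf->out_cons;
    XENCONS_RING_IDX prod = intf->out_prod;
    int sent = 0;

    while (sent < len && (prod - cons) < sizeof(intf->out))
        intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = buf[sent++];

    /* write barrier, publish the producer index, then notify the backend */
    intf->out_prod = prod;
    return (sent);
}
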
+ */ + +#ifndef __XEN_PROTOCOLS_H__ +#define __XEN_PROTOCOLS_H__ + +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" +#define XEN_IO_PROTO_ABI_IA64 "ia64-abi" + +#if defined(__i386__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 +#elif defined(__x86_64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 +#elif defined(__ia64__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64 +#else +# error arch fixup needed here +#endif + +#endif Property changes on: xen/interface/io/protocols.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/io/xenbus.h =================================================================== --- xen/interface/io/xenbus.h (.../stable/6/sys) (revision 0) +++ xen/interface/io/xenbus.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,80 @@ +/***************************************************************************** + * xenbus.h + * + * Xenbus protocol details. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 XenSource Ltd. + */ + +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H + +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, + + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6, + + /* + * Reconfiguring: The device is being reconfigured. 
+ */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 +}; +typedef enum xenbus_state XenbusState; + +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/io/xenbus.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/io/netif.h =================================================================== --- xen/interface/io/netif.h (.../stable/6/sys) (revision 0) +++ xen/interface/io/netif.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,205 @@ +/****************************************************************************** + * netif.h + * + * Unified network-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_NETIF_H__ +#define __XEN_PUBLIC_IO_NETIF_H__ + +#include "ring.h" +#include "../grant_table.h" + +/* + * Notifications after enqueuing any type of message should be conditional on + * the appropriate req_event or rsp_event field in the shared ring. + * If the client sends notification for rx requests then it should specify + * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume + * that it cannot safely queue packets (as it may not be kicked to send them). + */ + +/* + * This is the 'wire' format for packets: + * Request 1: netif_tx_request -- NETTXF_* (any flags) + * [Request 2: netif_tx_extra] (only if request 1 has NETTXF_extra_info) + * [Request 3: netif_tx_extra] (only if request 2 has XEN_NETIF_EXTRA_MORE) + * Request 4: netif_tx_request -- NETTXF_more_data + * Request 5: netif_tx_request -- NETTXF_more_data + * ... + * Request N: netif_tx_request -- 0 + */ + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETTXF_csum_blank (0) +#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank) + +/* Packet data has been validated against protocol checksum. */ +#define _NETTXF_data_validated (1) +#define NETTXF_data_validated (1U<<_NETTXF_data_validated) + +/* Packet continues in the next request descriptor. */ +#define _NETTXF_more_data (2) +#define NETTXF_more_data (1U<<_NETTXF_more_data) + +/* Packet to be followed by extra descriptor(s). 
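
A frontend normally reacts to the peer's progression through the xenbus_state enum above with a switch in its otherend-changed callback; a schematic sketch (the handler name and the actions in the comments are illustrative only):

static void
frontend_backend_changed(enum xenbus_state backend_state)
{
    switch (backend_state) {
    case XenbusStateInitWait:
        /* backend is waiting: publish ring and event-channel details,
         * then move the frontend to XenbusStateInitialised */
        break;
    case XenbusStateConnected:
        /* both ends ready: start the device */
        break;
    case XenbusStateClosing:
    case XenbusStateClosed:
        /* tear down and move the frontend towards XenbusStateClosed */
        break;
    default:
        break;
    }
}
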
*/ +#define _NETTXF_extra_info (3) +#define NETTXF_extra_info (1U<<_NETTXF_extra_info) + +struct netif_tx_request { + grant_ref_t gref; /* Reference to buffer page */ + uint16_t offset; /* Offset within buffer page */ + uint16_t flags; /* NETTXF_* */ + uint16_t id; /* Echoed in response message. */ + uint16_t size; /* Packet size in bytes. */ +}; +typedef struct netif_tx_request netif_tx_request_t; + +/* Types of netif_extra_info descriptors. */ +#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ +#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MAX (4) + +/* netif_extra_info flags. */ +#define _XEN_NETIF_EXTRA_FLAG_MORE (0) +#define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) + +/* GSO types - only TCPv4 currently supported. */ +#define XEN_NETIF_GSO_TYPE_TCPV4 (1) + +/* + * This structure needs to fit within both netif_tx_request and + * netif_rx_response for compatibility. + */ +struct netif_extra_info { + uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ + uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ + + union { + /* + * XEN_NETIF_EXTRA_TYPE_GSO: + */ + struct { + /* + * Maximum payload size of each segment. For example, for TCP this + * is just the path MSS. + */ + uint16_t size; + + /* + * GSO type. This determines the protocol of the packet and any + * extra features required to segment the packet properly. + */ + uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ + + /* Future expansion. */ + uint8_t pad; + + /* + * GSO features. This specifies any extra GSO features required + * to process this packet, such as ECN support for TCPv4. + */ + uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ + } gso; + + /* + * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: + * Backend advertises availability via 'feature-multicast-control' + * xenbus node containing value '1'. + * Frontend requests this feature by advertising + * 'request-multicast-control' xenbus node containing value '1'. + * If multicast control is requested then multicast flooding is + * disabled and the frontend must explicitly register its interest + * in multicast groups using dummy transmit requests containing + * MCAST_{ADD,DEL} extra-info fragments. + */ + struct { + uint8_t addr[6]; /* Address to add/remove. */ + } mcast; + + uint16_t pad[3]; + } u; +}; +typedef struct netif_extra_info netif_extra_info_t; + +struct netif_tx_response { + uint16_t id; + int16_t status; /* NETIF_RSP_* */ +}; +typedef struct netif_tx_response netif_tx_response_t; + +struct netif_rx_request { + uint16_t id; /* Echoed in response message. */ + grant_ref_t gref; /* Reference to incoming granted frame */ +}; +typedef struct netif_rx_request netif_rx_request_t; + +/* Packet data has been validated against protocol checksum. */ +#define _NETRXF_data_validated (0) +#define NETRXF_data_validated (1U<<_NETRXF_data_validated) + +/* Protocol checksum field is blank in the packet (hardware offload)? */ +#define _NETRXF_csum_blank (1) +#define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) + +/* Packet continues in the next request descriptor. */ +#define _NETRXF_more_data (2) +#define NETRXF_more_data (1U<<_NETRXF_more_data) + +/* Packet to be followed by extra descriptor(s). 
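
To make the transmit wire format above concrete, a sketch of filling the first slot of a checksum-offloaded packet that continues into a further slot; the grant reference, id and length are illustrative values supplied by the caller:

static void
fill_first_tx_slot(struct netif_tx_request *txq, grant_ref_t gref,
    uint16_t total_len)
{
    txq->gref   = gref;         /* grant for the first buffer page */
    txq->offset = 0;            /* data starts at page offset 0 */
    txq->id     = 42;           /* echoed back in netif_tx_response */
    txq->size   = total_len;    /* first slot carries the total packet size */
    txq->flags  = NETTXF_csum_blank | NETTXF_data_validated |
        NETTXF_more_data;       /* a further slot follows */
}
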
*/ +#define _NETRXF_extra_info (3) +#define NETRXF_extra_info (1U<<_NETRXF_extra_info) + +struct netif_rx_response { + uint16_t id; + uint16_t offset; /* Offset in page of start of received packet */ + uint16_t flags; /* NETRXF_* */ + int16_t status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */ +}; +typedef struct netif_rx_response netif_rx_response_t; + +/* + * Generate netif ring structures and types. + */ + +DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); +DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); + +#define NETIF_RSP_DROPPED -2 +#define NETIF_RSP_ERROR -1 +#define NETIF_RSP_OKAY 0 +/* No response: used for auxiliary requests (e.g., netif_tx_extra). */ +#define NETIF_RSP_NULL 1 + +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/io/netif.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/io/fbif.h =================================================================== --- xen/interface/io/fbif.h (.../stable/6/sys) (revision 0) +++ xen/interface/io/fbif.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,176 @@ +/* + * fbif.h -- Xen virtual frame buffer device + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Anthony Liguori + * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster + */ + +#ifndef __XEN_PUBLIC_IO_FBIF_H__ +#define __XEN_PUBLIC_IO_FBIF_H__ + +/* Out events (frontend -> backend) */ + +/* + * Out events may be sent only when requested by backend, and receipt + * of an unknown out event is an error. + */ + +/* Event type 1 currently not used */ +/* + * Framebuffer update notification event + * Capable frontend sets feature-update in xenstore. + * Backend requests it by setting request-update in xenstore. + */ +#define XENFB_TYPE_UPDATE 2 + +struct xenfb_update +{ + uint8_t type; /* XENFB_TYPE_UPDATE */ + int32_t x; /* source x */ + int32_t y; /* source y */ + int32_t width; /* rect width */ + int32_t height; /* rect height */ +}; + +/* + * Framebuffer resize notification event + * Capable backend sets feature-resize in xenstore. 
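
DEFINE_RING_TYPES() above expands to netif_tx_sring/netif_tx_front_ring (and the rx equivalents); a frontend attaches to a freshly allocated shared page roughly as follows. SHARED_RING_INIT() and FRONT_RING_INIT() are the companion macros from ring.h; allocating the page and granting it to the backend are not shown:

static void
netfront_init_tx_ring(struct netif_tx_front_ring *front,
    struct netif_tx_sring *sring)
{
    SHARED_RING_INIT(sring);            /* reset shared producer/consumer */
    FRONT_RING_INIT(front, sring, PAGE_SIZE);
}
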
+ */ +#define XENFB_TYPE_RESIZE 3 + +struct xenfb_resize +{ + uint8_t type; /* XENFB_TYPE_RESIZE */ + int32_t width; /* width in pixels */ + int32_t height; /* height in pixels */ + int32_t stride; /* stride in bytes */ + int32_t depth; /* depth in bits */ + int32_t offset; /* offset of the framebuffer in bytes */ +}; + +#define XENFB_OUT_EVENT_SIZE 40 + +union xenfb_out_event +{ + uint8_t type; + struct xenfb_update update; + struct xenfb_resize resize; + char pad[XENFB_OUT_EVENT_SIZE]; +}; + +/* In events (backend -> frontend) */ + +/* + * Frontends should ignore unknown in events. + */ + +/* + * Framebuffer refresh period advice + * Backend sends it to advise the frontend their preferred period of + * refresh. Frontends that keep the framebuffer constantly up-to-date + * just ignore it. Frontends that use the advice should immediately + * refresh the framebuffer (and send an update notification event if + * those have been requested), then use the update frequency to guide + * their periodical refreshs. + */ +#define XENFB_TYPE_REFRESH_PERIOD 1 +#define XENFB_NO_REFRESH 0 + +struct xenfb_refresh_period +{ + uint8_t type; /* XENFB_TYPE_UPDATE_PERIOD */ + uint32_t period; /* period of refresh, in ms, + * XENFB_NO_REFRESH if no refresh is needed */ +}; + +#define XENFB_IN_EVENT_SIZE 40 + +union xenfb_in_event +{ + uint8_t type; + struct xenfb_refresh_period refresh_period; + char pad[XENFB_IN_EVENT_SIZE]; +}; + +/* shared page */ + +#define XENFB_IN_RING_SIZE 1024 +#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE) +#define XENFB_IN_RING_OFFS 1024 +#define XENFB_IN_RING(page) \ + ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) +#define XENFB_IN_RING_REF(page, idx) \ + (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) + +#define XENFB_OUT_RING_SIZE 2048 +#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE) +#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE) +#define XENFB_OUT_RING(page) \ + ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) +#define XENFB_OUT_RING_REF(page, idx) \ + (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) + +struct xenfb_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; + + int32_t width; /* the width of the framebuffer (in pixels) */ + int32_t height; /* the height of the framebuffer (in pixels) */ + uint32_t line_length; /* the length of a row of pixels (in bytes) */ + uint32_t mem_length; /* the length of the framebuffer (in bytes) */ + uint8_t depth; /* the depth of a pixel (in bits) */ + + /* + * Framebuffer page directory + * + * Each directory page holds PAGE_SIZE / sizeof(*pd) + * framebuffer pages, and can thus map up to PAGE_SIZE * + * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and + * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs + * 64 bit. 256 directories give enough room for a 512 Meg + * framebuffer with a max resolution of 12,800x10,240. Should + * be enough for a while with room leftover for expansion. + */ + unsigned long pd[256]; +}; + +/* + * Wart: xenkbd needs to know default resolution. Put it here until a + * better solution is found, but don't leak it to the backend. 
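
A sketch of queueing a damage-rectangle update into the out ring of the shared framebuffer page described above; free-slot checking, the write barrier and the event-channel kick are only noted in comments:

static void
xenfb_send_update(struct xenfb_page *page, int x, int y, int w, int h)
{
    union xenfb_out_event *event;
    uint32_t prod = page->out_prod;

    event = &XENFB_OUT_RING_REF(page, prod);
    memset(event, 0, sizeof(*event));
    event->update.type = XENFB_TYPE_UPDATE;
    event->update.x = x;
    event->update.y = y;
    event->update.width = w;
    event->update.height = h;

    /* write barrier here, then publish and notify the backend */
    page->out_prod = prod + 1;
}
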
+ */ +#ifdef __KERNEL__ +#define XENFB_WIDTH 800 +#define XENFB_HEIGHT 600 +#define XENFB_DEPTH 32 +#endif + +#endif + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/io/fbif.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/io/blkif.h =================================================================== --- xen/interface/io/blkif.h (.../stable/6/sys) (revision 0) +++ xen/interface/io/blkif.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,141 @@ +/****************************************************************************** + * blkif.h + * + * Unified block-device I/O interface for Xen guest OSes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2003-2004, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_IO_BLKIF_H__ +#define __XEN_PUBLIC_IO_BLKIF_H__ + +#include +#include + +/* + * Front->back notifications: When enqueuing a new request, sending a + * notification can be made conditional on req_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Backends must set + * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()). + * + * Back->front notifications: When enqueuing a new response, sending a + * notification can be made conditional on rsp_event (i.e., the generic + * hold-off mechanism provided by the ring macros). Frontends must set + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). + */ + +#ifndef blkif_vdev_t +#define blkif_vdev_t uint16_t +#endif +#define blkif_sector_t uint64_t + +/* + * REQUEST CODES. + */ +#define BLKIF_OP_READ 0 +#define BLKIF_OP_WRITE 1 +/* + * Recognised only if "feature-barrier" is present in backend xenbus info. + * The "feature-barrier" node contains a boolean indicating whether barrier + * requests are likely to succeed or fail. Either way, a barrier request + * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by + * the underlying block-device hardware. The boolean simply indicates whether + * or not it is worthwhile for the frontend to attempt barrier requests. + * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not* + * create the "feature-barrier" node! + */ +#define BLKIF_OP_WRITE_BARRIER 2 +/* + * Recognised if "feature-flush-cache" is present in backend xenbus + * info. 
A flush will ask the underlying storage hardware to flush its + * non-volatile caches as appropriate. The "feature-flush-cache" node + * contains a boolean indicating whether flush requests are likely to + * succeed or fail. Either way, a flush request may fail at any time + * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying + * block-device hardware. The boolean simply indicates whether or not it + * is worthwhile for the frontend to attempt flushes. If a backend does + * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the + * "feature-flush-cache" node! + */ +#define BLKIF_OP_FLUSH_DISKCACHE 3 + +/* + * Maximum scatter/gather segments per request. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. + * NB. This could be 12 if the ring indexes weren't stored in the same page. + */ +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 + +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; +}; + +struct blkif_request { + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct blkif_request blkif_request_t; + +struct blkif_response { + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ +}; +typedef struct blkif_response blkif_response_t; + +/* + * STATUS RETURN CODES. + */ + /* Operation not supported (only happens on barrier writes). */ +#define BLKIF_RSP_EOPNOTSUPP -2 + /* Operation failed for some unspecified reason (-EIO). */ +#define BLKIF_RSP_ERROR -1 + /* Operation completed successfully. */ +#define BLKIF_RSP_OKAY 0 + +/* + * Generate blkif ring structures and types. + */ + +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); + +#define VDISK_CDROM 0x1 +#define VDISK_REMOVABLE 0x2 +#define VDISK_READONLY 0x4 + +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/io/blkif.h ___________________________________________________________________ Added: fbsd:nokeywords + true Property changes on: xen/interface/io ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/sysctl.h =================================================================== --- xen/interface/sysctl.h (.../stable/6/sys) (revision 0) +++ xen/interface/sysctl.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,308 @@ +/****************************************************************************** + * sysctl.h + * + * System management operations. For use by node control stack. 
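
To tie the blkif request layout above together, a sketch of queueing a single-segment read of one 512-byte sector; the ring, grant reference and request id are illustrative values from the caller, and the push/notify step is only noted in a comment:

static void
blkfront_queue_read(struct blkif_front_ring *ring, grant_ref_t gref,
    blkif_sector_t sector, uint64_t id)
{
    struct blkif_request *req;

    req = RING_GET_REQUEST(ring, ring->req_prod_pvt);
    req->operation = BLKIF_OP_READ;
    req->nr_segments = 1;
    req->handle = 0;                    /* virtual device handle */
    req->id = id;                       /* echoed in blkif_response */
    req->sector_number = sector;
    req->seg[0].gref = gref;            /* granted I/O buffer page */
    req->seg[0].first_sect = 0;
    req->seg[0].last_sect = 0;          /* one 512-byte sector */
    ring->req_prod_pvt++;

    /* then RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() and kick the backend */
}
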
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_SYSCTL_H__ +#define __XEN_PUBLIC_SYSCTL_H__ + +#if !defined(__XEN__) && !defined(__XEN_TOOLS__) +#error "sysctl operations are intended for use by node control tools only" +#endif + +#include "xen.h" +#include "domctl.h" + +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000006 + +/* + * Read console content from Xen buffer ring. + */ +#define XEN_SYSCTL_readconsole 1 +struct xen_sysctl_readconsole { + /* IN: Non-zero -> clear after reading. */ + uint8_t clear; + /* IN: Non-zero -> start index specified by @index field. */ + uint8_t incremental; + uint8_t pad0, pad1; + /* + * IN: Start index for consuming from ring buffer (if @incremental); + * OUT: End index after consuming from ring buffer. + */ + uint32_t index; + /* IN: Virtual address to write console data. */ + XEN_GUEST_HANDLE_64(char) buffer; + /* IN: Size of buffer; OUT: Bytes written to buffer. */ + uint32_t count; +}; +typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); + +/* Get trace buffers machine base address */ +#define XEN_SYSCTL_tbuf_op 2 +struct xen_sysctl_tbuf_op { + /* IN variables */ +#define XEN_SYSCTL_TBUFOP_get_info 0 +#define XEN_SYSCTL_TBUFOP_set_cpu_mask 1 +#define XEN_SYSCTL_TBUFOP_set_evt_mask 2 +#define XEN_SYSCTL_TBUFOP_set_size 3 +#define XEN_SYSCTL_TBUFOP_enable 4 +#define XEN_SYSCTL_TBUFOP_disable 5 + uint32_t cmd; + /* IN/OUT variables */ + struct xenctl_cpumap cpu_mask; + uint32_t evt_mask; + /* OUT variables */ + uint64_aligned_t buffer_mfn; + uint32_t size; +}; +typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t); + +/* + * Get physical information about the host machine + */ +#define XEN_SYSCTL_physinfo 3 + /* (x86) The platform supports HVM guests. */ +#define _XEN_SYSCTL_PHYSCAP_hvm 0 +#define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm) + /* (x86) The platform supports HVM-guest direct access to I/O devices. 
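
A sketch of preparing the readconsole operation above for an incremental read; pointing rc->buffer at the destination buffer (normally done with the set_xen_guest_handle() helper) and issuing the sysctl hypercall are left out:

static void
fill_readconsole_op(struct xen_sysctl_readconsole *rc, uint32_t last_index,
    uint32_t buflen)
{
    rc->clear = 0;              /* leave the hypervisor ring intact */
    rc->incremental = 1;        /* resume from rc->index */
    rc->pad0 = rc->pad1 = 0;
    rc->index = last_index;     /* OUT value of the previous call */
    rc->count = buflen;         /* IN: buffer size; OUT: bytes written */
}
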
*/ +#define _XEN_SYSCTL_PHYSCAP_hvm_directio 1 +#define XEN_SYSCTL_PHYSCAP_hvm_directio (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio) +struct xen_sysctl_physinfo { + uint32_t threads_per_core; + uint32_t cores_per_socket; + uint32_t nr_cpus; + uint32_t nr_nodes; + uint32_t cpu_khz; + uint64_aligned_t total_pages; + uint64_aligned_t free_pages; + uint64_aligned_t scrub_pages; + uint32_t hw_cap[8]; + + /* + * IN: maximum addressable entry in the caller-provided cpu_to_node array. + * OUT: largest cpu identifier in the system. + * If OUT is greater than IN then the cpu_to_node array is truncated! + */ + uint32_t max_cpu_id; + /* + * If not NULL, this array is filled with node identifier for each cpu. + * If a cpu has no node information (e.g., cpu not present) then the + * sentinel value ~0u is written. + * The size of this array is specified by the caller in @max_cpu_id. + * If the actual @max_cpu_id is smaller than the array then the trailing + * elements of the array will not be written by the sysctl. + */ + XEN_GUEST_HANDLE_64(uint32) cpu_to_node; + + /* XEN_SYSCTL_PHYSCAP_??? */ + uint32_t capabilities; +}; +typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t); + +/* + * Get the ID of the current scheduler. + */ +#define XEN_SYSCTL_sched_id 4 +struct xen_sysctl_sched_id { + /* OUT variable */ + uint32_t sched_id; +}; +typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t); + +/* Interface for controlling Xen software performance counters. */ +#define XEN_SYSCTL_perfc_op 5 +/* Sub-operations: */ +#define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */ +#define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. */ +struct xen_sysctl_perfc_desc { + char name[80]; /* name of perf counter */ + uint32_t nr_vals; /* number of values for this counter */ +}; +typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t); +typedef uint32_t xen_sysctl_perfc_val_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t); + +struct xen_sysctl_perfc_op { + /* IN variables. */ + uint32_t cmd; /* XEN_SYSCTL_PERFCOP_??? */ + /* OUT variables. */ + uint32_t nr_counters; /* number of counters description */ + uint32_t nr_vals; /* number of values */ + /* counter information (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc; + /* counter values (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val; +}; +typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); + +#define XEN_SYSCTL_getdomaininfolist 6 +struct xen_sysctl_getdomaininfolist { + /* IN variables. */ + domid_t first_domain; + uint32_t max_domains; + XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer; + /* OUT variables. */ + uint32_t num_domains; +}; +typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); + +/* Inject debug keys into Xen. */ +#define XEN_SYSCTL_debug_keys 7 +struct xen_sysctl_debug_keys { + /* IN variables. */ + XEN_GUEST_HANDLE_64(char) keys; + uint32_t nr_keys; +}; +typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); + +/* Get physical CPU information. 
*/ +#define XEN_SYSCTL_getcpuinfo 8 +struct xen_sysctl_cpuinfo { + uint64_aligned_t idletime; +}; +typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); +struct xen_sysctl_getcpuinfo { + /* IN variables. */ + uint32_t max_cpus; + XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info; + /* OUT variables. */ + uint32_t nr_cpus; +}; +typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); + +#define XEN_SYSCTL_availheap 9 +struct xen_sysctl_availheap { + /* IN variables. */ + uint32_t min_bitwidth; /* Smallest address width (zero if don't care). */ + uint32_t max_bitwidth; /* Largest address width (zero if don't care). */ + int32_t node; /* NUMA node of interest (-1 for all nodes). */ + /* OUT variables. */ + uint64_aligned_t avail_bytes;/* Bytes available in the specified region. */ +}; +typedef struct xen_sysctl_availheap xen_sysctl_availheap_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t); + +#define XEN_SYSCTL_get_pmstat 10 +struct pm_px_val { + uint64_aligned_t freq; /* Px core frequency */ + uint64_aligned_t residency; /* Px residency time */ + uint64_aligned_t count; /* Px transition count */ +}; +typedef struct pm_px_val pm_px_val_t; +DEFINE_XEN_GUEST_HANDLE(pm_px_val_t); + +struct pm_px_stat { + uint8_t total; /* total Px states */ + uint8_t usable; /* usable Px states */ + uint8_t last; /* last Px state */ + uint8_t cur; /* current Px state */ + XEN_GUEST_HANDLE_64(uint64) trans_pt; /* Px transition table */ + XEN_GUEST_HANDLE_64(pm_px_val_t) pt; +}; +typedef struct pm_px_stat pm_px_stat_t; +DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t); + +struct pm_cx_stat { + uint32_t nr; /* entry nr in triggers & residencies, including C0 */ + uint32_t last; /* last Cx state */ + uint64_aligned_t idle_time; /* idle time from boot */ + XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */ + XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */ +}; + +struct xen_sysctl_get_pmstat { +#define PMSTAT_CATEGORY_MASK 0xf0 +#define PMSTAT_PX 0x10 +#define PMSTAT_CX 0x20 +#define PMSTAT_get_max_px (PMSTAT_PX | 0x1) +#define PMSTAT_get_pxstat (PMSTAT_PX | 0x2) +#define PMSTAT_reset_pxstat (PMSTAT_PX | 0x3) +#define PMSTAT_get_max_cx (PMSTAT_CX | 0x1) +#define PMSTAT_get_cxstat (PMSTAT_CX | 0x2) +#define PMSTAT_reset_cxstat (PMSTAT_CX | 0x3) + uint32_t type; + uint32_t cpuid; + union { + struct pm_px_stat getpx; + struct pm_cx_stat getcx; + /* other struct for tx, etc */ + } u; +}; +typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t); + +#define XEN_SYSCTL_cpu_hotplug 11 +struct xen_sysctl_cpu_hotplug { + /* IN variables */ + uint32_t cpu; /* Physical cpu. 
*/ +#define XEN_SYSCTL_CPU_HOTPLUG_ONLINE 0 +#define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1 + uint32_t op; /* hotplug opcode */ +}; +typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t); + + +struct xen_sysctl { + uint32_t cmd; + uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ + union { + struct xen_sysctl_readconsole readconsole; + struct xen_sysctl_tbuf_op tbuf_op; + struct xen_sysctl_physinfo physinfo; + struct xen_sysctl_sched_id sched_id; + struct xen_sysctl_perfc_op perfc_op; + struct xen_sysctl_getdomaininfolist getdomaininfolist; + struct xen_sysctl_debug_keys debug_keys; + struct xen_sysctl_getcpuinfo getcpuinfo; + struct xen_sysctl_availheap availheap; + struct xen_sysctl_get_pmstat get_pmstat; + struct xen_sysctl_cpu_hotplug cpu_hotplug; + uint8_t pad[128]; + } u; +}; +typedef struct xen_sysctl xen_sysctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t); + +#endif /* __XEN_PUBLIC_SYSCTL_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/sysctl.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/domctl.h =================================================================== --- xen/interface/domctl.h (.../stable/6/sys) (revision 0) +++ xen/interface/domctl.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,677 @@ +/****************************************************************************** + * domctl.h + * + * Domain management operations. For use by node control stack. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2002-2003, B Dragovic + * Copyright (c) 2002-2006, K Fraser + */ + +#ifndef __XEN_PUBLIC_DOMCTL_H__ +#define __XEN_PUBLIC_DOMCTL_H__ + +#if !defined(__XEN__) && !defined(__XEN_TOOLS__) +#error "domctl operations are intended for use by node control tools only" +#endif + +#include "xen.h" + +#define XEN_DOMCTL_INTERFACE_VERSION 0x00000005 + +struct xenctl_cpumap { + XEN_GUEST_HANDLE_64(uint8_t) bitmap; + uint32_t nr_cpus; +}; + +/* + * NB. xen_domctl.domain is an IN/OUT parameter for this operation. + * If it is specified as zero, an id is auto-allocated and returned. 
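
All of the sub-operations are carried in the xen_sysctl envelope above; a sketch of wrapping a physinfo query (how the structure is then handed to the hypervisor is control-tool plumbing outside this header):

static void
fill_physinfo_sysctl(struct xen_sysctl *op)
{
    memset(op, 0, sizeof(*op));
    op->cmd = XEN_SYSCTL_physinfo;
    op->interface_version = XEN_SYSCTL_INTERFACE_VERSION;
    op->u.physinfo.max_cpu_id = 0;      /* no cpu_to_node array supplied */
    /* on return op->u.physinfo holds core/thread counts, page totals, ... */
}
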
+ */ +#define XEN_DOMCTL_createdomain 1 +struct xen_domctl_createdomain { + /* IN parameters */ + uint32_t ssidref; + xen_domain_handle_t handle; + /* Is this an HVM guest (as opposed to a PV guest)? */ +#define _XEN_DOMCTL_CDF_hvm_guest 0 +#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest) + /* Use hardware-assisted paging if available? */ +#define _XEN_DOMCTL_CDF_hap 1 +#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap) + uint32_t flags; +}; +typedef struct xen_domctl_createdomain xen_domctl_createdomain_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t); + +#define XEN_DOMCTL_destroydomain 2 +#define XEN_DOMCTL_pausedomain 3 +#define XEN_DOMCTL_unpausedomain 4 +#define XEN_DOMCTL_resumedomain 27 + +#define XEN_DOMCTL_getdomaininfo 5 +struct xen_domctl_getdomaininfo { + /* OUT variables. */ + domid_t domain; /* Also echoed in domctl.domain */ + /* Domain is scheduled to die. */ +#define _XEN_DOMINF_dying 0 +#define XEN_DOMINF_dying (1U<<_XEN_DOMINF_dying) + /* Domain is an HVM guest (as opposed to a PV guest). */ +#define _XEN_DOMINF_hvm_guest 1 +#define XEN_DOMINF_hvm_guest (1U<<_XEN_DOMINF_hvm_guest) + /* The guest OS has shut down. */ +#define _XEN_DOMINF_shutdown 2 +#define XEN_DOMINF_shutdown (1U<<_XEN_DOMINF_shutdown) + /* Currently paused by control software. */ +#define _XEN_DOMINF_paused 3 +#define XEN_DOMINF_paused (1U<<_XEN_DOMINF_paused) + /* Currently blocked pending an event. */ +#define _XEN_DOMINF_blocked 4 +#define XEN_DOMINF_blocked (1U<<_XEN_DOMINF_blocked) + /* Domain is currently running. */ +#define _XEN_DOMINF_running 5 +#define XEN_DOMINF_running (1U<<_XEN_DOMINF_running) + /* Being debugged. */ +#define _XEN_DOMINF_debugged 6 +#define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged) + /* CPU to which this domain is bound. */ +#define XEN_DOMINF_cpumask 255 +#define XEN_DOMINF_cpushift 8 + /* XEN_DOMINF_shutdown guest-supplied code. */ +#define XEN_DOMINF_shutdownmask 255 +#define XEN_DOMINF_shutdownshift 16 + uint32_t flags; /* XEN_DOMINF_* */ + uint64_aligned_t tot_pages; + uint64_aligned_t max_pages; + uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */ + uint64_aligned_t cpu_time; + uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */ + uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ + uint32_t ssidref; + xen_domain_handle_t handle; +}; +typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t); + + +#define XEN_DOMCTL_getmemlist 6 +struct xen_domctl_getmemlist { + /* IN variables. */ + /* Max entries to write to output buffer. */ + uint64_aligned_t max_pfns; + /* Start index in guest's page list. */ + uint64_aligned_t start_pfn; + XEN_GUEST_HANDLE_64(uint64_t) buffer; + /* OUT variables. */ + uint64_aligned_t num_pfns; +}; +typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t); + + +#define XEN_DOMCTL_getpageframeinfo 7 + +#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28 +#define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28) +#define XEN_DOMCTL_PFINFO_L1TAB (0x1U<<28) +#define XEN_DOMCTL_PFINFO_L2TAB (0x2U<<28) +#define XEN_DOMCTL_PFINFO_L3TAB (0x3U<<28) +#define XEN_DOMCTL_PFINFO_L4TAB (0x4U<<28) +#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28) +#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31) +#define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */ +#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28) + +struct xen_domctl_getpageframeinfo { + /* IN variables. 
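
The flags word returned by getdomaininfo above packs several fields; a sketch of extracting the guest-supplied shutdown reason:

static int
dominfo_shutdown_code(const struct xen_domctl_getdomaininfo *info)
{
    if ((info->flags & XEN_DOMINF_shutdown) == 0)
        return (-1);            /* the domain has not shut down */
    /* bits 16..23 carry the reason code passed by the guest */
    return ((info->flags >> XEN_DOMINF_shutdownshift) &
        XEN_DOMINF_shutdownmask);
}
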
*/ + uint64_aligned_t gmfn; /* GMFN to query */ + /* OUT variables. */ + /* Is the page PINNED to a type? */ + uint32_t type; /* see above type defs */ +}; +typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t); + + +#define XEN_DOMCTL_getpageframeinfo2 8 +struct xen_domctl_getpageframeinfo2 { + /* IN variables. */ + uint64_aligned_t num; + /* IN/OUT variables. */ + XEN_GUEST_HANDLE_64(uint32_t) array; +}; +typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); + + +/* + * Control shadow pagetables operation + */ +#define XEN_DOMCTL_shadow_op 10 + +/* Disable shadow mode. */ +#define XEN_DOMCTL_SHADOW_OP_OFF 0 + +/* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE 32 + +/* Log-dirty bitmap operations. */ + /* Return the bitmap and clean internal copy for next round. */ +#define XEN_DOMCTL_SHADOW_OP_CLEAN 11 + /* Return the bitmap but do not modify internal copy. */ +#define XEN_DOMCTL_SHADOW_OP_PEEK 12 + +/* Memory allocation accessors. */ +#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION 30 +#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION 31 + +/* Legacy enable operations. */ + /* Equiv. to ENABLE with no mode flags. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST 1 + /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY 2 + /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */ +#define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE 3 + +/* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */ + /* + * Shadow pagetables are refcounted: guest does not use explicit mmu + * operations nor write-protect its pagetables. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT (1 << 1) + /* + * Log pages in a bitmap as they are dirtied. + * Used for live relocation to determine which pages must be re-sent. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2) + /* + * Automatically translate GPFNs into MFNs. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3) + /* + * Xen does not steal virtual address space from the guest. + * Requires HVM support. + */ +#define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL (1 << 4) + +struct xen_domctl_shadow_op_stats { + uint32_t fault_count; + uint32_t dirty_count; +}; +typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t); + +struct xen_domctl_shadow_op { + /* IN variables. */ + uint32_t op; /* XEN_DOMCTL_SHADOW_OP_* */ + + /* OP_ENABLE */ + uint32_t mode; /* XEN_DOMCTL_SHADOW_ENABLE_* */ + + /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */ + uint32_t mb; /* Shadow memory allocation in MB */ + + /* OP_PEEK / OP_CLEAN */ + XEN_GUEST_HANDLE_64(uint8_t) dirty_bitmap; + uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */ + struct xen_domctl_shadow_op_stats stats; +}; +typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t); + + +#define XEN_DOMCTL_max_mem 11 +struct xen_domctl_max_mem { + /* IN variables. 
*/ + uint64_aligned_t max_memkb; +}; +typedef struct xen_domctl_max_mem xen_domctl_max_mem_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t); + + +#define XEN_DOMCTL_setvcpucontext 12 +#define XEN_DOMCTL_getvcpucontext 13 +struct xen_domctl_vcpucontext { + uint32_t vcpu; /* IN */ + XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ +}; +typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t); + + +#define XEN_DOMCTL_getvcpuinfo 14 +struct xen_domctl_getvcpuinfo { + /* IN variables. */ + uint32_t vcpu; + /* OUT variables. */ + uint8_t online; /* currently online (not hotplugged)? */ + uint8_t blocked; /* blocked waiting for an event? */ + uint8_t running; /* currently scheduled on its CPU? */ + uint64_aligned_t cpu_time; /* total cpu time consumed (ns) */ + uint32_t cpu; /* current mapping */ +}; +typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t); + + +/* Get/set which physical cpus a vcpu can execute on. */ +#define XEN_DOMCTL_setvcpuaffinity 9 +#define XEN_DOMCTL_getvcpuaffinity 25 +struct xen_domctl_vcpuaffinity { + uint32_t vcpu; /* IN */ + struct xenctl_cpumap cpumap; /* IN/OUT */ +}; +typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t); + + +#define XEN_DOMCTL_max_vcpus 15 +struct xen_domctl_max_vcpus { + uint32_t max; /* maximum number of vcpus */ +}; +typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t); + + +#define XEN_DOMCTL_scheduler_op 16 +/* Scheduler types. */ +#define XEN_SCHEDULER_SEDF 4 +#define XEN_SCHEDULER_CREDIT 5 +/* Set or get info? */ +#define XEN_DOMCTL_SCHEDOP_putinfo 0 +#define XEN_DOMCTL_SCHEDOP_getinfo 1 +struct xen_domctl_scheduler_op { + uint32_t sched_id; /* XEN_SCHEDULER_* */ + uint32_t cmd; /* XEN_DOMCTL_SCHEDOP_* */ + union { + struct xen_domctl_sched_sedf { + uint64_aligned_t period; + uint64_aligned_t slice; + uint64_aligned_t latency; + uint32_t extratime; + uint32_t weight; + } sedf; + struct xen_domctl_sched_credit { + uint16_t weight; + uint16_t cap; + } credit; + } u; +}; +typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t); + + +#define XEN_DOMCTL_setdomainhandle 17 +struct xen_domctl_setdomainhandle { + xen_domain_handle_t handle; +}; +typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t); + + +#define XEN_DOMCTL_setdebugging 18 +struct xen_domctl_setdebugging { + uint8_t enable; +}; +typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t); + + +#define XEN_DOMCTL_irq_permission 19 +struct xen_domctl_irq_permission { + uint8_t pirq; + uint8_t allow_access; /* flag to specify enable/disable of IRQ access */ +}; +typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t); + + +#define XEN_DOMCTL_iomem_permission 20 +struct xen_domctl_iomem_permission { + uint64_aligned_t first_mfn;/* first page (physical page number) in range */ + uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ + uint8_t allow_access; /* allow (!0) or deny (0) access to range? 
*/ +}; +typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t); + + +#define XEN_DOMCTL_ioport_permission 21 +struct xen_domctl_ioport_permission { + uint32_t first_port; /* first port int range */ + uint32_t nr_ports; /* size of port range */ + uint8_t allow_access; /* allow or deny access to range? */ +}; +typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t); + + +#define XEN_DOMCTL_hypercall_init 22 +struct xen_domctl_hypercall_init { + uint64_aligned_t gmfn; /* GMFN to be initialised */ +}; +typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t); + + +#define XEN_DOMCTL_arch_setup 23 +#define _XEN_DOMAINSETUP_hvm_guest 0 +#define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest) +#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */ +#define XEN_DOMAINSETUP_query (1UL<<_XEN_DOMAINSETUP_query) +#define _XEN_DOMAINSETUP_sioemu_guest 2 +#define XEN_DOMAINSETUP_sioemu_guest (1UL<<_XEN_DOMAINSETUP_sioemu_guest) +typedef struct xen_domctl_arch_setup { + uint64_aligned_t flags; /* XEN_DOMAINSETUP_* */ +#ifdef __ia64__ + uint64_aligned_t bp; /* mpaddr of boot param area */ + uint64_aligned_t maxmem; /* Highest memory address for MDT. */ + uint64_aligned_t xsi_va; /* Xen shared_info area virtual address. */ + uint32_t hypercall_imm; /* Break imm for Xen hypercalls. */ + int8_t vhpt_size_log2; /* Log2 of VHPT size. */ +#endif +} xen_domctl_arch_setup_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t); + + +#define XEN_DOMCTL_settimeoffset 24 +struct xen_domctl_settimeoffset { + int32_t time_offset_seconds; /* applied to domain wallclock time */ +}; +typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); + + +#define XEN_DOMCTL_gethvmcontext 33 +#define XEN_DOMCTL_sethvmcontext 34 +typedef struct xen_domctl_hvmcontext { + uint32_t size; /* IN/OUT: size of buffer / bytes filled */ + XEN_GUEST_HANDLE_64(uint8_t) buffer; /* IN/OUT: data, or call + * gethvmcontext with NULL + * buffer to get size + * req'd */ +} xen_domctl_hvmcontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t); + + +#define XEN_DOMCTL_set_address_size 35 +#define XEN_DOMCTL_get_address_size 36 +typedef struct xen_domctl_address_size { + uint32_t size; +} xen_domctl_address_size_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t); + + +#define XEN_DOMCTL_real_mode_area 26 +struct xen_domctl_real_mode_area { + uint32_t log; /* log2 of Real Mode Area size */ +}; +typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); + + +#define XEN_DOMCTL_sendtrigger 28 +#define XEN_DOMCTL_SENDTRIGGER_NMI 0 +#define XEN_DOMCTL_SENDTRIGGER_RESET 1 +#define XEN_DOMCTL_SENDTRIGGER_INIT 2 +struct xen_domctl_sendtrigger { + uint32_t trigger; /* IN */ + uint32_t vcpu; /* IN */ +}; +typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t); + + +/* Assign PCI device to HVM guest. Sets up IOMMU structures. 
*/ +#define XEN_DOMCTL_assign_device 37 +#define XEN_DOMCTL_test_assign_device 45 +#define XEN_DOMCTL_deassign_device 47 +struct xen_domctl_assign_device { + uint32_t machine_bdf; /* machine PCI ID of assigned device */ +}; +typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); + +/* Retrieve sibling devices infomation of machine_bdf */ +#define XEN_DOMCTL_get_device_group 50 +struct xen_domctl_get_device_group { + uint32_t machine_bdf; /* IN */ + uint32_t max_sdevs; /* IN */ + uint32_t num_sdevs; /* OUT */ + XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */ +}; +typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t); + +/* Pass-through interrupts: bind real irq -> hvm devfn. */ +#define XEN_DOMCTL_bind_pt_irq 38 +#define XEN_DOMCTL_unbind_pt_irq 48 +typedef enum pt_irq_type_e { + PT_IRQ_TYPE_PCI, + PT_IRQ_TYPE_ISA, + PT_IRQ_TYPE_MSI, +} pt_irq_type_t; +struct xen_domctl_bind_pt_irq { + uint32_t machine_irq; + pt_irq_type_t irq_type; + uint32_t hvm_domid; + + union { + struct { + uint8_t isa_irq; + } isa; + struct { + uint8_t bus; + uint8_t device; + uint8_t intx; + } pci; + struct { + uint8_t gvec; + uint32_t gflags; + } msi; + } u; +}; +typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t); + + +/* Bind machine I/O address range -> HVM address range. */ +#define XEN_DOMCTL_memory_mapping 39 +#define DPCI_ADD_MAPPING 1 +#define DPCI_REMOVE_MAPPING 0 +struct xen_domctl_memory_mapping { + uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */ + uint64_aligned_t first_mfn; /* first page (machine page) in range */ + uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ + uint32_t add_mapping; /* add or remove mapping */ + uint32_t padding; /* padding for 64-bit aligned structure */ +}; +typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t); + + +/* Bind machine I/O port range -> HVM I/O port range. */ +#define XEN_DOMCTL_ioport_mapping 40 +struct xen_domctl_ioport_mapping { + uint32_t first_gport; /* first guest IO port*/ + uint32_t first_mport; /* first machine IO port */ + uint32_t nr_ports; /* size of port range */ + uint32_t add_mapping; /* add or remove mapping */ +}; +typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t); + + +/* + * Pin caching type of RAM space for x86 HVM domU. + */ +#define XEN_DOMCTL_pin_mem_cacheattr 41 +/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */ +#define XEN_DOMCTL_MEM_CACHEATTR_UC 0 +#define XEN_DOMCTL_MEM_CACHEATTR_WC 1 +#define XEN_DOMCTL_MEM_CACHEATTR_WT 4 +#define XEN_DOMCTL_MEM_CACHEATTR_WP 5 +#define XEN_DOMCTL_MEM_CACHEATTR_WB 6 +#define XEN_DOMCTL_MEM_CACHEATTR_UCM 7 +struct xen_domctl_pin_mem_cacheattr { + uint64_aligned_t start, end; + unsigned int type; /* XEN_DOMCTL_MEM_CACHEATTR_* */ +}; +typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t); + + +#define XEN_DOMCTL_set_ext_vcpucontext 42 +#define XEN_DOMCTL_get_ext_vcpucontext 43 +struct xen_domctl_ext_vcpucontext { + /* IN: VCPU that this call applies to. 
*/ + uint32_t vcpu; + /* + * SET: Size of struct (IN) + * GET: Size of struct (OUT) + */ + uint32_t size; +#if defined(__i386__) || defined(__x86_64__) + /* SYSCALL from 32-bit mode and SYSENTER callback information. */ + /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */ + uint64_aligned_t syscall32_callback_eip; + uint64_aligned_t sysenter_callback_eip; + uint16_t syscall32_callback_cs; + uint16_t sysenter_callback_cs; + uint8_t syscall32_disables_events; + uint8_t sysenter_disables_events; +#endif +}; +typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t); + +/* + * Set optimizaton features for a domain + */ +#define XEN_DOMCTL_set_opt_feature 44 +struct xen_domctl_set_opt_feature { +#if defined(__ia64__) + struct xen_ia64_opt_feature optf; +#else + /* Make struct non-empty: do not depend on this field name! */ + uint64_t dummy; +#endif +}; +typedef struct xen_domctl_set_opt_feature xen_domctl_set_opt_feature_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_opt_feature_t); + +/* + * Set the target domain for a domain + */ +#define XEN_DOMCTL_set_target 46 +struct xen_domctl_set_target { + domid_t target; +}; +typedef struct xen_domctl_set_target xen_domctl_set_target_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t); + +#if defined(__i386__) || defined(__x86_64__) +# define XEN_CPUID_INPUT_UNUSED 0xFFFFFFFF +# define XEN_DOMCTL_set_cpuid 49 +struct xen_domctl_cpuid { + unsigned int input[2]; + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; +}; +typedef struct xen_domctl_cpuid xen_domctl_cpuid_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t); +#endif + +#define XEN_DOMCTL_subscribe 29 +struct xen_domctl_subscribe { + uint32_t port; /* IN */ +}; +typedef struct xen_domctl_subscribe xen_domctl_subscribe_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t); + +/* + * Define the maximum machine address size which should be allocated + * to a guest. 
+ */ +#define XEN_DOMCTL_set_machine_address_size 51 +#define XEN_DOMCTL_get_machine_address_size 52 + + +struct xen_domctl { + uint32_t cmd; + uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ + domid_t domain; + union { + struct xen_domctl_createdomain createdomain; + struct xen_domctl_getdomaininfo getdomaininfo; + struct xen_domctl_getmemlist getmemlist; + struct xen_domctl_getpageframeinfo getpageframeinfo; + struct xen_domctl_getpageframeinfo2 getpageframeinfo2; + struct xen_domctl_vcpuaffinity vcpuaffinity; + struct xen_domctl_shadow_op shadow_op; + struct xen_domctl_max_mem max_mem; + struct xen_domctl_vcpucontext vcpucontext; + struct xen_domctl_getvcpuinfo getvcpuinfo; + struct xen_domctl_max_vcpus max_vcpus; + struct xen_domctl_scheduler_op scheduler_op; + struct xen_domctl_setdomainhandle setdomainhandle; + struct xen_domctl_setdebugging setdebugging; + struct xen_domctl_irq_permission irq_permission; + struct xen_domctl_iomem_permission iomem_permission; + struct xen_domctl_ioport_permission ioport_permission; + struct xen_domctl_hypercall_init hypercall_init; + struct xen_domctl_arch_setup arch_setup; + struct xen_domctl_settimeoffset settimeoffset; + struct xen_domctl_real_mode_area real_mode_area; + struct xen_domctl_hvmcontext hvmcontext; + struct xen_domctl_address_size address_size; + struct xen_domctl_sendtrigger sendtrigger; + struct xen_domctl_get_device_group get_device_group; + struct xen_domctl_assign_device assign_device; + struct xen_domctl_bind_pt_irq bind_pt_irq; + struct xen_domctl_memory_mapping memory_mapping; + struct xen_domctl_ioport_mapping ioport_mapping; + struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr; + struct xen_domctl_ext_vcpucontext ext_vcpucontext; + struct xen_domctl_set_opt_feature set_opt_feature; + struct xen_domctl_set_target set_target; + struct xen_domctl_subscribe subscribe; +#if defined(__i386__) || defined(__x86_64__) + struct xen_domctl_cpuid cpuid; +#endif + uint8_t pad[128]; + } u; +}; +typedef struct xen_domctl xen_domctl_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_t); + +#endif /* __XEN_PUBLIC_DOMCTL_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/domctl.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/nmi.h =================================================================== --- xen/interface/nmi.h (.../stable/6/sys) (revision 0) +++ xen/interface/nmi.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,78 @@ +/****************************************************************************** + * nmi.h + * + * NMI callback registration and reason codes. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
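Every sub-operation defined above travels inside the single struct xen_domctl wrapper: the caller selects the sub-op with cmd, stamps interface_version, names the target domain, and fills the matching union member. A minimal sketch of building one such request follows; the helper name, the memset() call and the include paths are assumptions, and the mechanism that submits the request (privcmd ioctl or direct hypercall) is outside this header and omitted.

/*
 * Illustrative sketch only: fill a XEN_DOMCTL_settimeoffset request.
 * Structure and field names come from the definitions above; the
 * include paths and the helper itself are assumptions.
 */
#include <string.h>                /* memset(); assumes a userland caller */
#include <xen/interface/domctl.h>  /* assumed include path for this header */

static void
build_settimeoffset(struct xen_domctl *dc, domid_t dom, int32_t seconds)
{
	memset(dc, 0, sizeof(*dc));
	dc->cmd = XEN_DOMCTL_settimeoffset;
	dc->interface_version = XEN_DOMCTL_INTERFACE_VERSION;
	dc->domain = dom;
	dc->u.settimeoffset.time_offset_seconds = seconds;
}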
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_NMI_H__ +#define __XEN_PUBLIC_NMI_H__ + +/* + * NMI reason codes: + * Currently these are x86-specific, stored in arch_shared_info.nmi_reason. + */ + /* I/O-check error reported via ISA port 0x61, bit 6. */ +#define _XEN_NMIREASON_io_error 0 +#define XEN_NMIREASON_io_error (1UL << _XEN_NMIREASON_io_error) + /* Parity error reported via ISA port 0x61, bit 7. */ +#define _XEN_NMIREASON_parity_error 1 +#define XEN_NMIREASON_parity_error (1UL << _XEN_NMIREASON_parity_error) + /* Unknown hardware-generated NMI. */ +#define _XEN_NMIREASON_unknown 2 +#define XEN_NMIREASON_unknown (1UL << _XEN_NMIREASON_unknown) + +/* + * long nmi_op(unsigned int cmd, void *arg) + * NB. All ops return zero on success, else a negative error code. + */ + +/* + * Register NMI callback for this (calling) VCPU. Currently this only makes + * sense for domain 0, vcpu 0. All other callers will be returned EINVAL. + * arg == pointer to xennmi_callback structure. + */ +#define XENNMI_register_callback 0 +struct xennmi_callback { + unsigned long handler_address; + unsigned long pad; +}; +typedef struct xennmi_callback xennmi_callback_t; +DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t); + +/* + * Deregister NMI callback for this (calling) VCPU. + * arg == NULL. + */ +#define XENNMI_unregister_callback 1 + +#endif /* __XEN_PUBLIC_NMI_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/nmi.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/interface/version.h =================================================================== --- xen/interface/version.h (.../stable/6/sys) (revision 0) +++ xen/interface/version.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,91 @@ +/****************************************************************************** + * version.h + * + * Xen version, type, and compile information. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
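The callback registration above only makes sense for domain 0, vcpu 0, and takes nothing more than the address of an NMI entry point. A sketch of the call shape, assuming a HYPERVISOR_nmi_op() wrapper corresponding to the nmi_op() prototype in the comment; the wrapper and the entry symbol are not part of this header.

/*
 * Illustrative sketch: register an NMI callback from dom0, vcpu0.
 * HYPERVISOR_nmi_op() and nmi_entry() are assumed to exist elsewhere.
 */
extern void nmi_entry(void);		/* assumed low-level entry point */

static long
example_register_nmi(void)
{
	struct xennmi_callback cb;

	cb.handler_address = (unsigned long)nmi_entry;
	cb.pad = 0;
	return (HYPERVISOR_nmi_op(XENNMI_register_callback, &cb));
}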
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Nguyen Anh Quynh + * Copyright (c) 2005, Keir Fraser + */ + +#ifndef __XEN_PUBLIC_VERSION_H__ +#define __XEN_PUBLIC_VERSION_H__ + +/* NB. All ops return zero on success, except XENVER_{version,pagesize} */ + +/* arg == NULL; returns major:minor (16:16). */ +#define XENVER_version 0 + +/* arg == xen_extraversion_t. */ +#define XENVER_extraversion 1 +typedef char xen_extraversion_t[16]; +#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t)) + +/* arg == xen_compile_info_t. */ +#define XENVER_compile_info 2 +struct xen_compile_info { + char compiler[64]; + char compile_by[16]; + char compile_domain[32]; + char compile_date[32]; +}; +typedef struct xen_compile_info xen_compile_info_t; + +#define XENVER_capabilities 3 +typedef char xen_capabilities_info_t[1024]; +#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t)) + +#define XENVER_changeset 4 +typedef char xen_changeset_info_t[64]; +#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t)) + +#define XENVER_platform_parameters 5 +struct xen_platform_parameters { + unsigned long virt_start; +}; +typedef struct xen_platform_parameters xen_platform_parameters_t; + +#define XENVER_get_features 6 +struct xen_feature_info { + unsigned int submap_idx; /* IN: which 32-bit submap to return */ + uint32_t submap; /* OUT: 32-bit submap */ +}; +typedef struct xen_feature_info xen_feature_info_t; + +/* Declares the features reported by XENVER_get_features. */ +#include "features.h" + +/* arg == NULL; returns host memory page size. */ +#define XENVER_pagesize 7 + +/* arg == xen_domain_handle_t. */ +#define XENVER_guest_handle 8 + +#endif /* __XEN_PUBLIC_VERSION_H__ */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ Property changes on: xen/interface/version.h ___________________________________________________________________ Added: fbsd:nokeywords + true Property changes on: xen/interface ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/features.h =================================================================== --- xen/features.h (.../stable/6/sys) (revision 0) +++ xen/features.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,20 @@ +/****************************************************************************** + * features.h + * + * Query the features reported by Xen. 
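XENVER_version is unusual in that it returns its result directly rather than zero, packing major and minor into the upper and lower 16 bits. A sketch of querying it, assuming the usual HYPERVISOR_xen_version() wrapper from the machine-dependent hypercall header, which is not defined here:

/*
 * Illustrative sketch: print the running hypervisor version.
 * HYPERVISOR_xen_version() is assumed; XENVER_* and the typedefs are above.
 */
static void
example_print_xen_version(void)
{
	xen_extraversion_t extra;
	int ver;

	ver = HYPERVISOR_xen_version(XENVER_version, NULL);	/* major:minor, 16:16 */
	HYPERVISOR_xen_version(XENVER_extraversion, extra);
	printf("Xen %d.%d%s\n", ver >> 16, ver & 0xffff, extra);
}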
+ * + * Copyright (c) 2006, Ian Campbell + */ + +#ifndef __ASM_XEN_FEATURES_H__ +#define __ASM_XEN_FEATURES_H__ + +#include + +extern void setup_xen_features(void); + +extern uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32]; + +#define xen_feature(flag) (xen_features[flag]) + +#endif /* __ASM_XEN_FEATURES_H__ */ Property changes on: xen/features.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:mergeinfo Merged /stable/7/sys/i386/include/xen/features.h:r172506,172810,175956,179044,179776,180149,182402 Merged /head/sys/i386/include/xen/features.h:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628 Added: svn:eol-style + native Index: xen/xenbus/xenbus_xs.c =================================================================== --- xen/xenbus/xenbus_xs.c (.../stable/6/sys) (revision 0) +++ xen/xenbus/xenbus_xs.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,929 @@ +/****************************************************************************** + * xenbus_xs.c + * + * This is the kernel equivalent of the "xs" library. We don't need everything + * and we use xenbus_comms for communication. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
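setup_xen_features() fills xen_features[] once during early boot, after which xen_feature() is a plain array lookup with no hypercall cost. A short sketch of the intended use; the surrounding function is an assumption, and the XENFEAT_* flag numbers come from the interface features.h that this header includes.

/*
 * Illustrative sketch: gate a code path on a feature bit reported by Xen.
 * XENFEAT_auto_translated_physmap is defined in xen/interface/features.h.
 */
static void
example_check_features(void)
{
	setup_xen_features();
	if (xen_feature(XENFEAT_auto_translated_physmap))
		printf("running with auto-translated physmap\n");
}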
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include +#include + +static int xs_process_msg(enum xsd_sockmsg_type *type); + +int xenwatch_running = 0; +int xenbus_running = 0; +int xen_store_evtchn; + +struct xs_stored_msg { + TAILQ_ENTRY(xs_stored_msg) list; + + struct xsd_sockmsg hdr; + + union { + /* Queued replies. */ + struct { + char *body; + } reply; + + /* Queued watch events. */ + struct { + struct xenbus_watch *handle; + char **vec; + unsigned int vec_size; + } watch; + } u; +}; + +struct xs_handle { + /* A list of replies. Currently only one will ever be outstanding. */ + TAILQ_HEAD(xs_handle_list, xs_stored_msg) reply_list; + struct mtx reply_lock; + int reply_waitq; + + /* One request at a time. */ + struct sx request_mutex; + + /* Protect transactions against save/restore. */ + struct sx suspend_mutex; +}; + +static struct xs_handle xs_state; + +/* List of registered watches, and a lock to protect it. */ +static LIST_HEAD(watch_list_head, xenbus_watch) watches; +static struct mtx watches_lock; +/* List of pending watch callback events, and a lock to protect it. */ +static TAILQ_HEAD(event_list_head, xs_stored_msg) watch_events; +static struct mtx watch_events_lock; + +/* + * Details of the xenwatch callback kernel thread. The thread waits on the + * watch_events_waitq for work to do (queued on watch_events list). When it + * wakes up it acquires the xenwatch_mutex before reading the list and + * carrying out work. + */ +static pid_t xenwatch_pid; +struct sx xenwatch_mutex; +static int watch_events_waitq; + +#define xsd_error_count (sizeof(xsd_errors) / sizeof(xsd_errors[0])) + +static int +xs_get_error(const char *errorstring) +{ + unsigned int i; + + for (i = 0; i < xsd_error_count; i++) { + if (!strcmp(errorstring, xsd_errors[i].errstring)) + return (xsd_errors[i].errnum); + } + log(LOG_WARNING, "XENBUS xen store gave: unknown error %s", + errorstring); + return (EINVAL); +} + +extern void kdb_backtrace(void); + +static int +xs_read_reply(enum xsd_sockmsg_type *type, unsigned int *len, void **result) +{ + struct xs_stored_msg *msg; + char *body; + int error; + + mtx_lock(&xs_state.reply_lock); + + while (TAILQ_EMPTY(&xs_state.reply_list)) { + while (TAILQ_EMPTY(&xs_state.reply_list)) { + error = mtx_sleep(&xs_state.reply_waitq, + &xs_state.reply_lock, + PCATCH, "xswait", hz/10); + if (error && error != EWOULDBLOCK) { + mtx_unlock(&xs_state.reply_lock); + return (error); + } + } + } + + msg = TAILQ_FIRST(&xs_state.reply_list); + TAILQ_REMOVE(&xs_state.reply_list, msg, list); + + mtx_unlock(&xs_state.reply_lock); + + *type = msg->hdr.type; + if (len) + *len = msg->hdr.len; + body = msg->u.reply.body; + + free(msg, M_DEVBUF); + *result = body; + return (0); +} + +#if 0 +/* Emergency write. 
UNUSED*/ +void xenbus_debug_write(const char *str, unsigned int count) +{ + struct xsd_sockmsg msg = { 0 }; + + msg.type = XS_DEBUG; + msg.len = sizeof("print") + count + 1; + + sx_xlock(&xs_state.request_mutex); + xb_write(&msg, sizeof(msg)); + xb_write("print", sizeof("print")); + xb_write(str, count); + xb_write("", 1); + sx_xunlock(&xs_state.request_mutex); +} + +#endif + +int +xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **result) +{ + struct xsd_sockmsg req_msg = *msg; + int error; + + if (req_msg.type == XS_TRANSACTION_START) + sx_slock(&xs_state.suspend_mutex); + + sx_xlock(&xs_state.request_mutex); + + error = xb_write(msg, sizeof(*msg) + msg->len, + &xs_state.request_mutex.lock_object); + if (error) { + msg->type = XS_ERROR; + } else { + error = xs_read_reply(&msg->type, &msg->len, result); + } + + sx_xunlock(&xs_state.request_mutex); + + if ((msg->type == XS_TRANSACTION_END) || + ((req_msg.type == XS_TRANSACTION_START) && + (msg->type == XS_ERROR))) + sx_sunlock(&xs_state.suspend_mutex); + + return (error); +} + +/* + * Send message to xs. The reply is returned in *result and should be + * fred with free(*result, M_DEVBUF). Return zero on success or an + * error code on failure. + */ +static int +xs_talkv(struct xenbus_transaction t, enum xsd_sockmsg_type type, + const struct iovec *iovec, unsigned int num_vecs, + unsigned int *len, void **result) +{ + struct xsd_sockmsg msg; + void *ret = NULL; + unsigned int i; + int error; + + msg.tx_id = t.id; + msg.req_id = 0; + msg.type = type; + msg.len = 0; + for (i = 0; i < num_vecs; i++) + msg.len += iovec[i].iov_len; + + sx_xlock(&xs_state.request_mutex); + + error = xb_write(&msg, sizeof(msg), + &xs_state.request_mutex.lock_object); + if (error) { + sx_xunlock(&xs_state.request_mutex); + printf("xs_talkv failed %d\n", error); + return (error); + } + + for (i = 0; i < num_vecs; i++) { + error = xb_write(iovec[i].iov_base, iovec[i].iov_len, + &xs_state.request_mutex.lock_object); + if (error) { + sx_xunlock(&xs_state.request_mutex); + printf("xs_talkv failed %d\n", error); + return (error); + } + } + + error = xs_read_reply(&msg.type, len, &ret); + + sx_xunlock(&xs_state.request_mutex); + + if (error) + return (error); + + if (msg.type == XS_ERROR) { + error = xs_get_error(ret); + free(ret, M_DEVBUF); + return (error); + } + +#if 0 + if ((xenwatch_running == 0) && (xenwatch_inline == 0)) { + xenwatch_inline = 1; + while (!TAILQ_EMPTY(&watch_events) + && xenwatch_running == 0) { + + struct xs_stored_msg *wmsg = TAILQ_FIRST(&watch_events); + TAILQ_REMOVE(&watch_events, wmsg, list); + + wmsg->u.watch.handle->callback( + wmsg->u.watch.handle, + (const char **)wmsg->u.watch.vec, + wmsg->u.watch.vec_size); + free(wmsg->u.watch.vec, M_DEVBUF); + free(wmsg, M_DEVBUF); + } + xenwatch_inline = 0; + } +#endif + KASSERT(msg.type == type, ("bad xenstore message type")); + + if (result) + *result = ret; + else + free(ret, M_DEVBUF); + + return (0); +} + +/* Simplified version of xs_talkv: single message. 
*/ +static int +xs_single(struct xenbus_transaction t, enum xsd_sockmsg_type type, + const char *string, unsigned int *len, void **result) +{ + struct iovec iovec; + + iovec.iov_base = (void *)(uintptr_t) string; + iovec.iov_len = strlen(string) + 1; + + return (xs_talkv(t, type, &iovec, 1, len, result)); +} + +static unsigned int +count_strings(const char *strings, unsigned int len) +{ + unsigned int num; + const char *p; + + for (p = strings, num = 0; p < strings + len; p += strlen(p) + 1) + num++; + + return num; +} + +/* Return the path to dir with /name appended. Buffer must be kfree()'ed. */ +static char * +join(const char *dir, const char *name) +{ + char *buffer; + + buffer = malloc(strlen(dir) + strlen("/") + strlen(name) + 1, + M_DEVBUF, M_WAITOK); + + strcpy(buffer, dir); + if (strcmp(name, "")) { + strcat(buffer, "/"); + strcat(buffer, name); + } + + return (buffer); +} + +static char ** +split(char *strings, unsigned int len, unsigned int *num) +{ + char *p, **ret; + + /* Count the strings. */ + *num = count_strings(strings, len) + 1; + + /* Transfer to one big alloc for easy freeing. */ + ret = malloc(*num * sizeof(char *) + len, M_DEVBUF, M_WAITOK); + memcpy(&ret[*num], strings, len); + free(strings, M_DEVBUF); + + strings = (char *)&ret[*num]; + for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) + ret[(*num)++] = p; + + ret[*num] = strings + len; + + return ret; +} + +/* + * Return the contents of a directory in *result which should be freed + * with free(*result, M_DEVBUF). + */ +int +xenbus_directory(struct xenbus_transaction t, const char *dir, + const char *node, unsigned int *num, char ***result) +{ + char *strings, *path; + unsigned int len = 0; + int error; + + path = join(dir, node); + error = xs_single(t, XS_DIRECTORY, path, &len, (void **) &strings); + free(path, M_DEVBUF); + if (error) + return (error); + + *result = split(strings, len, num); + return (0); +} + +/* + * Check if a path exists. Return 1 if it does. + */ +int +xenbus_exists(struct xenbus_transaction t, const char *dir, const char *node) +{ + char **d; + int error, dir_n; + + error = xenbus_directory(t, dir, node, &dir_n, &d); + if (error) + return (0); + free(d, M_DEVBUF); + return (1); +} + +/* + * Get the value of a single file. Returns the contents in *result + * which should be freed with free(*result, M_DEVBUF) after use. + * The length of the value in bytes is returned in *len. + */ +int +xenbus_read(struct xenbus_transaction t, const char *dir, const char *node, + unsigned int *len, void **result) +{ + char *path; + void *ret; + int error; + + path = join(dir, node); + error = xs_single(t, XS_READ, path, len, &ret); + free(path, M_DEVBUF); + if (error) + return (error); + *result = ret; + return (0); +} + +/* + * Write the value of a single file. Returns error on failure. + */ +int +xenbus_write(struct xenbus_transaction t, const char *dir, const char *node, + const char *string) +{ + char *path; + struct iovec iovec[2]; + int error; + + path = join(dir, node); + + iovec[0].iov_base = (void *)(uintptr_t) path; + iovec[0].iov_len = strlen(path) + 1; + iovec[1].iov_base = (void *)(uintptr_t) string; + iovec[1].iov_len = strlen(string); + + error = xs_talkv(t, XS_WRITE, iovec, 2, NULL, NULL); + free(path, M_DEVBUF); + + return (error); +} + +/* + * Create a new directory. 
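The directory/read/write helpers above follow one pattern: join the directory and node names, issue a single xenstore request, and hand back memory the caller frees with free(..., M_DEVBUF). A sketch of typical use, with the node path and printed message as assumptions and error handling reduced to returning the error:

/*
 * Illustrative sketch: read a device node's "state" key and publish a
 * "feature-foo" key next to it. The node path is an assumption.
 */
static int
example_node_io(const char *node)
{
	char *val;
	int error;

	error = xenbus_read(XBT_NIL, node, "state", NULL, (void **)&val);
	if (error)
		return (error);
	printf("%s/state = %s\n", node, val);
	free(val, M_DEVBUF);

	return (xenbus_write(XBT_NIL, node, "feature-foo", "1"));
}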
+ */ +int +xenbus_mkdir(struct xenbus_transaction t, const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + ret = xs_single(t, XS_MKDIR, path, NULL, NULL); + free(path, M_DEVBUF); + + return (ret); +} + +/* + * Destroy a file or directory (directories must be empty). + */ +int +xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node) +{ + char *path; + int ret; + + path = join(dir, node); + ret = xs_single(t, XS_RM, path, NULL, NULL); + free(path, M_DEVBUF); + + return (ret); +} + +/* + * Start a transaction: changes by others will not be seen during this + * transaction, and changes will not be visible to others until end. + */ +int +xenbus_transaction_start(struct xenbus_transaction *t) +{ + char *id_str; + int error; + + sx_slock(&xs_state.suspend_mutex); + error = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL, + (void **) &id_str); + if (error) { + sx_sunlock(&xs_state.suspend_mutex); + return (error); + } + + t->id = strtoul(id_str, NULL, 0); + free(id_str, M_DEVBUF); + + return (0); +} + +/* + * End a transaction. If abandon is true, transaction is discarded + * instead of committed. + */ +int xenbus_transaction_end(struct xenbus_transaction t, int abort) +{ + char abortstr[2]; + int error; + + if (abort) + strcpy(abortstr, "F"); + else + strcpy(abortstr, "T"); + + error = xs_single(t, XS_TRANSACTION_END, abortstr, NULL, NULL); + + sx_sunlock(&xs_state.suspend_mutex); + + return (error); +} + +/* Single read and scanf: returns zero or errno. */ +int +xenbus_scanf(struct xenbus_transaction t, + const char *dir, const char *node, int *scancountp, const char *fmt, ...) +{ + va_list ap; + int error, ns; + char *val; + + error = xenbus_read(t, dir, node, NULL, (void **) &val); + if (error) + return (error); + + va_start(ap, fmt); + ns = vsscanf(val, fmt, ap); + va_end(ap); + free(val, M_DEVBUF); + /* Distinctive errno. */ + if (ns == 0) + return (ERANGE); + if (scancountp) + *scancountp = ns; + return (0); +} + +/* Single printf and write: returns zero or errno. */ +int +xenbus_printf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) +{ + va_list ap; + int error, ret; +#define PRINTF_BUFFER_SIZE 4096 + char *printf_buffer; + + printf_buffer = malloc(PRINTF_BUFFER_SIZE, M_DEVBUF, M_WAITOK); + + va_start(ap, fmt); + ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap); + va_end(ap); + + KASSERT(ret <= PRINTF_BUFFER_SIZE-1, ("xenbus_printf: message too large")); + error = xenbus_write(t, dir, node, printf_buffer); + + free(printf_buffer, M_DEVBUF); + + return (error); +} + +/* Takes tuples of names, scanf-style args, and void **, NULL terminated. */ +int +xenbus_gather(struct xenbus_transaction t, const char *dir, ...) 
+{ + va_list ap; + const char *name; + int error, i; + + for (i = 0; i < 10000; i++) + HYPERVISOR_yield(); + + va_start(ap, dir); + error = 0; + while (error == 0 && (name = va_arg(ap, char *)) != NULL) { + const char *fmt = va_arg(ap, char *); + void *result = va_arg(ap, void *); + char *p; + + error = xenbus_read(t, dir, name, NULL, (void **) &p); + if (error) + break; + + if (fmt) { + if (sscanf(p, fmt, result) == 0) + error = EINVAL; + free(p, M_DEVBUF); + } else + *(char **)result = p; + } + va_end(ap); + + return (error); +} + +static int +xs_watch(const char *path, const char *token) +{ + struct iovec iov[2]; + + iov[0].iov_base = (void *)(uintptr_t) path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (void *)(uintptr_t) token; + iov[1].iov_len = strlen(token) + 1; + + return (xs_talkv(XBT_NIL, XS_WATCH, iov, 2, NULL, NULL)); +} + +static int +xs_unwatch(const char *path, const char *token) +{ + struct iovec iov[2]; + + iov[0].iov_base = (void *)(uintptr_t) path; + iov[0].iov_len = strlen(path) + 1; + iov[1].iov_base = (void *)(uintptr_t) token; + iov[1].iov_len = strlen(token) + 1; + + return (xs_talkv(XBT_NIL, XS_UNWATCH, iov, 2, NULL, NULL)); +} + +static struct xenbus_watch * +find_watch(const char *token) +{ + struct xenbus_watch *i, *cmp; + + cmp = (void *)strtoul(token, NULL, 16); + + LIST_FOREACH(i, &watches, list) + if (i == cmp) + return (i); + + return (NULL); +} + +/* Register callback to watch this node. */ +int +register_xenbus_watch(struct xenbus_watch *watch) +{ + /* Pointer in ascii is the token. */ + char token[sizeof(watch) * 2 + 1]; + int error; + + sprintf(token, "%lX", (long)watch); + + sx_slock(&xs_state.suspend_mutex); + + mtx_lock(&watches_lock); + KASSERT(find_watch(token) == NULL, ("watch already registered")); + LIST_INSERT_HEAD(&watches, watch, list); + mtx_unlock(&watches_lock); + + error = xs_watch(watch->node, token); + + /* Ignore errors due to multiple registration. */ + if (error == EEXIST) { + mtx_lock(&watches_lock); + LIST_REMOVE(watch, list); + mtx_unlock(&watches_lock); + } + + sx_sunlock(&xs_state.suspend_mutex); + + return (error); +} + +void +unregister_xenbus_watch(struct xenbus_watch *watch) +{ + struct xs_stored_msg *msg, *tmp; + char token[sizeof(watch) * 2 + 1]; + int error; + + sprintf(token, "%lX", (long)watch); + + sx_slock(&xs_state.suspend_mutex); + + mtx_lock(&watches_lock); + KASSERT(find_watch(token), ("watch not registered")); + LIST_REMOVE(watch, list); + mtx_unlock(&watches_lock); + + error = xs_unwatch(watch->node, token); + if (error) + log(LOG_WARNING, "XENBUS Failed to release watch %s: %i\n", + watch->node, error); + + sx_sunlock(&xs_state.suspend_mutex); + + /* Cancel pending watch events. */ + mtx_lock(&watch_events_lock); + TAILQ_FOREACH_SAFE(msg, &watch_events, list, tmp) { + if (msg->u.watch.handle != watch) + continue; + TAILQ_REMOVE(&watch_events, msg, list); + free(msg->u.watch.vec, M_DEVBUF); + free(msg, M_DEVBUF); + } + mtx_unlock(&watch_events_lock); + + /* Flush any currently-executing callback, unless we are it. :-) */ + if (curproc->p_pid != xenwatch_pid) { + sx_xlock(&xenwatch_mutex); + sx_xunlock(&xenwatch_mutex); + } +} + +void +xs_suspend(void) +{ + + sx_xlock(&xs_state.suspend_mutex); + sx_xlock(&xs_state.request_mutex); +} + +void +xs_resume(void) +{ + struct xenbus_watch *watch; + char token[sizeof(watch) * 2 + 1]; + + sx_xunlock(&xs_state.request_mutex); + + /* No need for watches_lock: the suspend_mutex is sufficient. 
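A watch registration hands the xenstore code a caller-owned struct xenbus_watch whose node and callback members stay valid until unregister_xenbus_watch() is called; the pointer itself doubles as the token. A minimal sketch, where the watched node and the callback body are assumptions:

/*
 * Illustrative sketch: watch an assumed node and log each event.
 * XS_WATCH_PATH indexes the changed path in the event vector (xs_wire.h).
 */
static char example_node[] = "device/vif/0/state";	/* assumed node */
static struct xenbus_watch example_watch;

static void
example_watch_cb(struct xenbus_watch *watch, const char **vec,
    unsigned int len)
{
	printf("xenstore node %s changed\n", vec[XS_WATCH_PATH]);
}

static int
example_register(void)
{
	example_watch.node = example_node;
	example_watch.callback = example_watch_cb;
	return (register_xenbus_watch(&example_watch));
}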
*/ + LIST_FOREACH(watch, &watches, list) { + sprintf(token, "%lX", (long)watch); + xs_watch(watch->node, token); + } + + sx_xunlock(&xs_state.suspend_mutex); +} + +static void +xenwatch_thread(void *unused) +{ + struct xs_stored_msg *msg; + + for (;;) { + + mtx_lock(&watch_events_lock); + while (TAILQ_EMPTY(&watch_events)) + mtx_sleep(&watch_events_waitq, + &watch_events_lock, + PWAIT | PCATCH, "waitev", hz/10); + + mtx_unlock(&watch_events_lock); + sx_xlock(&xenwatch_mutex); + + mtx_lock(&watch_events_lock); + msg = TAILQ_FIRST(&watch_events); + if (msg) + TAILQ_REMOVE(&watch_events, msg, list); + mtx_unlock(&watch_events_lock); + + if (msg != NULL) { + msg->u.watch.handle->callback( + msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + free(msg->u.watch.vec, M_DEVBUF); + free(msg, M_DEVBUF); + } + + sx_xunlock(&xenwatch_mutex); + } +} + +static int +xs_process_msg(enum xsd_sockmsg_type *type) +{ + struct xs_stored_msg *msg; + char *body; + int error; + + msg = malloc(sizeof(*msg), M_DEVBUF, M_WAITOK); + mtx_lock(&xs_state.reply_lock); + error = xb_read(&msg->hdr, sizeof(msg->hdr), + &xs_state.reply_lock.mtx_object); + mtx_unlock(&xs_state.reply_lock); + if (error) { + free(msg, M_DEVBUF); + return (error); + } + + body = malloc(msg->hdr.len + 1, M_DEVBUF, M_WAITOK); + mtx_lock(&xs_state.reply_lock); + error = xb_read(body, msg->hdr.len, + &xs_state.reply_lock.mtx_object); + mtx_unlock(&xs_state.reply_lock); + if (error) { + free(body, M_DEVBUF); + free(msg, M_DEVBUF); + return (error); + } + body[msg->hdr.len] = '\0'; + + *type = msg->hdr.type; + if (msg->hdr.type == XS_WATCH_EVENT) { + msg->u.watch.vec = split(body, msg->hdr.len, + &msg->u.watch.vec_size); + + mtx_lock(&watches_lock); + msg->u.watch.handle = find_watch( + msg->u.watch.vec[XS_WATCH_TOKEN]); + if (msg->u.watch.handle != NULL) { + mtx_lock(&watch_events_lock); + TAILQ_INSERT_TAIL(&watch_events, msg, list); + wakeup(&watch_events_waitq); + mtx_unlock(&watch_events_lock); + } else { + free(msg->u.watch.vec, M_DEVBUF); + free(msg, M_DEVBUF); + } + mtx_unlock(&watches_lock); + } else { + msg->u.reply.body = body; + mtx_lock(&xs_state.reply_lock); + TAILQ_INSERT_TAIL(&xs_state.reply_list, msg, list); + wakeup(&xs_state.reply_waitq); + mtx_unlock(&xs_state.reply_lock); + } + + return 0; +} + +static void +xenbus_thread(void *unused) +{ + int error; + enum xsd_sockmsg_type type; + + xenbus_running = 1; + + for (;;) { + error = xs_process_msg(&type); + if (error) + printf("XENBUS error %d while reading message\n", + error); + } +} + +#ifdef XENHVM +static unsigned long xen_store_mfn; +char *xen_store; + +static inline unsigned long +hvm_get_parameter(int index) +{ + struct xen_hvm_param xhv; + int error; + + xhv.domid = DOMID_SELF; + xhv.index = index; + error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); + if (error) { + printf("hvm_get_parameter: failed to get %d, error %d\n", + index, error); + return (0); + } + return (xhv.value); +} + +#endif + +int +xs_init(void) +{ + int error; + struct proc *p; + +#ifdef XENHVM + xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN); + xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN); + xen_store = pmap_mapdev(xen_store_mfn * PAGE_SIZE, PAGE_SIZE); +#else + xen_store_evtchn = xen_start_info->store_evtchn; +#endif + + TAILQ_INIT(&xs_state.reply_list); + TAILQ_INIT(&watch_events); + sx_init(&xenwatch_mutex, "xenwatch"); + + + mtx_init(&xs_state.reply_lock, "state reply", NULL, MTX_DEF); + sx_init(&xs_state.request_mutex, "xenstore request"); + 
sx_init(&xs_state.suspend_mutex, "xenstore suspend"); + + +#if 0 + mtx_init(&xs_state.suspend_mutex, "xenstore suspend", NULL, MTX_DEF); + sema_init(&xs_state.request_mutex, 1, "xenstore request"); + sema_init(&xenwatch_mutex, 1, "xenwatch"); +#endif + mtx_init(&watches_lock, "watches", NULL, MTX_DEF); + mtx_init(&watch_events_lock, "watch events", NULL, MTX_DEF); + + /* Initialize the shared memory rings to talk to xenstored */ + error = xb_init_comms(); + if (error) + return (error); + + xenwatch_running = 1; + error = kthread_create(xenwatch_thread, NULL, &p, + RFHIGHPID, 0, "xenwatch"); + if (error) + return (error); + xenwatch_pid = p->p_pid; + + error = kthread_create(xenbus_thread, NULL, NULL, + RFHIGHPID, 0, "xenbus"); + + return (error); +} Property changes on: xen/xenbus/xenbus_xs.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: xen/xenbus/init.txt =================================================================== --- xen/xenbus/init.txt (.../stable/6/sys) (revision 0) +++ xen/xenbus/init.txt (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,14 @@ + + +- frontend driver initializes static xenbus_driver with _ids, _probe, _remove, +_resume, _otherend_changed + + - initialization calls xenbus_register_frontend(xenbus_driver) + + - xenbus_register_frontend sets read_otherend details to read_backend_details + then calls xenbus_register_driver_common(xenbus_driver, xenbus_frontend) + + - xenbus_register_driver_common sets underlying driver name to xenbus_driver name + underlying driver bus to xenbus_frontend's bus, driver's probe to xenbus_dev_probe + driver's remove to xenbus_dev_remove then calls driver_register + Property changes on: xen/xenbus/init.txt ___________________________________________________________________ Added: fbsd:nokeyword + true Added: fbsd:nokeywords + true Index: xen/xenbus/xenbus_client.c =================================================================== --- xen/xenbus/xenbus_client.c (.../stable/6/sys) (revision 0) +++ xen/xenbus/xenbus_client.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,251 @@ +/****************************************************************************** + * Client-facing interface for the Xenbus driver. In other words, the + * interface between the Xenbus and the device-specific code, be it the + * frontend or the backend of that driver. + * + * Copyright (C) 2005 XenSource Ltd + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#if 0 +#define DPRINTK(fmt, args...) \ + printk("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) ((void)0) +#endif + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +const char * +xenbus_strstate(XenbusState state) +{ + static const char *const name[] = { + [ XenbusStateUnknown ] = "Unknown", + [ XenbusStateInitialising ] = "Initialising", + [ XenbusStateInitWait ] = "InitWait", + [ XenbusStateInitialised ] = "Initialised", + [ XenbusStateConnected ] = "Connected", + [ XenbusStateClosing ] = "Closing", + [ XenbusStateClosed ] = "Closed", + }; + + return ((state < (XenbusStateClosed + 1)) ? name[state] : "INVALID"); +} + +int +xenbus_watch_path(device_t dev, char *path, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, const char **, unsigned int)) +{ + int error; + + watch->node = path; + watch->callback = callback; + + error = register_xenbus_watch(watch); + + if (error) { + watch->node = NULL; + watch->callback = NULL; + xenbus_dev_fatal(dev, error, "adding watch on %s", path); + } + + return (error); +} + +int +xenbus_watch_path2(device_t dev, const char *path, + const char *path2, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, const char **, unsigned int)) +{ + int error; + char *state = malloc(strlen(path) + 1 + strlen(path2) + 1, + M_DEVBUF, M_WAITOK); + + strcpy(state, path); + strcat(state, "/"); + strcat(state, path2); + + error = xenbus_watch_path(dev, state, watch, callback); + if (error) { + free(state, M_DEVBUF); + } + + return (error); +} + +/** + * Return the path to the error node for the given device, or NULL on failure. + * If the value returned is non-NULL, then it is the caller's to kfree. + */ +static char * +error_path(device_t dev) +{ + char *path_buffer = malloc(strlen("error/") + + strlen(xenbus_get_node(dev)) + 1, M_DEVBUF, M_WAITOK); + + strcpy(path_buffer, "error/"); + strcpy(path_buffer + strlen("error/"), xenbus_get_node(dev)); + + return (path_buffer); +} + + +static void +_dev_error(device_t dev, int err, const char *fmt, va_list ap) +{ + int ret; + unsigned int len; + char *printf_buffer = NULL, *path_buffer = NULL; + +#define PRINTF_BUFFER_SIZE 4096 + printf_buffer = malloc(PRINTF_BUFFER_SIZE, M_DEVBUF, M_WAITOK); + + len = sprintf(printf_buffer, "%i ", err); + ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); + + KASSERT(len + ret <= PRINTF_BUFFER_SIZE-1, ("xenbus error message too big")); +#if 0 + dev_err(&dev->dev, "%s\n", printf_buffer); +#endif + path_buffer = error_path(dev); + + if (path_buffer == NULL) { + printf("xenbus: failed to write error node for %s (%s)\n", + xenbus_get_node(dev), printf_buffer); + goto fail; + } + + if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) { + printf("xenbus: failed to write error node for %s (%s)\n", + xenbus_get_node(dev), printf_buffer); + goto fail; + } + + fail: + if (printf_buffer) + free(printf_buffer, M_DEVBUF); + if (path_buffer) + free(path_buffer, M_DEVBUF); +} + +void +xenbus_dev_error(device_t dev, int err, const char *fmt, ...) 
+{ + va_list ap; + + va_start(ap, fmt); + _dev_error(dev, err, fmt, ap); + va_end(ap); +} + +void +xenbus_dev_fatal(device_t dev, int err, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + _dev_error(dev, err, fmt, ap); + va_end(ap); + + xenbus_set_state(dev, XenbusStateClosing); +} + +int +xenbus_grant_ring(device_t dev, unsigned long ring_mfn, int *refp) +{ + int error; + grant_ref_t ref; + + error = gnttab_grant_foreign_access( + xenbus_get_otherend_id(dev), ring_mfn, 0, &ref); + if (error) { + xenbus_dev_fatal(dev, error, "granting access to ring page"); + return (error); + } + + *refp = ref; + return (0); +} + +int +xenbus_alloc_evtchn(device_t dev, int *port) +{ + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = xenbus_get_otherend_id(dev); + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + + if (err) { + xenbus_dev_fatal(dev, -err, "allocating event channel"); + return (-err); + } + *port = alloc_unbound.port; + return (0); +} + +int +xenbus_free_evtchn(device_t dev, int port) +{ + struct evtchn_close close; + int err; + + close.port = port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + if (err) { + xenbus_dev_error(dev, -err, "freeing event channel %d", port); + return (-err); + } + return (0); +} + +XenbusState +xenbus_read_driver_state(const char *path) +{ + XenbusState result; + int error; + + error = xenbus_gather(XBT_NIL, path, "state", "%d", &result, NULL); + if (error) + result = XenbusStateClosed; + + return (result); +} Property changes on: xen/xenbus/xenbus_client.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: xen/xenbus/xenbus_comms.c =================================================================== --- xen/xenbus/xenbus_comms.c (.../stable/6/sys) (revision 0) +++ xen/xenbus/xenbus_comms.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,226 @@ +/****************************************************************************** + * xenbus_comms.c + * + * Low level code to talks to Xen Store: ringbuffer and event channel. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
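xenbus_grant_ring() and xenbus_alloc_evtchn() are the two halves of the usual frontend handshake: grant the shared ring page to the backend, allocate an unbound event channel, then publish both references in the device's xenstore directory. A sketch of that sequence; the caller is assumed to have allocated the ring page already, and the key names follow the common blkfront/netfront convention rather than anything defined in this file.

/*
 * Illustrative sketch: publish a shared ring and event channel for a
 * frontend device. ring_mfn is the machine frame of the ring page.
 */
static int
example_publish_ring(device_t dev, unsigned long ring_mfn)
{
	int error, ring_ref, port;

	error = xenbus_grant_ring(dev, ring_mfn, &ring_ref);
	if (error)
		return (error);

	error = xenbus_alloc_evtchn(dev, &port);
	if (error)
		return (error);

	error = xenbus_printf(XBT_NIL, xenbus_get_node(dev),
	    "ring-ref", "%d", ring_ref);
	if (error)
		return (error);

	return (xenbus_printf(XBT_NIL, xenbus_get_node(dev),
	    "event-channel", "%d", port));
}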
+ */ + + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +static unsigned int xenstore_irq; + +static inline struct xenstore_domain_interface * +xenstore_domain_interface(void) +{ + + return (struct xenstore_domain_interface *)xen_store; +} + +static void +xb_intr(void * arg __attribute__((unused))) +{ + + wakeup(xen_store); +} + +static int +xb_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod) +{ + + return ((prod - cons) <= XENSTORE_RING_SIZE); +} + +static void * +xb_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, + char *buf, uint32_t *len) +{ + + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod); + if ((XENSTORE_RING_SIZE - (prod - cons)) < *len) + *len = XENSTORE_RING_SIZE - (prod - cons); + return (buf + MASK_XENSTORE_IDX(prod)); +} + +static const void * +xb_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod, + const char *buf, uint32_t *len) +{ + + *len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons); + if ((prod - cons) < *len) + *len = prod - cons; + return (buf + MASK_XENSTORE_IDX(cons)); +} + +int +xb_write(const void *tdata, unsigned len, struct lock_object *lock) +{ + struct xenstore_domain_interface *intf = xenstore_domain_interface(); + XENSTORE_RING_IDX cons, prod; + const char *data = (const char *)tdata; + int error; + + while (len != 0) { + void *dst; + unsigned int avail; + + while ((intf->req_prod - intf->req_cons) + == XENSTORE_RING_SIZE) { + error = _sleep(intf, + lock, + PCATCH, "xbwrite", hz/10); + if (error && error != EWOULDBLOCK) + return (error); + } + + /* Read indexes, then verify. */ + cons = intf->req_cons; + prod = intf->req_prod; + mb(); + if (!xb_check_indexes(cons, prod)) { + intf->req_cons = intf->req_prod = 0; + return (EIO); + } + + dst = xb_get_output_chunk(cons, prod, intf->req, &avail); + if (avail == 0) + continue; + if (avail > len) + avail = len; + mb(); + + memcpy(dst, data, avail); + data += avail; + len -= avail; + + /* Other side must not see new header until data is there. */ + wmb(); + intf->req_prod += avail; + + /* This implies mb() before other side sees interrupt. */ + notify_remote_via_evtchn(xen_store_evtchn); + } + + return (0); +} + +int +xb_read(void *tdata, unsigned len, struct lock_object *lock) +{ + struct xenstore_domain_interface *intf = xenstore_domain_interface(); + XENSTORE_RING_IDX cons, prod; + char *data = (char *)tdata; + int error; + + while (len != 0) { + unsigned int avail; + const char *src; + + while (intf->rsp_cons == intf->rsp_prod) { + error = _sleep(intf, lock, + PCATCH, "xbread", hz/10); + if (error && error != EWOULDBLOCK) + return (error); + } + + /* Read indexes, then verify. */ + cons = intf->rsp_cons; + prod = intf->rsp_prod; + if (!xb_check_indexes(cons, prod)) { + intf->rsp_cons = intf->rsp_prod = 0; + return (EIO); + } + + src = xb_get_input_chunk(cons, prod, intf->rsp, &avail); + if (avail == 0) + continue; + if (avail > len) + avail = len; + + /* We must read header before we read data. */ + rmb(); + + memcpy(data, src, avail); + data += avail; + len -= avail; + + /* Other side must not see free space until we've copied out */ + mb(); + intf->rsp_cons += avail; + + /* Implies mb(): they will see new header. */ + notify_remote_via_evtchn(xen_store_evtchn); + } + + return (0); +} + +/* Set up interrupt handler off store event channel. 
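Both xb_write() and xb_read() bound each copy twice: by the distance to the end of the ring page, so a copy never wraps, and by how much space or data the other side has actually made available. A worked example, assuming the conventional XENSTORE_RING_SIZE of 1024:

/*
 * Worked example for xb_get_output_chunk(), assuming
 * XENSTORE_RING_SIZE == 1024:
 *
 *   req_cons = 1000, req_prod = 1530
 *   prod - cons              = 530   bytes not yet consumed by xenstored
 *   MASK_XENSTORE_IDX(prod)  = 506   write offset within the ring
 *   distance to end of ring  = 1024 - 506 = 518
 *   free space in the ring   = 1024 - 530 = 494
 *
 * The smaller bound wins, so the chunk is 494 bytes starting at
 * intf->req + 506; the remainder is written on the next loop iteration.
 */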
*/ +int +xb_init_comms(void) +{ + struct xenstore_domain_interface *intf = xenstore_domain_interface(); + int error; + + if (intf->rsp_prod != intf->rsp_cons) { + log(LOG_WARNING, "XENBUS response ring is not quiescent " + "(%08x:%08x): fixing up\n", + intf->rsp_cons, intf->rsp_prod); + intf->rsp_cons = intf->rsp_prod; + } + + if (xenstore_irq) + unbind_from_irqhandler(xenstore_irq); + + error = bind_caller_port_to_irqhandler( + xen_store_evtchn, "xenbus", + xb_intr, NULL, INTR_TYPE_NET, &xenstore_irq); + if (error) { + log(LOG_WARNING, "XENBUS request irq failed %i\n", error); + return (error); + } + + return (0); +} Property changes on: xen/xenbus/xenbus_comms.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: xen/xenbus/xenbus_dev.c =================================================================== --- xen/xenbus/xenbus_dev.c (.../stable/6/sys) (revision 0) +++ xen/xenbus/xenbus_dev.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,229 @@ +/* + * xenbus_dev.c + * + * Driver giving user-space access to the kernel's xenbus connection + * to xenstore. + * + * Copyright (c) 2005, Christian Limpach + * Copyright (c) 2005, Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +struct xenbus_dev_transaction { + LIST_ENTRY(xenbus_dev_transaction) list; + struct xenbus_transaction handle; +}; + +struct xenbus_dev_data { + /* In-progress transaction. */ + LIST_HEAD(xdd_list_head, xenbus_dev_transaction) transactions; + + /* Partial request. */ + unsigned int len; + union { + struct xsd_sockmsg msg; + char buffer[PAGE_SIZE]; + } u; + + /* Response queue. 
*/ +#define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1)) + char read_buffer[PAGE_SIZE]; + unsigned int read_cons, read_prod; +}; + +static int +xenbus_dev_read(struct cdev *dev, struct uio *uio, int ioflag) +{ + int error; + struct xenbus_dev_data *u = dev->si_drv1; + + while (u->read_prod == u->read_cons) { + error = tsleep(u, PCATCH, "xbdread", hz/10); + if (error && error != EWOULDBLOCK) + return (error); + } + + while (uio->uio_resid > 0) { + if (u->read_cons == u->read_prod) + break; + error = uiomove(&u->read_buffer[MASK_READ_IDX(u->read_cons)], + 1, uio); + if (error) + return (error); + u->read_cons++; + } + return (0); +} + +static void +queue_reply(struct xenbus_dev_data *u, char *data, unsigned int len) +{ + int i; + + for (i = 0; i < len; i++, u->read_prod++) + u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i]; + + KASSERT((u->read_prod - u->read_cons) <= sizeof(u->read_buffer), + ("xenstore reply too big")); + + wakeup(u); +} + +static int +xenbus_dev_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + int error; + struct xenbus_dev_data *u = dev->si_drv1; + struct xenbus_dev_transaction *trans; + void *reply; + int len = uio->uio_resid; + + if ((len + u->len) > sizeof(u->u.buffer)) + return (EINVAL); + + error = uiomove(u->u.buffer + u->len, len, uio); + if (error) + return (error); + + u->len += len; + if (u->len < (sizeof(u->u.msg) + u->u.msg.len)) + return (0); + + switch (u->u.msg.type) { + case XS_TRANSACTION_START: + case XS_TRANSACTION_END: + case XS_DIRECTORY: + case XS_READ: + case XS_GET_PERMS: + case XS_RELEASE: + case XS_GET_DOMAIN_PATH: + case XS_WRITE: + case XS_MKDIR: + case XS_RM: + case XS_SET_PERMS: + error = xenbus_dev_request_and_reply(&u->u.msg, &reply); + if (!error) { + if (u->u.msg.type == XS_TRANSACTION_START) { + trans = malloc(sizeof(*trans), M_DEVBUF, + M_WAITOK); + trans->handle.id = strtoul(reply, NULL, 0); + LIST_INSERT_HEAD(&u->transactions, trans, list); + } else if (u->u.msg.type == XS_TRANSACTION_END) { + LIST_FOREACH(trans, &u->transactions, list) + if (trans->handle.id == u->u.msg.tx_id) + break; +#if 0 /* XXX does this mean the list is empty? 
*/ + BUG_ON(&trans->list == &u->transactions); +#endif + LIST_REMOVE(trans, list); + free(trans, M_DEVBUF); + } + queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg)); + queue_reply(u, (char *)reply, u->u.msg.len); + free(reply, M_DEVBUF); + } + break; + + default: + error = EINVAL; + break; + } + + if (error == 0) + u->len = 0; + + return (error); +} + +static int +xenbus_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +{ + struct xenbus_dev_data *u; + + if (xen_store_evtchn == 0) + return (ENOENT); +#if 0 /* XXX figure out if equiv needed */ + nonseekable_open(inode, filp); +#endif + u = malloc(sizeof(*u), M_DEVBUF, M_WAITOK|M_ZERO); + LIST_INIT(&u->transactions); + dev->si_drv1 = u; + + return (0); +} + +static int +xenbus_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td) +{ + struct xenbus_dev_data *u = dev->si_drv1; + struct xenbus_dev_transaction *trans, *tmp; + + LIST_FOREACH_SAFE(trans, &u->transactions, list, tmp) { + xenbus_transaction_end(trans->handle, 1); + LIST_REMOVE(trans, list); + free(trans, M_DEVBUF); + } + + free(u, M_DEVBUF); + return (0); +} + +static struct cdevsw xenbus_dev_cdevsw = { + .d_version = D_VERSION, + .d_read = xenbus_dev_read, + .d_write = xenbus_dev_write, + .d_open = xenbus_dev_open, + .d_close = xenbus_dev_close, + .d_name = "xenbus_dev", +}; + +static int +xenbus_dev_sysinit(void) +{ + make_dev(&xenbus_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400, + "xen/xenbus"); + + return (0); +} +SYSINIT(xenbus_dev_sysinit, SI_SUB_DRIVERS, SI_ORDER_MIDDLE, + xenbus_dev_sysinit, NULL); Property changes on: xen/xenbus/xenbus_dev.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: xen/xenbus/xenbus_probe_backend.c =================================================================== --- xen/xenbus/xenbus_probe_backend.c (.../stable/6/sys) (revision 0) +++ xen/xenbus/xenbus_probe_backend.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,308 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have (backend half). + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005, 2006 XenSource Ltd + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
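The character device above exposes the raw xenstore wire protocol to user space: each request is a struct xsd_sockmsg header followed by its payload, and the reply (header plus payload again) is queued for read(2). A sketch of a minimal user-space client; the include path, the node being read and the omitted error handling are all assumptions, while the device path matches the make_dev() call above.

/*
 * Illustrative userspace sketch: issue XS_READ through /dev/xen/xenbus
 * and print the reply payload. Short reads/writes are not handled.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <xen/io/xs_wire.h>	/* struct xsd_sockmsg, XS_READ; assumed path */

int
main(void)
{
	struct xsd_sockmsg msg, rep;
	const char path[] = "/local/domain/0/name";	/* assumed node */
	char buf[256];
	int fd;

	fd = open("/dev/xen/xenbus", O_RDWR);
	if (fd < 0)
		return (1);

	memset(&msg, 0, sizeof(msg));
	msg.type = XS_READ;
	msg.len = sizeof(path);			/* payload includes the NUL */
	write(fd, &msg, sizeof(msg));
	write(fd, path, sizeof(path));

	read(fd, &rep, sizeof(rep));		/* reply header */
	read(fd, buf, rep.len < sizeof(buf) ? rep.len : sizeof(buf));
	printf("%.*s\n", (int)rep.len, buf);

	close(fd);
	return (0);
}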
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#if 0 +#define DPRINTK(fmt, args...) \ + printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) ((void)0) +#endif + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#define BUG_ON PANIC_IF +#define semaphore sema +#define rw_semaphore sema +#define DEFINE_SPINLOCK(lock) struct mtx lock +#define DECLARE_MUTEX(lock) struct sema lock +#define u32 uint32_t +#define list_del(head, ent) TAILQ_REMOVE(head, ent, list) +#define simple_strtoul strtoul +#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) +#define list_empty TAILQ_EMPTY + +extern struct xendev_list_head xenbus_device_backend_list; +#if 0 +static int xenbus_uevent_backend(struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size); +#endif +static int xenbus_probe_backend(const char *type, const char *domid); + +static int read_frontend_details(struct xenbus_device *xendev) +{ + return read_otherend_details(xendev, "frontend-id", "frontend"); +} + +/* backend/// => -- */ +static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename) +{ + int domid, err; + const char *devid, *type, *frontend; + unsigned int typelen; + + type = strchr(nodename, '/'); + if (!type) + return -EINVAL; + type++; + typelen = strcspn(type, "/"); + if (!typelen || type[typelen] != '/') + return -EINVAL; + + devid = strrchr(nodename, '/') + 1; + + err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid, + "frontend", NULL, &frontend, + NULL); + if (err) + return err; + if (strlen(frontend) == 0) + err = -ERANGE; + if (!err && !xenbus_exists(XBT_NIL, frontend, "")) + err = -ENOENT; + kfree(frontend); + + if (err) + return err; + + if (snprintf(bus_id, BUS_ID_SIZE, + "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE) + return -ENOSPC; + return 0; +} + +static struct xen_bus_type xenbus_backend = { + .root = "backend", + .levels = 3, /* backend/type// */ + .get_bus_id = backend_bus_id, + .probe = xenbus_probe_backend, + .bus = &xenbus_device_backend_list, + +#if 0 + .error = -ENODEV, + .bus = { + .name = "xen-backend", + .match = xenbus_match, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, +// .shutdown = xenbus_dev_shutdown, + .uevent = xenbus_uevent_backend, + }, + .dev = { + .bus_id = "xen-backend", + }, +#endif +}; + +#if 0 +static int xenbus_uevent_backend(struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size) +{ + struct xenbus_device *xdev; + struct xenbus_driver *drv; + int i = 0; + int length = 0; + + DPRINTK(""); + + if (dev == NULL) + return -ENODEV; + + xdev = to_xenbus_device(dev); + if (xdev == NULL) + return -ENODEV; +2 + /* stuff we want to pass to /sbin/hotplug */ + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_TYPE=%s", xdev->devicetype); + + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_PATH=%s", xdev->nodename); + + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_BASE_PATH=%s", xenbus_backend.root); + + /* terminate, set to next free slot, 
shrink available space */ + envp[i] = NULL; + envp = &envp[i]; + num_envp -= i; + buffer = &buffer[length]; + buffer_size -= length; + + if (dev->driver) { + drv = to_xenbus_driver(dev->driver); + if (drv && drv->uevent) + return drv->uevent(xdev, envp, num_envp, buffer, + buffer_size); + } + + return 0; +} +#endif + +int xenbus_register_backend(struct xenbus_driver *drv) +{ + drv->read_otherend_details = read_frontend_details; + + return xenbus_register_driver_common(drv, &xenbus_backend); +} + +/* backend/// */ +static int xenbus_probe_backend_unit(const char *dir, + const char *type, + const char *name) +{ + char *nodename; + int err; + + nodename = kasprintf("%s/%s", dir, name); + if (!nodename) + return -ENOMEM; + + DPRINTK("%s\n", nodename); + + err = xenbus_probe_node(&xenbus_backend, type, nodename); + kfree(nodename); + return err; +} + +/* backend// */ +static int xenbus_probe_backend(const char *type, const char *domid) +{ + char *nodename; + int err = 0; + char **dir; + unsigned int i, dir_n = 0; + + DPRINTK(""); + + nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid); + if (!nodename) + return -ENOMEM; + + dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n); + if (IS_ERR(dir)) { + kfree(nodename); + return PTR_ERR(dir); + } + + for (i = 0; i < dir_n; i++) { + err = xenbus_probe_backend_unit(nodename, type, dir[i]); + if (err) + break; + } + kfree(dir); + kfree(nodename); + return err; +} + +static void backend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + DPRINTK(""); + + dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); +} + +static struct xenbus_watch be_watch = { + .node = "backend", + .callback = backend_changed, +}; +#if 0 +void xenbus_backend_suspend(int (*fn)(struct device *, void *)) +{ + DPRINTK(""); + if (!xenbus_backend.error) + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); +} + +void xenbus_backend_resume(int (*fn)(struct device *, void *)) +{ + DPRINTK(""); + if (!xenbus_backend.error) + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); +} +#endif +void xenbus_backend_probe_and_watch(void) +{ + xenbus_probe_devices(&xenbus_backend); + register_xenbus_watch(&be_watch); +} + +#if 0 +void xenbus_backend_bus_register(void) +{ + xenbus_backend.error = bus_register(&xenbus_backend.bus); + if (xenbus_backend.error) + log(LOG_WARNING, + "XENBUS: Error registering backend bus: %i\n", + xenbus_backend.error); +} + +void xenbus_backend_device_register(void) +{ + if (xenbus_backend.error) + return; + + xenbus_backend.error = device_register(&xenbus_backend.dev); + if (xenbus_backend.error) { + bus_unregister(&xenbus_backend.bus); + log(LOG_WARNING, + "XENBUS: Error registering backend device: %i\n", + xenbus_backend.error); + } +} +#endif Property changes on: xen/xenbus/xenbus_probe_backend.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: xen/xenbus/xenbusvar.h =================================================================== --- xen/xenbus/xenbusvar.h (.../stable/6/sys) (revision 0) +++ xen/xenbus/xenbusvar.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,259 @@ +/****************************************************************************** + * xenbus.h + * + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 XenSource Ltd. 
+ * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef _XEN_XENBUS_XENBUSVAR_H +#define _XEN_XENBUS_XENBUSVAR_H + +#include +#include +#include +#include +#include +#include + +#include "xenbus_if.h" + +enum { + /* + * Path of this device node. + */ + XENBUS_IVAR_NODE, + + /* + * The device type (e.g. vif, vbd). + */ + XENBUS_IVAR_TYPE, + + /* + * The state of this device (not the otherend's state). + */ + XENBUS_IVAR_STATE, + + /* + * Domain ID of the other end device. + */ + XENBUS_IVAR_OTHEREND_ID, + + /* + * Path of the other end device. + */ + XENBUS_IVAR_OTHEREND_PATH +}; + +/* + * Simplified accessors for xenbus devices + */ +#define XENBUS_ACCESSOR(var, ivar, type) \ + __BUS_ACCESSOR(xenbus, var, XENBUS, ivar, type) + +XENBUS_ACCESSOR(node, NODE, const char *) +XENBUS_ACCESSOR(type, TYPE, const char *) +XENBUS_ACCESSOR(state, STATE, enum xenbus_state) +XENBUS_ACCESSOR(otherend_id, OTHEREND_ID, int) +XENBUS_ACCESSOR(otherend_path, OTHEREND_PATH, const char *) + +/* Register callback to watch this node. */ +struct xenbus_watch +{ + LIST_ENTRY(xenbus_watch) list; + + /* Path being watched. */ + char *node; + + /* Callback (executed in a process context with no locks held). */ + void (*callback)(struct xenbus_watch *, + const char **vec, unsigned int len); +}; + +typedef int (*xenstore_event_handler_t)(void *); + +struct xenbus_transaction +{ + uint32_t id; +}; + +#define XBT_NIL ((struct xenbus_transaction) { 0 }) + +int xenbus_directory(struct xenbus_transaction t, const char *dir, + const char *node, unsigned int *num, char ***result); +int xenbus_read(struct xenbus_transaction t, const char *dir, + const char *node, unsigned int *len, void **result); +int xenbus_write(struct xenbus_transaction t, const char *dir, + const char *node, const char *string); +int xenbus_mkdir(struct xenbus_transaction t, const char *dir, + const char *node); +int xenbus_exists(struct xenbus_transaction t, const char *dir, + const char *node); +int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node); +int xenbus_transaction_start(struct xenbus_transaction *t); +int xenbus_transaction_end(struct xenbus_transaction t, int abort); + +/* + * Single read and scanf: returns errno or zero. If scancountp is + * non-null, then number of items scanned is returned in *scanncountp. 
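+ *
+ * A minimal usage sketch, for illustration only: the "ring-ref" node,
+ * the "dev" device_t and the error handling shown here are hypothetical
+ * and not part of this interface.
+ *
+ *	int ref, error;
+ *
+ *	error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev), "ring-ref",
+ *	    NULL, "%d", &ref);
+ *	if (error)
+ *		xenbus_dev_fatal(dev, error, "reading ring-ref");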
+ */ +int xenbus_scanf(struct xenbus_transaction t, + const char *dir, const char *node, int *scancountp, const char *fmt, ...) + __attribute__((format(scanf, 5, 6))); + +/* Single printf and write: returns errno or 0. */ +int xenbus_printf(struct xenbus_transaction t, + const char *dir, const char *node, const char *fmt, ...) + __attribute__((format(printf, 4, 5))); + +/* + * Generic read function: NULL-terminated triples of name, + * sprintf-style type string, and pointer. Returns 0 or errno. + */ +int xenbus_gather(struct xenbus_transaction t, const char *dir, ...); + +/* notifer routines for when the xenstore comes up */ +int register_xenstore_notifier(xenstore_event_handler_t func, void *arg, int priority); +#if 0 +void unregister_xenstore_notifier(); +#endif +int register_xenbus_watch(struct xenbus_watch *watch); +void unregister_xenbus_watch(struct xenbus_watch *watch); +void xs_suspend(void); +void xs_resume(void); + +/* Used by xenbus_dev to borrow kernel's store connection. */ +int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **result); + +#if 0 + +#define XENBUS_IS_ERR_READ(str) ({ \ + if (!IS_ERR(str) && strlen(str) == 0) { \ + free(str, M_DEVBUF); \ + str = ERR_PTR(-ERANGE); \ + } \ + IS_ERR(str); \ +}) + +#endif + +#define XENBUS_EXIST_ERR(err) ((err) == ENOENT || (err) == ERANGE) + + +/** + * Register a watch on the given path, using the given xenbus_watch structure + * for storage, and the given callback function as the callback. Return 0 on + * success, or errno on error. On success, the given path will be saved as + * watch->node, and remains the caller's to free. On error, watch->node will + * be NULL, the device will switch to XenbusStateClosing, and the error will + * be saved in the store. + */ +int xenbus_watch_path(device_t dev, char *path, + struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)); + + +/** + * Register a watch on the given path/path2, using the given xenbus_watch + * structure for storage, and the given callback function as the callback. + * Return 0 on success, or errno on error. On success, the watched path + * (path/path2) will be saved as watch->node, and becomes the caller's to + * kfree(). On error, watch->node will be NULL, so the caller has nothing to + * free, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_watch_path2(device_t dev, const char *path, + const char *path2, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)); + + +/** + * Advertise in the store a change of the given driver to the given new_state. + * which case this is performed inside its own transaction. Return 0 on + * success, or errno on error. On error, the device will switch to + * XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_switch_state(device_t dev, + XenbusState new_state); + + +/** + * Grant access to the given ring_mfn to the peer of the given device. + * Return 0 on success, or errno on error. On error, the device will + * switch to XenbusStateClosing, and the error will be saved in the + * store. The grant ring reference is returned in *refp. + */ +int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, int *refp); + + +/** + * Allocate an event channel for the given xenbus_device, assigning the newly + * created local port to *port. Return 0 on success, or errno on error. 
On + * error, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_alloc_evtchn(device_t dev, int *port); + + +/** + * Free an existing event channel. Returns 0 on success or errno on error. + */ +int xenbus_free_evtchn(device_t dev, int port); + + +/** + * Return the state of the driver rooted at the given store path, or + * XenbusStateClosed if no state can be read. + */ +XenbusState xenbus_read_driver_state(const char *path); + + +/*** + * Report the given negative errno into the store, along with the given + * formatted message. + */ +void xenbus_dev_error(device_t dev, int err, const char *fmt, + ...); + + +/*** + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by + * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly + * closedown of this driver and its peer. + */ +void xenbus_dev_fatal(device_t dev, int err, const char *fmt, + ...); + +int xenbus_dev_init(void); + +const char *xenbus_strstate(enum xenbus_state state); +int xenbus_dev_is_online(device_t dev); +int xenbus_frontend_closed(device_t dev); + +#endif /* _XEN_XENBUS_XENBUSVAR_H */ Property changes on: xen/xenbus/xenbusvar.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:mergeinfo Merged /stable/7/sys/i386/include/xen/xenbus.h:r172506,172810,175956,179044,179776,180149,182402 Merged /head/sys/i386/include/xen/xenbus.h:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628 Added: svn:eol-style + native Index: xen/xenbus/xenbus_comms.h =================================================================== --- xen/xenbus/xenbus_comms.h (.../stable/6/sys) (revision 0) +++ xen/xenbus/xenbus_comms.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,48 @@ +/* + * Private include for xenbus communications. 
+ * + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * $FreeBSD$ + */ + +#ifndef _XENBUS_COMMS_H +#define _XENBUS_COMMS_H + +struct sx; +extern int xen_store_evtchn; +extern char *xen_store; + +int xs_init(void); +int xb_init_comms(void); + +/* Low level routines. */ +int xb_write(const void *data, unsigned len, struct lock_object *); +int xb_read(void *data, unsigned len, struct lock_object *); +extern int xenbus_running; + +char *kasprintf(const char *fmt, ...); + + +#endif /* _XENBUS_COMMS_H */ Property changes on: xen/xenbus/xenbus_comms.h ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: xen/xenbus/xenbus_probe.c =================================================================== --- xen/xenbus/xenbus_probe.c (.../stable/6/sys) (revision 0) +++ xen/xenbus/xenbus_probe.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,603 @@ +/****************************************************************************** + * Talks to Xen Store to figure out what devices we have. + * + * Copyright (C) 2008 Doug Rabson + * Copyright (C) 2005 Rusty Russell, IBM Corporation + * Copyright (C) 2005 Mike Wray, Hewlett-Packard + * Copyright (C) 2005 XenSource Ltd + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#if 0 +#define DPRINTK(fmt, args...) \ + printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) +#else +#define DPRINTK(fmt, args...) ((void)0) +#endif + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +struct xenbus_softc { + struct xenbus_watch xs_devicewatch; + struct task xs_probechildren; + struct intr_config_hook xs_attachcb; + device_t xs_dev; +}; + +struct xenbus_device_ivars { + struct xenbus_watch xd_otherend_watch; /* must be first */ + struct sx xd_lock; + device_t xd_dev; + char *xd_node; /* node name in xenstore */ + char *xd_type; /* xen device type */ + enum xenbus_state xd_state; + int xd_otherend_id; + char *xd_otherend_path; +}; + +/* Simplified asprintf. */ +char * +kasprintf(const char *fmt, ...) +{ + va_list ap; + unsigned int len; + char *p, dummy[1]; + + va_start(ap, fmt); + /* FIXME: vsnprintf has a bug, NULL should work */ + len = vsnprintf(dummy, 0, fmt, ap); + va_end(ap); + + p = malloc(len + 1, M_DEVBUF, M_WAITOK); + va_start(ap, fmt); + vsprintf(p, fmt, ap); + va_end(ap); + return p; +} + +static void +xenbus_identify(driver_t *driver, device_t parent) +{ + + BUS_ADD_CHILD(parent, 0, "xenbus", 0); +} + +static int +xenbus_probe(device_t dev) +{ + int err = 0; + + DPRINTK(""); + + /* Initialize the interface to xenstore. */ + err = xs_init(); + if (err) { + log(LOG_WARNING, + "XENBUS: Error initializing xenstore comms: %i\n", err); + return (ENXIO); + } + err = gnttab_init(); + if (err) { + log(LOG_WARNING, + "XENBUS: Error initializing grant table: %i\n", err); + return (ENXIO); + } + device_set_desc(dev, "Xen Devices"); + + return (0); +} + +static enum xenbus_state +xenbus_otherend_state(struct xenbus_device_ivars *ivars) +{ + + return (xenbus_read_driver_state(ivars->xd_otherend_path)); +} + +static void +xenbus_backend_changed(struct xenbus_watch *watch, const char **vec, + unsigned int len) +{ + struct xenbus_device_ivars *ivars; + device_t dev; + enum xenbus_state newstate; + + ivars = (struct xenbus_device_ivars *) watch; + dev = ivars->xd_dev; + + if (!ivars->xd_otherend_path + || strncmp(ivars->xd_otherend_path, vec[XS_WATCH_PATH], + strlen(ivars->xd_otherend_path))) + return; + + newstate = xenbus_otherend_state(ivars); + XENBUS_BACKEND_CHANGED(dev, newstate); +} + +static int +xenbus_device_exists(device_t dev, const char *node) +{ + device_t *kids; + struct xenbus_device_ivars *ivars; + int i, count, result; + + if (device_get_children(dev, &kids, &count)) + return (FALSE); + + result = FALSE; + for (i = 0; i < count; i++) { + ivars = device_get_ivars(kids[i]); + if (!strcmp(ivars->xd_node, node)) { + result = TRUE; + break; + } + } + free(kids, M_TEMP); + + return (result); +} + +static int +xenbus_add_device(device_t dev, const char *bus, + const char *type, const char *id) +{ + device_t child; + struct xenbus_device_ivars *ivars; + enum xenbus_state state; + char *statepath; + int error; + + ivars = malloc(sizeof(struct xenbus_device_ivars), + M_DEVBUF, M_ZERO|M_WAITOK); + ivars->xd_node = kasprintf("%s/%s/%s", bus, type, id); + + if (xenbus_device_exists(dev, ivars->xd_node)) { + /* + * We are 
already tracking this node + */ + free(ivars->xd_node, M_DEVBUF); + free(ivars, M_DEVBUF); + return (0); + } + + state = xenbus_read_driver_state(ivars->xd_node); + + if (state != XenbusStateInitialising) { + /* + * Device is not new, so ignore it. This can + * happen if a device is going away after + * switching to Closed. + */ + free(ivars->xd_node, M_DEVBUF); + free(ivars, M_DEVBUF); + return (0); + } + + /* + * Find the backend details + */ + error = xenbus_gather(XBT_NIL, ivars->xd_node, + "backend-id", "%i", &ivars->xd_otherend_id, + "backend", NULL, &ivars->xd_otherend_path, + NULL); + if (error) + return (error); + + sx_init(&ivars->xd_lock, "xdlock"); + ivars->xd_type = strdup(type, M_DEVBUF); + ivars->xd_state = XenbusStateInitialising; + + statepath = malloc(strlen(ivars->xd_otherend_path) + + strlen("/state") + 1, M_DEVBUF, M_WAITOK); + sprintf(statepath, "%s/state", ivars->xd_otherend_path); + + ivars->xd_otherend_watch.node = statepath; + ivars->xd_otherend_watch.callback = xenbus_backend_changed; + + child = device_add_child(dev, NULL, -1); + ivars->xd_dev = child; + device_set_ivars(child, ivars); + + return (0); +} + +static int +xenbus_enumerate_type(device_t dev, const char *bus, const char *type) +{ + char **dir; + unsigned int i, count; + int error; + + error = xenbus_directory(XBT_NIL, bus, type, &count, &dir); + if (error) + return (error); + for (i = 0; i < count; i++) + xenbus_add_device(dev, bus, type, dir[i]); + + free(dir, M_DEVBUF); + + return (0); +} + +static int +xenbus_enumerate_bus(device_t dev, const char *bus) +{ + char **dir; + unsigned int i, count; + int error; + + error = xenbus_directory(XBT_NIL, bus, "", &count, &dir); + if (error) + return (error); + for (i = 0; i < count; i++) { + xenbus_enumerate_type(dev, bus, dir[i]); + } + free(dir, M_DEVBUF); + + return (0); +} + +static int +xenbus_probe_children(device_t dev) +{ + device_t *kids; + struct xenbus_device_ivars *ivars; + int i, count; + + /* + * Probe any new devices and register watches for any that + * attach successfully. Since part of the protocol which + * establishes a connection with the other end is interrupt + * driven, we sleep until the device reaches a stable state + * (closed or connected). 
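+	 * The wakeup comes from xenbus_write_ivar(): when the attached
+	 * driver sets XENBUS_IVAR_STATE, the new state is written to the
+	 * store and wakeup(&ivars->xd_state) ends this sleep.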
+ */ + if (device_get_children(dev, &kids, &count) == 0) { + for (i = 0; i < count; i++) { + if (device_get_state(kids[i]) != DS_NOTPRESENT) + continue; + + if (device_probe_and_attach(kids[i])) + continue; + ivars = device_get_ivars(kids[i]); + register_xenbus_watch( + &ivars->xd_otherend_watch); + sx_xlock(&ivars->xd_lock); + while (ivars->xd_state != XenbusStateClosed + && ivars->xd_state != XenbusStateConnected) + sx_sleep(&ivars->xd_state, &ivars->xd_lock, + 0, "xdattach", 0); + sx_xunlock(&ivars->xd_lock); + } + free(kids, M_TEMP); + } + + return (0); +} + +static void +xenbus_probe_children_cb(void *arg, int pending) +{ + device_t dev = (device_t) arg; + + xenbus_probe_children(dev); +} + +static void +xenbus_devices_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + struct xenbus_softc *sc = (struct xenbus_softc *) watch; + device_t dev = sc->xs_dev; + char *node, *bus, *type, *id, *p; + + node = strdup(vec[XS_WATCH_PATH], M_DEVBUF);; + p = strchr(node, '/'); + if (!p) + goto out; + bus = node; + *p = 0; + type = p + 1; + + p = strchr(type, '/'); + if (!p) + goto out; + *p = 0; + id = p + 1; + + p = strchr(id, '/'); + if (p) + *p = 0; + + xenbus_add_device(dev, bus, type, id); + taskqueue_enqueue(taskqueue_thread, &sc->xs_probechildren); +out: + free(node, M_DEVBUF); +} + +static void +xenbus_attach_deferred(void *arg) +{ + device_t dev = (device_t) arg; + struct xenbus_softc *sc = device_get_softc(dev); + int error; + + error = xenbus_enumerate_bus(dev, "device"); + if (error) + return; + xenbus_probe_children(dev); + + sc->xs_dev = dev; + sc->xs_devicewatch.node = "device"; + sc->xs_devicewatch.callback = xenbus_devices_changed; + + TASK_INIT(&sc->xs_probechildren, 0, xenbus_probe_children_cb, dev); + + register_xenbus_watch(&sc->xs_devicewatch); + + config_intrhook_disestablish(&sc->xs_attachcb); +} + +static int +xenbus_attach(device_t dev) +{ + struct xenbus_softc *sc = device_get_softc(dev); + + sc->xs_attachcb.ich_func = xenbus_attach_deferred; + sc->xs_attachcb.ich_arg = dev; + config_intrhook_establish(&sc->xs_attachcb); + + return (0); +} + +static int +xenbus_suspend(device_t dev) +{ + int error; + + DPRINTK(""); + + error = bus_generic_suspend(dev); + if (error) + return (error); + + xs_suspend(); + + return (0); +} + +static int +xenbus_resume(device_t dev) +{ + device_t *kids; + struct xenbus_device_ivars *ivars; + int i, count, error; + char *statepath; + + xb_init_comms(); + xs_resume(); + + /* + * We must re-examine each device and find the new path for + * its backend. + */ + if (device_get_children(dev, &kids, &count) == 0) { + for (i = 0; i < count; i++) { + if (device_get_state(kids[i]) == DS_NOTPRESENT) + continue; + + ivars = device_get_ivars(kids[i]); + + unregister_xenbus_watch( + &ivars->xd_otherend_watch); + ivars->xd_state = XenbusStateInitialising; + + /* + * Find the new backend details and + * re-register our watch. 
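+			 * The backend may live at a different path after a
+			 * save/restore, so query "backend-id" and "backend"
+			 * again before pointing the state watch at the new
+			 * otherend path.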
+ */ + free(ivars->xd_otherend_path, M_DEVBUF); + error = xenbus_gather(XBT_NIL, ivars->xd_node, + "backend-id", "%i", &ivars->xd_otherend_id, + "backend", NULL, &ivars->xd_otherend_path, + NULL); + if (error) + return (error); + + DEVICE_RESUME(kids[i]); + + statepath = malloc(strlen(ivars->xd_otherend_path) + + strlen("/state") + 1, M_DEVBUF, M_WAITOK); + sprintf(statepath, "%s/state", ivars->xd_otherend_path); + + free(ivars->xd_otherend_watch.node, M_DEVBUF); + ivars->xd_otherend_watch.node = statepath; + register_xenbus_watch( + &ivars->xd_otherend_watch); + +#if 0 + /* + * Can't do this yet since we are running in + * the xenwatch thread and if we sleep here, + * we will stop delivering watch notifications + * and the device will never come back online. + */ + sx_xlock(&ivars->xd_lock); + while (ivars->xd_state != XenbusStateClosed + && ivars->xd_state != XenbusStateConnected) + sx_sleep(&ivars->xd_state, &ivars->xd_lock, + 0, "xdresume", 0); + sx_xunlock(&ivars->xd_lock); +#endif + } + free(kids, M_TEMP); + } + + return (0); +} + +static int +xenbus_print_child(device_t dev, device_t child) +{ + struct xenbus_device_ivars *ivars = device_get_ivars(child); + int retval = 0; + + retval += bus_print_child_header(dev, child); + retval += printf(" at %s", ivars->xd_node); + retval += bus_print_child_footer(dev, child); + + return (retval); +} + +static int +xenbus_read_ivar(device_t dev, device_t child, int index, + uintptr_t * result) +{ + struct xenbus_device_ivars *ivars = device_get_ivars(child); + + switch (index) { + case XENBUS_IVAR_NODE: + *result = (uintptr_t) ivars->xd_node; + return (0); + + case XENBUS_IVAR_TYPE: + *result = (uintptr_t) ivars->xd_type; + return (0); + + case XENBUS_IVAR_STATE: + *result = (uintptr_t) ivars->xd_state; + return (0); + + case XENBUS_IVAR_OTHEREND_ID: + *result = (uintptr_t) ivars->xd_otherend_id; + return (0); + + case XENBUS_IVAR_OTHEREND_PATH: + *result = (uintptr_t) ivars->xd_otherend_path; + return (0); + } + + return (ENOENT); +} + +static int +xenbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value) +{ + struct xenbus_device_ivars *ivars = device_get_ivars(child); + enum xenbus_state newstate; + int currstate; + int error; + + switch (index) { + case XENBUS_IVAR_STATE: + newstate = (enum xenbus_state) value; + sx_xlock(&ivars->xd_lock); + if (ivars->xd_state == newstate) + goto out; + + error = xenbus_scanf(XBT_NIL, ivars->xd_node, "state", + NULL, "%d", &currstate); + if (error) + goto out; + + error = xenbus_printf(XBT_NIL, ivars->xd_node, "state", + "%d", newstate); + if (error) { + if (newstate != XenbusStateClosing) /* Avoid looping */ + xenbus_dev_fatal(dev, error, "writing new state"); + goto out; + } + ivars->xd_state = newstate; + wakeup(&ivars->xd_state); + out: + sx_xunlock(&ivars->xd_lock); + return (0); + + case XENBUS_IVAR_NODE: + case XENBUS_IVAR_TYPE: + case XENBUS_IVAR_OTHEREND_ID: + case XENBUS_IVAR_OTHEREND_PATH: + /* + * These variables are read-only. 
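+		 * Children read them through the accessors generated by
+		 * XENBUS_ACCESSOR() in xenbusvar.h, e.g. xenbus_get_node(),
+		 * xenbus_get_type(), xenbus_get_otherend_id() and
+		 * xenbus_get_otherend_path().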
+ */ + return (EINVAL); + } + + return (ENOENT); +} + +SYSCTL_DECL(_dev); +SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen"); +SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xen_store_evtchn, 0, ""); +SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, ""); + +static device_method_t xenbus_methods[] = { + /* Device interface */ + DEVMETHOD(device_identify, xenbus_identify), + DEVMETHOD(device_probe, xenbus_probe), + DEVMETHOD(device_attach, xenbus_attach), + DEVMETHOD(device_detach, bus_generic_detach), + DEVMETHOD(device_shutdown, bus_generic_shutdown), + DEVMETHOD(device_suspend, xenbus_suspend), + DEVMETHOD(device_resume, xenbus_resume), + + /* Bus interface */ + DEVMETHOD(bus_print_child, xenbus_print_child), + DEVMETHOD(bus_read_ivar, xenbus_read_ivar), + DEVMETHOD(bus_write_ivar, xenbus_write_ivar), + + { 0, 0 } +}; + +static char driver_name[] = "xenbus"; +static driver_t xenbus_driver = { + driver_name, + xenbus_methods, + sizeof(struct xenbus_softc), +}; +devclass_t xenbus_devclass; + +#ifdef XENHVM +DRIVER_MODULE(xenbus, xenpci, xenbus_driver, xenbus_devclass, 0, 0); +#else +DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0); +#endif Property changes on: xen/xenbus/xenbus_probe.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: xen/xenbus/xenbus_if.m =================================================================== --- xen/xenbus/xenbus_if.m (.../stable/6/sys) (revision 0) +++ xen/xenbus/xenbus_if.m (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,37 @@ +#- +# Copyright (c) 2008 Doug Rabson +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ +# + +#include +#include + +INTERFACE xenbus; + +METHOD int backend_changed { + device_t dev; + enum xenbus_state newstate; +}; Index: xen/gnttab.h =================================================================== --- xen/gnttab.h (.../stable/6/sys) (revision 0) +++ xen/gnttab.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,164 @@ +/****************************************************************************** + * gnttab.h + * + * Two sets of functionality: + * 1. Granting foreign access to our memory reservation. + * 2. 
Accessing others' memory reservations via grant references. + * (i.e., mechanisms for both sender and recipient of grant references) + * + * Copyright (c) 2004-2005, K A Fraser + * Copyright (c) 2005, Christopher Clark + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __ASM_GNTTAB_H__ + +#include + +#include +#include +#include +#include + +struct gnttab_free_callback { + struct gnttab_free_callback *next; + void (*fn)(void *); + void *arg; + uint16_t count; +}; + +int gnttab_init(void); + +/* + * Allocate a grant table reference and return it in *result. Returns + * zero on success or errno on error. + */ +int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, + int flags, grant_ref_t *result); + +/* + * End access through the given grant reference, iff the grant entry is no + * longer in use. Return 1 if the grant entry was freed, 0 if it is still in + * use. + */ +int gnttab_end_foreign_access_ref(grant_ref_t ref); + +/* + * Eventually end access through the given grant reference, and once that + * access has been ended, free the given page too. Access will be ended + * immediately iff the grant entry is not in use, otherwise it will happen + * some time later. page may be 0, in which case no freeing will occur. 
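+ *
+ * A sketch of the usual grant/revoke pairing; "otherend_id" (the peer
+ * domain) and "mfn" (the machine frame to share) are illustrative names
+ * assumed to be known to the caller.
+ *
+ *	grant_ref_t ref;
+ *	int error;
+ *
+ *	error = gnttab_grant_foreign_access(otherend_id, mfn, 0, &ref);
+ *	if (error)
+ *		return (error);
+ *	... advertise ref to the peer and wait for it to finish ...
+ *	gnttab_end_foreign_access(ref, NULL);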
+ */ +void gnttab_end_foreign_access(grant_ref_t ref, void *page); + +int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result); + +unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); +unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); + +int gnttab_query_foreign_access(grant_ref_t ref); + +/* + * operations on reserved batches of grant references + */ +int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *pprivate_head); + +void gnttab_free_grant_reference(grant_ref_t ref); + +void gnttab_free_grant_references(grant_ref_t head); + +int gnttab_empty_grant_references(const grant_ref_t *pprivate_head); + +int gnttab_claim_grant_reference(grant_ref_t *pprivate_head); + +void gnttab_release_grant_reference(grant_ref_t *private_head, + grant_ref_t release); + +void gnttab_request_free_callback(struct gnttab_free_callback *callback, + void (*fn)(void *), void *arg, uint16_t count); +void gnttab_cancel_free_callback(struct gnttab_free_callback *callback); + +void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid, + unsigned long frame, int flags); + +void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, + unsigned long pfn); + +int gnttab_suspend(void); +int gnttab_resume(void); + +#if 0 + +#include + +static inline void +gnttab_set_map_op(struct gnttab_map_grant_ref *map, vm_paddr_t addr, + uint32_t flags, grant_ref_t ref, domid_t domid) +{ + if (flags & GNTMAP_contains_pte) + map->host_addr = addr; + else if (xen_feature(XENFEAT_auto_translated_physmap)) + map->host_addr = vtophys(addr); + else + map->host_addr = addr; + + map->flags = flags; + map->ref = ref; + map->dom = domid; +} + +static inline void +gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, vm_paddr_t addr, + uint32_t flags, grant_handle_t handle) +{ + if (flags & GNTMAP_contains_pte) + unmap->host_addr = addr; + else if (xen_feature(XENFEAT_auto_translated_physmap)) + unmap->host_addr = vtophys(addr); + else + unmap->host_addr = addr; + + unmap->handle = handle; + unmap->dev_bus_addr = 0; +} + +static inline void +gnttab_set_replace_op(struct gnttab_unmap_and_replace *unmap, vm_paddr_t addr, + vm_paddr_t new_addr, grant_handle_t handle) +{ + if (xen_feature(XENFEAT_auto_translated_physmap)) { + unmap->host_addr = vtophys(addr); + unmap->new_addr = vtophys(new_addr); + } else { + unmap->host_addr = addr; + unmap->new_addr = new_addr; + } + + unmap->handle = handle; +} +#endif + +#endif /* __ASM_GNTTAB_H__ */ Property changes on: xen/gnttab.h ___________________________________________________________________ Added: fbsd:nokeywords + true Index: xen/evtchn/evtchn_dev.c =================================================================== --- xen/evtchn/evtchn_dev.c (.../stable/6/sys) (revision 0) +++ xen/evtchn/evtchn_dev.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,392 @@ +/****************************************************************************** + * evtchn.c + * + * Xenolinux driver for receiving and demuxing event-channel signals. + * + * Copyright (c) 2004, K A Fraser + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + + +typedef struct evtchn_sotfc { + + struct selinfo ev_rsel; +} evtchn_softc_t; + + +#ifdef linuxcrap +/* NB. 
This must be shared amongst drivers if more things go in /dev/xen */ +static devfs_handle_t xen_dev_dir; +#endif + +/* Only one process may open /dev/xen/evtchn at any time. */ +static unsigned long evtchn_dev_inuse; + +/* Notification ring, accessed via /dev/xen/evtchn. */ + +#define EVTCHN_RING_SIZE 2048 /* 2048 16-bit entries */ + +#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1)) +static uint16_t *ring; +static unsigned int ring_cons, ring_prod, ring_overflow; + +/* Which ports is user-space bound to? */ +static uint32_t bound_ports[32]; + +/* Unique address for processes to sleep on */ +static void *evtchn_waddr = &ring; + +static struct mtx lock, upcall_lock; + +static d_read_t evtchn_read; +static d_write_t evtchn_write; +static d_ioctl_t evtchn_ioctl; +static d_poll_t evtchn_poll; +static d_open_t evtchn_open; +static d_close_t evtchn_close; + + +void +evtchn_device_upcall(int port) +{ + mtx_lock(&upcall_lock); + + mask_evtchn(port); + clear_evtchn(port); + + if ( ring != NULL ) { + if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) { + ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port; + if ( ring_cons == ring_prod++ ) { + wakeup(evtchn_waddr); + } + } + else { + ring_overflow = 1; + } + } + + mtx_unlock(&upcall_lock); +} + +static void +__evtchn_reset_buffer_ring(void) +{ + /* Initialise the ring to empty. Clear errors. */ + ring_cons = ring_prod = ring_overflow = 0; +} + +static int +evtchn_read(struct cdev *dev, struct uio *uio, int ioflag) +{ + int rc; + unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0; + count = uio->uio_resid; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) + { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + for ( ; ; ) { + if ( (c = ring_cons) != (p = ring_prod) ) + break; + + if ( ring_overflow ) { + rc = EFBIG; + goto out; + } + + if (sst != 0) { + rc = EINTR; + goto out; + } + + /* PCATCH == check for signals before and after sleeping + * PWAIT == priority of waiting on resource + */ + sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10); + } + + /* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */ + if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) { + bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t); + bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t); + } + else { + bytes1 = (p - c) * sizeof(uint16_t); + bytes2 = 0; + } + + /* Truncate chunks according to caller's maximum byte count.
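+	 * bytes1 is clamped to the caller's budget first; bytes2 then only
+	 * gets whatever budget remains, so bytes1 + bytes2 never exceeds
+	 * count.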
*/ + if ( bytes1 > count ) { + bytes1 = count; + bytes2 = 0; + } + else if ( (bytes1 + bytes2) > count ) { + bytes2 = count - bytes1; + } + + if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) || + ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio))) + /* keeping this around as its replacement is not equivalent + * copyout(&ring[0], &buf[bytes1], bytes2) + */ + { + rc = EFAULT; + goto out; + } + + ring_cons += (bytes1 + bytes2) / sizeof(uint16_t); + + rc = bytes1 + bytes2; + + out: + + return rc; +} + +static int +evtchn_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + int rc, i, count; + + count = uio->uio_resid; + + uint16_t *kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK); + + + if ( kbuf == NULL ) + return ENOMEM; + + count &= ~1; /* even number of bytes */ + + if ( count == 0 ) { + rc = 0; + goto out; + } + + if ( count > PAGE_SIZE ) + count = PAGE_SIZE; + + if ( uiomove(kbuf, count, uio) != 0 ) { + rc = EFAULT; + goto out; + } + + mtx_lock_spin(&lock); + for ( i = 0; i < (count/2); i++ ) + if ( test_bit(kbuf[i], &bound_ports[0]) ) + unmask_evtchn(kbuf[i]); + mtx_unlock_spin(&lock); + + rc = count; + + out: + free(kbuf, M_DEVBUF); + return rc; +} + +static int +evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, + int mode, struct thread *td __unused) +{ + int rc = 0; + + mtx_lock_spin(&lock); + + switch ( cmd ) + { + case EVTCHN_RESET: + __evtchn_reset_buffer_ring(); + break; + case EVTCHN_BIND: + if ( !synch_test_and_set_bit((uintptr_t)arg, &bound_ports[0]) ) + unmask_evtchn((uintptr_t)arg); + else + rc = EINVAL; + break; + case EVTCHN_UNBIND: + if ( synch_test_and_clear_bit((uintptr_t)arg, &bound_ports[0]) ) + mask_evtchn((uintptr_t)arg); + else + rc = EINVAL; + break; + default: + rc = ENOSYS; + break; + } + + mtx_unlock_spin(&lock); + + return rc; +} + +static int +evtchn_poll(struct cdev *dev, int poll_events, struct thread *td) +{ + + evtchn_softc_t *sc; + unsigned int mask = POLLOUT | POLLWRNORM; + + sc = dev->si_drv1; + + if ( ring_cons != ring_prod ) + mask |= POLLIN | POLLRDNORM; + else if ( ring_overflow ) + mask = POLLERR; + else + selrecord(td, &sc->ev_rsel); + + + return mask; +} + + +static int +evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td) +{ + uint16_t *_ring; + + if (flag & O_NONBLOCK) + return EBUSY; + + if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) ) + return EBUSY; + + if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL ) + return ENOMEM; + + mtx_lock_spin(&lock); + ring = _ring; + __evtchn_reset_buffer_ring(); + mtx_unlock_spin(&lock); + + + return 0; +} + +static int +evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused) +{ + int i; + + mtx_lock_spin(&lock); + if (ring != NULL) { + free(ring, M_DEVBUF); + ring = NULL; + } + for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) + if ( synch_test_and_clear_bit(i, &bound_ports[0]) ) + mask_evtchn(i); + mtx_unlock_spin(&lock); + + evtchn_dev_inuse = 0; + + return 0; +} + +static struct cdevsw evtchn_devsw = { + d_version: D_VERSION, + d_open: evtchn_open, + d_close: evtchn_close, + d_read: evtchn_read, + d_write: evtchn_write, + d_ioctl: evtchn_ioctl, + d_poll: evtchn_poll, + d_name: "evtchn", + d_flags: 0, +}; + + +/* XXX - if this device is ever supposed to support use by more than one process + * this global static will have to go away + */ +static struct cdev *evtchn_dev; + + + +static int +evtchn_dev_init(void *dummy __unused) +{ + /* XXX I believe we don't need these leaving them here for now until we + * have some 
semblance of it working + */ + mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF); + + /* (DEVFS) create '/dev/misc/evtchn'. */ + evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn"); + + mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS); + + evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK); + bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t)); + + /* XXX I don't think we need any of this rubbish */ +#if 0 + if ( err != 0 ) + { + printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); + return err; + } + + /* (DEVFS) create directory '/dev/xen'. */ + xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); + + /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ + pos = devfs_generate_path(evtchn_miscdev.devfs_handle, + &link_dest[3], + sizeof(link_dest) - 3); + if ( pos >= 0 ) + strncpy(&link_dest[pos], "../", 3); + /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ + (void)devfs_mk_symlink(xen_dev_dir, + "evtchn", + DEVFS_FL_DEFAULT, + &link_dest[pos], + &symlink_handle, + NULL); + + /* (DEVFS) automatically destroy the symlink with its destination. */ + devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); +#endif + if (bootverbose) + printf("Event-channel device installed.\n"); + + return 0; +} + +SYSINIT(evtchn_dev_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_dev_init, NULL); + + Property changes on: xen/evtchn/evtchn_dev.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: xen/evtchn/evtchn.c =================================================================== --- xen/evtchn/evtchn.c (.../stable/6/sys) (revision 0) +++ xen/evtchn/evtchn.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,1106 @@ +/****************************************************************************** + * evtchn.c + * + * Communication via Xen event channels. + * + * Copyright (c) 2002-2005, K A Fraser + * Copyright (c) 2005-2006 Kip Macy + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static inline unsigned long __ffs(unsigned long word) +{ + __asm__("bsfl %1,%0" + :"=r" (word) + :"rm" (word)); + return word; +} + +static struct mtx irq_mapping_update_lock; +static struct xenpic *xp; +struct xenpic_intsrc { + struct intsrc xp_intsrc; + void *xp_cookie; + uint8_t xp_vector; + boolean_t xp_masked; +}; + +struct xenpic { + struct pic *xp_dynirq_pic; + struct pic *xp_pirq_pic; + uint16_t xp_numintr; + struct xenpic_intsrc xp_pins[0]; +}; + +#define TODO printf("%s: not implemented!\n", __func__) + +/* IRQ <-> event-channel mappings. */ +static int evtchn_to_irq[NR_EVENT_CHANNELS]; + +/* Packed IRQ information: binding type, sub-type index, and event channel. */ +static uint32_t irq_info[NR_IRQS]; +/* Binding types. */ +enum { + IRQT_UNBOUND, + IRQT_PIRQ, + IRQT_VIRQ, + IRQT_IPI, + IRQT_LOCAL_PORT, + IRQT_CALLER_PORT, + _IRQT_COUNT + +}; + + +#define _IRQT_BITS 4 +#define _EVTCHN_BITS 12 +#define _INDEX_BITS (32 - _IRQT_BITS - _EVTCHN_BITS) + +/* Constructor for packed IRQ information. */ +static inline uint32_t +mk_irq_info(uint32_t type, uint32_t index, uint32_t evtchn) +{ + + return ((type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn); +} + +/* Constructor for packed IRQ information. 
*/ + +/* Convenient shorthand for packed representation of an unbound IRQ. */ +#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) + +/* + * Accessors for packed IRQ information. + */ + +static inline unsigned int evtchn_from_irq(int irq) +{ + return irq_info[irq] & ((1U << _EVTCHN_BITS) - 1); +} + +static inline unsigned int index_from_irq(int irq) +{ + return (irq_info[irq] >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1); +} + +static inline unsigned int type_from_irq(int irq) +{ + return irq_info[irq] >> (32 - _IRQT_BITS); +} + + +/* IRQ <-> VIRQ mapping. */ + +/* IRQ <-> IPI mapping. */ +#ifndef NR_IPIS +#ifdef SMP +#error "NR_IPIS not defined" +#endif +#define NR_IPIS 1 +#endif + +/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */ +static unsigned long pirq_needs_unmask_notify[NR_PIRQS/sizeof(unsigned long)]; + +/* Reference counts for bindings to IRQs. */ +static int irq_bindcount[NR_IRQS]; + +#define VALID_EVTCHN(_chn) ((_chn) != 0) + +#ifdef SMP + +static uint8_t cpu_evtchn[NR_EVENT_CHANNELS]; +static unsigned long cpu_evtchn_mask[MAX_VIRT_CPUS][NR_EVENT_CHANNELS/LONG_BIT]; + +#define active_evtchns(cpu,sh,idx) \ + ((sh)->evtchn_pending[idx] & \ + cpu_evtchn_mask[cpu][idx] & \ + ~(sh)->evtchn_mask[idx]) + +static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu) +{ + clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]); + set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]); + cpu_evtchn[chn] = cpu; +} + +static void init_evtchn_cpu_bindings(void) +{ + /* By default all event channels notify CPU#0. */ + memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); + memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); +} + +#define cpu_from_evtchn(evtchn) (cpu_evtchn[evtchn]) + +#else + +#define active_evtchns(cpu,sh,idx) \ + ((sh)->evtchn_pending[idx] & \ + ~(sh)->evtchn_mask[idx]) +#define bind_evtchn_to_cpu(chn,cpu) ((void)0) +#define init_evtchn_cpu_bindings() ((void)0) +#define cpu_from_evtchn(evtchn) (0) + +#endif + + +/* + * Force a proper event-channel callback from Xen after clearing the + * callback mask. We do this in a very simple manner, by making a call + * down into Xen. The pending flag will be checked by Xen on return. + */ +void force_evtchn_callback(void) +{ + (void)HYPERVISOR_xen_version(0, NULL); +} + +void +evtchn_do_upcall(struct intrframe *frame) +{ + unsigned long l1, l2; + unsigned int l1i, l2i, port; + int irq, cpu; + shared_info_t *s; + vcpu_info_t *vcpu_info; + + cpu = PCPU_GET(cpuid); + s = HYPERVISOR_shared_info; + vcpu_info = &s->vcpu_info[cpu]; + + vcpu_info->evtchn_upcall_pending = 0; + + /* NB. No need for a barrier here -- XCHG is a barrier on x86. 
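+	 * xen_xchg() atomically snapshots and clears the selector word;
+	 * each set bit names a word of s->evtchn_pending that may hold
+	 * pending ports, which the nested loops below scan and dispatch.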
*/ + l1 = xen_xchg(&vcpu_info->evtchn_pending_sel, 0); + + while (l1 != 0) { + l1i = __ffs(l1); + l1 &= ~(1 << l1i); + + while ((l2 = active_evtchns(cpu, s, l1i)) != 0) { + l2i = __ffs(l2); + + port = (l1i * LONG_BIT) + l2i; + if ((irq = evtchn_to_irq[port]) != -1) { + struct intsrc *isrc = intr_lookup_source(irq); + /* + * ack + */ + mask_evtchn(port); + clear_evtchn(port); + + intr_execute_handlers(isrc, frame); + } else { + evtchn_device_upcall(port); + } + } + } +} + +void +ipi_pcpu(unsigned int cpu, int vector) +{ + int irq; + + irq = PCPU_GET(ipi_to_irq[vector]); + + notify_remote_via_irq(irq); +} + +static int +find_unbound_irq(void) +{ + int dynirq, irq; + + for (dynirq = 0; dynirq < NR_IRQS; dynirq++) { + irq = dynirq_to_irq(dynirq); + if (irq_bindcount[irq] == 0) + break; + } + + if (irq == NR_IRQS) + panic("No available IRQ to bind to: increase NR_IRQS!\n"); + + return (irq); +} + +static int +bind_caller_port_to_irq(unsigned int caller_port) +{ + int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + + if ((irq = evtchn_to_irq[caller_port]) == -1) { + if ((irq = find_unbound_irq()) < 0) + goto out; + + evtchn_to_irq[caller_port] = irq; + irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port); + } + + irq_bindcount[irq]++; + unmask_evtchn(caller_port); + + out: + mtx_unlock_spin(&irq_mapping_update_lock); + return irq; +} + +static int +bind_local_port_to_irq(unsigned int local_port) +{ + int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + + KASSERT(evtchn_to_irq[local_port] == -1, + ("evtchn_to_irq inconsistent")); + + if ((irq = find_unbound_irq()) < 0) { + struct evtchn_close close = { .port = local_port }; + HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + + goto out; + } + + evtchn_to_irq[local_port] = irq; + irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port); + irq_bindcount[irq]++; + unmask_evtchn(local_port); + + out: + mtx_unlock_spin(&irq_mapping_update_lock); + return irq; +} + +static int +bind_listening_port_to_irq(unsigned int remote_domain) +{ + struct evtchn_alloc_unbound alloc_unbound; + int err; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = remote_domain; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + + return err ? : bind_local_port_to_irq(alloc_unbound.port); +} + +static int +bind_interdomain_evtchn_to_irq(unsigned int remote_domain, + unsigned int remote_port) +{ + struct evtchn_bind_interdomain bind_interdomain; + int err; + + bind_interdomain.remote_dom = remote_domain; + bind_interdomain.remote_port = remote_port; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + + return err ? 
: bind_local_port_to_irq(bind_interdomain.local_port); +} + +static int +bind_virq_to_irq(unsigned int virq, unsigned int cpu) +{ + struct evtchn_bind_virq bind_virq; + int evtchn = 0, irq; + + mtx_lock_spin(&irq_mapping_update_lock); + + if ((irq = pcpu_find(cpu)->pc_virq_to_irq[virq]) == -1) { + if ((irq = find_unbound_irq()) < 0) + goto out; + + bind_virq.virq = virq; + bind_virq.vcpu = cpu; + HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); + + evtchn = bind_virq.port; + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + + pcpu_find(cpu)->pc_virq_to_irq[virq] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + + irq_bindcount[irq]++; + unmask_evtchn(evtchn); +out: + mtx_unlock_spin(&irq_mapping_update_lock); + + return irq; +} + + +extern int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu); + +int +bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) +{ + struct evtchn_bind_ipi bind_ipi; + int irq; + int evtchn = 0; + + mtx_lock_spin(&irq_mapping_update_lock); + + if ((irq = pcpu_find(cpu)->pc_ipi_to_irq[ipi]) == -1) { + if ((irq = find_unbound_irq()) < 0) + goto out; + + bind_ipi.vcpu = cpu; + HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); + evtchn = bind_ipi.port; + + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + + pcpu_find(cpu)->pc_ipi_to_irq[ipi] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + irq_bindcount[irq]++; + unmask_evtchn(evtchn); +out: + + mtx_unlock_spin(&irq_mapping_update_lock); + + return irq; +} + + +static void +unbind_from_irq(int irq) +{ + struct evtchn_close close; + int evtchn = evtchn_from_irq(irq); + int cpu; + + mtx_lock_spin(&irq_mapping_update_lock); + + if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { + close.port = evtchn; + HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); + + switch (type_from_irq(irq)) { + case IRQT_VIRQ: + cpu = cpu_from_evtchn(evtchn); + pcpu_find(cpu)->pc_virq_to_irq[index_from_irq(irq)] = -1; + break; + case IRQT_IPI: + cpu = cpu_from_evtchn(evtchn); + pcpu_find(cpu)->pc_ipi_to_irq[index_from_irq(irq)] = -1; + break; + default: + break; + } + + /* Closed ports are implicitly re-bound to VCPU0. 
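+		 * Update our per-CPU bookkeeping (cpu_evtchn[] and
+		 * cpu_evtchn_mask[]) to match.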
*/ + bind_evtchn_to_cpu(evtchn, 0); + + evtchn_to_irq[evtchn] = -1; + irq_info[irq] = IRQ_UNBOUND; + } + + mtx_unlock_spin(&irq_mapping_update_lock); +} + +int +bind_caller_port_to_irqhandler(unsigned int caller_port, + const char *devname, driver_intr_t handler, void *arg, + unsigned long irqflags, unsigned int *irqp) +{ + unsigned int irq; + int error; + + irq = bind_caller_port_to_irq(caller_port); + intr_register_source(&xp->xp_pins[irq].xp_intsrc); + error = intr_add_handler(devname, irq, handler, arg, irqflags, + &xp->xp_pins[irq].xp_cookie); + + if (error) { + unbind_from_irq(irq); + return (error); + } + + if (irqp) + *irqp = irq; + + return (0); +} + +int +bind_listening_port_to_irqhandler(unsigned int remote_domain, + const char *devname, driver_intr_t handler, void *arg, + unsigned long irqflags, unsigned int *irqp) +{ + unsigned int irq; + int error; + + irq = bind_listening_port_to_irq(remote_domain); + intr_register_source(&xp->xp_pins[irq].xp_intsrc); + error = intr_add_handler(devname, irq, handler, arg, irqflags, + &xp->xp_pins[irq].xp_cookie); + if (error) { + unbind_from_irq(irq); + return (error); + } + if (irqp) + *irqp = irq; + + return (0); +} + +int +bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, + unsigned int remote_port, const char *devname, driver_intr_t handler, + unsigned long irqflags, unsigned int *irqp) +{ + unsigned int irq; + int error; + + irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port); + intr_register_source(&xp->xp_pins[irq].xp_intsrc); + error = intr_add_handler(devname, irq, handler, NULL, + irqflags, &xp->xp_pins[irq].xp_cookie); + if (error) { + unbind_from_irq(irq); + return (error); + } + + if (irqp) + *irqp = irq; + return (0); +} + +int +bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, + const char *devname, driver_intr_t handler, + unsigned long irqflags, unsigned int *irqp) +{ + unsigned int irq; + int error; + + irq = bind_virq_to_irq(virq, cpu); + intr_register_source(&xp->xp_pins[irq].xp_intsrc); + error = intr_add_handler(devname, irq, handler, + NULL, irqflags, &xp->xp_pins[irq].xp_cookie); + if (error) { + unbind_from_irq(irq); + return (error); + } + + if (irqp) + *irqp = irq; + return (0); +} + +int +bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu, + const char *devname, driver_intr_t handler, + unsigned long irqflags, unsigned int *irqp) +{ + unsigned int irq; + int error; + + irq = bind_ipi_to_irq(ipi, cpu); + intr_register_source(&xp->xp_pins[irq].xp_intsrc); + error = intr_add_handler(devname, irq, handler, + NULL, irqflags, &xp->xp_pins[irq].xp_cookie); + if (error) { + unbind_from_irq(irq); + return (error); + } + + if (irqp) + *irqp = irq; + return (0); +} + +void +unbind_from_irqhandler(unsigned int irq) +{ + intr_remove_handler(xp->xp_pins[irq].xp_cookie); + unbind_from_irq(irq); +} + +#if 0 +/* Rebind an evtchn so that it gets delivered to a specific cpu */ +static void +rebind_irq_to_cpu(unsigned irq, unsigned tcpu) +{ + evtchn_op_t op = { .cmd = EVTCHNOP_bind_vcpu }; + int evtchn; + + mtx_lock_spin(&irq_mapping_update_lock); + + evtchn = evtchn_from_irq(irq); + if (!VALID_EVTCHN(evtchn)) { + mtx_unlock_spin(&irq_mapping_update_lock); + return; + } + + /* Send future instances of this interrupt to other vcpu. */ + bind_vcpu.port = evtchn; + bind_vcpu.vcpu = tcpu; + + /* + * If this fails, it usually just indicates that we're dealing with a + * virq or IPI channel, which don't actually need to be rebound. 
Ignore + * it, but don't do the xenlinux-level rebind in that case. + */ + if (HYPERVISOR_event_channel_op(&op) >= 0) + bind_evtchn_to_cpu(evtchn, tcpu); + + mtx_unlock_spin(&irq_mapping_update_lock); + +} + +static void set_affinity_irq(unsigned irq, cpumask_t dest) +{ + unsigned tcpu = ffs(dest) - 1; + rebind_irq_to_cpu(irq, tcpu); +} +#endif + +/* + * Interface to generic handling in intr_machdep.c + */ + + +/*------------ interrupt handling --------------------------------------*/ +#define TODO printf("%s: not implemented!\n", __func__) + + +static void xenpic_dynirq_enable_source(struct intsrc *isrc); +static void xenpic_dynirq_disable_source(struct intsrc *isrc, int); +static void xenpic_dynirq_eoi_source(struct intsrc *isrc); +static void xenpic_dynirq_enable_intr(struct intsrc *isrc); + +static void xenpic_pirq_enable_source(struct intsrc *isrc); +static void xenpic_pirq_disable_source(struct intsrc *isrc, int); +static void xenpic_pirq_eoi_source(struct intsrc *isrc); +static void xenpic_pirq_enable_intr(struct intsrc *isrc); + + +static int xenpic_vector(struct intsrc *isrc); +static int xenpic_source_pending(struct intsrc *isrc); +static void xenpic_suspend(struct pic* pic); +static void xenpic_resume(struct pic* pic); +static void xenpic_assign_cpu(struct intsrc *, u_int apic_id); + + +struct pic xenpic_dynirq_template = { + .pic_enable_source = xenpic_dynirq_enable_source, + .pic_disable_source = xenpic_dynirq_disable_source, + .pic_eoi_source = xenpic_dynirq_eoi_source, + .pic_enable_intr = xenpic_dynirq_enable_intr, + .pic_vector = xenpic_vector, + .pic_source_pending = xenpic_source_pending, + .pic_suspend = xenpic_suspend, + .pic_resume = xenpic_resume +}; + +struct pic xenpic_pirq_template = { + .pic_enable_source = xenpic_pirq_enable_source, + .pic_disable_source = xenpic_pirq_disable_source, + .pic_eoi_source = xenpic_pirq_eoi_source, + .pic_enable_intr = xenpic_pirq_enable_intr, + .pic_vector = xenpic_vector, + .pic_source_pending = xenpic_source_pending, + .pic_suspend = xenpic_suspend, + .pic_resume = xenpic_resume, + .pic_assign_cpu = xenpic_assign_cpu +}; + + + +void +xenpic_dynirq_enable_source(struct intsrc *isrc) +{ + unsigned int irq; + struct xenpic_intsrc *xp; + + xp = (struct xenpic_intsrc *)isrc; + + mtx_lock_spin(&irq_mapping_update_lock); + if (xp->xp_masked) { + irq = xenpic_vector(isrc); + unmask_evtchn(evtchn_from_irq(irq)); + xp->xp_masked = FALSE; + } + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_dynirq_disable_source(struct intsrc *isrc, int foo) +{ + unsigned int irq; + struct xenpic_intsrc *xp; + + xp = (struct xenpic_intsrc *)isrc; + + mtx_lock_spin(&irq_mapping_update_lock); + if (!xp->xp_masked) { + irq = xenpic_vector(isrc); + mask_evtchn(evtchn_from_irq(irq)); + xp->xp_masked = TRUE; + } + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_dynirq_enable_intr(struct intsrc *isrc) +{ + unsigned int irq; + struct xenpic_intsrc *xp; + + xp = (struct xenpic_intsrc *)isrc; + mtx_lock_spin(&irq_mapping_update_lock); + xp->xp_masked = 0; + irq = xenpic_vector(isrc); + unmask_evtchn(evtchn_from_irq(irq)); + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_dynirq_eoi_source(struct intsrc *isrc) +{ + unsigned int irq; + struct xenpic_intsrc *xp; + + xp = (struct xenpic_intsrc *)isrc; + mtx_lock_spin(&irq_mapping_update_lock); + xp->xp_masked = 0; + irq = xenpic_vector(isrc); + unmask_evtchn(evtchn_from_irq(irq)); + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static int 
+xenpic_vector(struct intsrc *isrc) +{ + struct xenpic_intsrc *pin; + + pin = (struct xenpic_intsrc *)isrc; + //printf("xenpic_vector(): isrc=%p,vector=%u\n", pin, pin->xp_vector); + + return (pin->xp_vector); +} + +static int +xenpic_source_pending(struct intsrc *isrc) +{ + struct xenpic_intsrc *pin = (struct xenpic_intsrc *)isrc; + + /* XXXEN: TODO */ + printf("xenpic_source_pending(): vector=%x,masked=%x\n", + pin->xp_vector, pin->xp_masked); + +/* notify_remote_via_evtchn(pin->xp_vector); // XXX RS: Is this correct? */ + return 0; +} + +static void +xenpic_suspend(struct pic* pic) +{ + TODO; +} + +static void +xenpic_resume(struct pic* pic) +{ + TODO; +} + +static void +xenpic_assign_cpu(struct intsrc *isrc, u_int apic_id) +{ + TODO; +} + +void +notify_remote_via_irq(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + notify_remote_via_evtchn(evtchn); + else + panic("invalid evtchn %d", irq); +} + +/* required for support of physical devices */ +static inline void +pirq_unmask_notify(int pirq) +{ + struct physdev_eoi eoi = { .irq = pirq }; + + if (unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0]))) { + (void)HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); + } +} + +static inline void +pirq_query_unmask(int pirq) +{ + struct physdev_irq_status_query irq_status_query; + + irq_status_query.irq = pirq; + (void)HYPERVISOR_physdev_op(PHYSDEVOP_IRQ_STATUS_QUERY, &irq_status_query); + clear_bit(pirq, &pirq_needs_unmask_notify[0]); + if ( irq_status_query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY ) + set_bit(pirq, &pirq_needs_unmask_notify[0]); +} + +/* + * On startup, if there is no action associated with the IRQ then we are + * probing. In this case we should not share with others as it will confuse us. + */ +#define probing_irq(_irq) (intr_lookup_source(irq) == NULL) + +static void +xenpic_pirq_enable_intr(struct intsrc *isrc) +{ + struct evtchn_bind_pirq bind_pirq; + int evtchn; + unsigned int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + irq = xenpic_vector(isrc); + evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + goto out; + + bind_pirq.pirq = irq; + /* NB. We are happy to share unless we are probing. */ + bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; + + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { +#ifndef XEN_PRIVILEGED_GUEST + panic("unexpected pirq call"); +#endif + if (!probing_irq(irq)) /* Some failures are expected when probing. 
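
As an illustrative sketch only (not part of this change), a frontend driver would typically consume the bind_*_to_irqhandler()/unbind_from_irqhandler() API defined above roughly as follows. The device name, softc layout and handler are invented, and the interrupt flags assume the stock FreeBSD INTR_TYPE_*/INTR_MPSAFE values:

	/* Hypothetical consumer of the event-channel binding API above. */
	struct mydev_softc {
		unsigned int	sc_irq;	/* guest IRQ returned by the bind call */
	};

	static void
	mydev_intr(void *arg)
	{
		struct mydev_softc *sc = arg;

		/* Handle work posted by the remote domain. */
		(void)sc;
	}

	static int
	mydev_attach_evtchn(struct mydev_softc *sc, unsigned int backend_domid)
	{

		/* Allocate an unbound port for backend_domid and hook a handler. */
		return (bind_listening_port_to_irqhandler(backend_domid, "mydev",
		    mydev_intr, sc, INTR_TYPE_MISC | INTR_MPSAFE, &sc->sc_irq));
	}

	static void
	mydev_detach_evtchn(struct mydev_softc *sc)
	{

		unbind_from_irqhandler(sc->sc_irq);
	}
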
*/ + printf("Failed to obtain physical IRQ %d\n", irq); + mtx_unlock_spin(&irq_mapping_update_lock); + return; + } + evtchn = bind_pirq.port; + + pirq_query_unmask(irq_to_pirq(irq)); + + bind_evtchn_to_cpu(evtchn, 0); + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, evtchn); + + out: + unmask_evtchn(evtchn); + pirq_unmask_notify(irq_to_pirq(irq)); + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_pirq_enable_source(struct intsrc *isrc) +{ + int evtchn; + unsigned int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + irq = xenpic_vector(isrc); + evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + goto done; + + unmask_evtchn(evtchn); + pirq_unmask_notify(irq_to_pirq(irq)); + done: + mtx_unlock_spin(&irq_mapping_update_lock); +} + +static void +xenpic_pirq_disable_source(struct intsrc *isrc, int eoi) +{ + int evtchn; + unsigned int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + irq = xenpic_vector(isrc); + evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + goto done; + + mask_evtchn(evtchn); + done: + mtx_unlock_spin(&irq_mapping_update_lock); +} + + +static void +xenpic_pirq_eoi_source(struct intsrc *isrc) +{ + int evtchn; + unsigned int irq; + + mtx_lock_spin(&irq_mapping_update_lock); + irq = xenpic_vector(isrc); + evtchn = evtchn_from_irq(irq); + + if (!VALID_EVTCHN(evtchn)) + goto done; + + unmask_evtchn(evtchn); + pirq_unmask_notify(irq_to_pirq(irq)); + done: + mtx_unlock_spin(&irq_mapping_update_lock); +} + +int +irq_to_evtchn_port(int irq) +{ + return evtchn_from_irq(irq); +} + +void +mask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_set_bit(port, &s->evtchn_mask[0]); +} + +void +unmask_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + unsigned int cpu = PCPU_GET(cpuid); + vcpu_info_t *vcpu_info = &s->vcpu_info[cpu]; + + /* Slow path (hypercall) if this is a non-local port. */ + if (unlikely(cpu != cpu_from_evtchn(port))) { + struct evtchn_unmask unmask = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask); + return; + } + + synch_clear_bit(port, &s->evtchn_mask); + + /* + * The following is basically the equivalent of 'hw_resend_irq'. Just + * like a real IO-APIC we 'lose the interrupt edge' if the channel is + * masked. + */ + if (synch_test_bit(port, &s->evtchn_pending) && + !synch_test_and_set_bit(port / LONG_BIT, + &vcpu_info->evtchn_pending_sel)) { + vcpu_info->evtchn_upcall_pending = 1; + if (!vcpu_info->evtchn_upcall_mask) + force_evtchn_callback(); + } +} + +void irq_resume(void) +{ + evtchn_op_t op; + int cpu, pirq, virq, ipi, irq, evtchn; + + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_ipi bind_ipi; + + init_evtchn_cpu_bindings(); + + /* New event-channel space is not 'live' yet. */ + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + mask_evtchn(evtchn); + + /* Check that no PIRQs are still bound. */ + for (pirq = 0; pirq < NR_PIRQS; pirq++) { + KASSERT(irq_info[pirq_to_irq(pirq)] == IRQ_UNBOUND, + ("pirq_to_irq inconsistent")); + } + + /* Secondary CPUs must have no VIRQ or IPI bindings. */ + for (cpu = 1; cpu < MAX_VIRT_CPUS; cpu++) { + for (virq = 0; virq < NR_VIRQS; virq++) { + KASSERT(pcpu_find(cpu)->pc_virq_to_irq[virq] == -1, + ("virq_to_irq inconsistent")); + } + for (ipi = 0; ipi < NR_IPIS; ipi++) { + KASSERT(pcpu_find(cpu)->pc_ipi_to_irq[ipi] == -1, + ("ipi_to_irq inconsistent")); + } + } + + /* No IRQ <-> event-channel mappings. 
*/ + for (irq = 0; irq < NR_IRQS; irq++) + irq_info[irq] &= ~0xFFFF; /* zap event-channel binding */ + for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) + evtchn_to_irq[evtchn] = -1; + + /* Primary CPU: rebind VIRQs automatically. */ + for (virq = 0; virq < NR_VIRQS; virq++) { + if ((irq = pcpu_find(0)->pc_virq_to_irq[virq]) == -1) + continue; + + KASSERT(irq_info[irq] == mk_irq_info(IRQT_VIRQ, virq, 0), + ("irq_info inconsistent")); + + /* Get a new binding from Xen. */ + bind_virq.virq = virq; + bind_virq.vcpu = 0; + HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); + evtchn = bind_virq.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); + + /* Ready for use. */ + unmask_evtchn(evtchn); + } + + /* Primary CPU: rebind IPIs automatically. */ + for (ipi = 0; ipi < NR_IPIS; ipi++) { + if ((irq = pcpu_find(0)->pc_ipi_to_irq[ipi]) == -1) + continue; + + KASSERT(irq_info[irq] == mk_irq_info(IRQT_IPI, ipi, 0), + ("irq_info inconsistent")); + + /* Get a new binding from Xen. */ + memset(&op, 0, sizeof(op)); + bind_ipi.vcpu = 0; + HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); + evtchn = bind_ipi.port; + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; + irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); + + /* Ready for use. */ + unmask_evtchn(evtchn); + } +} + +static void +evtchn_init(void *dummy __unused) +{ + int i, cpu; + struct xenpic_intsrc *pin, *tpin; + + + init_evtchn_cpu_bindings(); + + /* No VIRQ or IPI bindings. */ + for (cpu = 0; cpu < mp_ncpus; cpu++) { + for (i = 0; i < NR_VIRQS; i++) + pcpu_find(cpu)->pc_virq_to_irq[i] = -1; + for (i = 0; i < NR_IPIS; i++) + pcpu_find(cpu)->pc_ipi_to_irq[i] = -1; + } + + /* No event-channel -> IRQ mappings. */ + for (i = 0; i < NR_EVENT_CHANNELS; i++) { + evtchn_to_irq[i] = -1; + mask_evtchn(i); /* No event channels are 'live' right now. */ + } + + /* No IRQ -> event-channel mappings. */ + for (i = 0; i < NR_IRQS; i++) + irq_info[i] = IRQ_UNBOUND; + + xp = malloc(sizeof(struct xenpic) + NR_IRQS*sizeof(struct xenpic_intsrc), + M_DEVBUF, M_WAITOK); + + xp->xp_dynirq_pic = &xenpic_dynirq_template; + xp->xp_pirq_pic = &xenpic_pirq_template; + xp->xp_numintr = NR_IRQS; + bzero(xp->xp_pins, sizeof(struct xenpic_intsrc) * NR_IRQS); + + + /* We need to register our PIC's beforehand */ + if (intr_register_pic(&xenpic_pirq_template)) + panic("XEN: intr_register_pic() failure"); + if (intr_register_pic(&xenpic_dynirq_template)) + panic("XEN: intr_register_pic() failure"); + + /* + * Initialize the dynamic IRQ's - we initialize the structures, but + * we do not bind them (bind_evtchn_to_irqhandle() does this) + */ + pin = xp->xp_pins; + for (i = 0; i < NR_DYNIRQS; i++) { + /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ + irq_bindcount[dynirq_to_irq(i)] = 0; + + tpin = &pin[dynirq_to_irq(i)]; + tpin->xp_intsrc.is_pic = xp->xp_dynirq_pic; + tpin->xp_vector = dynirq_to_irq(i); + + } + /* + * Now, we go ahead and claim every PIRQ there is. + */ + pin = xp->xp_pins; + for (i = 0; i < NR_PIRQS; i++) { + /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ + irq_bindcount[pirq_to_irq(i)] = 0; + +#ifdef RTC_IRQ + /* If not domain 0, force our RTC driver to fail its probe. 
*/ + if ((i == RTC_IRQ) && + !(xen_start_info->flags & SIF_INITDOMAIN)) + continue; +#endif + tpin = &pin[pirq_to_irq(i)]; + tpin->xp_intsrc.is_pic = xp->xp_pirq_pic; + tpin->xp_vector = pirq_to_irq(i); + + } +} + +SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_MIDDLE, evtchn_init, NULL); + /* + * irq_mapping_update_lock: in order to allow an interrupt to occur in a critical + * section, to set pcpu->ipending (etc...) properly, we + * must be able to get the icu lock, so it can't be + * under witness. + */ + +MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_SPIN); Property changes on: xen/evtchn/evtchn.c ___________________________________________________________________ Added: svn:keywords + FreeBSD=%H Index: xen/reboot.c =================================================================== --- xen/reboot.c (.../stable/6/sys) (revision 0) +++ xen/reboot.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,262 @@ +/* + * + * Copyright (c) 2004 Christian Limpach. + * Copyright (c) 2004-2006,2008 Kip Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Christian Limpach. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#ifdef XENHVM + +#include + +#else + +static void xen_suspend(void); + +#endif + +static void +shutdown_handler(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + char *str; + struct xenbus_transaction xbt; + int error, howto; + + howto = 0; + + again: + error = xenbus_transaction_start(&xbt); + if (error) + return; + + error = xenbus_read(xbt, "control", "shutdown", NULL, (void **) &str); + + /* Ignore read errors and empty reads. 
*/ + if (error || strlen(str) == 0) { + xenbus_transaction_end(xbt, 1); + return; + } + + xenbus_write(xbt, "control", "shutdown", ""); + + error = xenbus_transaction_end(xbt, 0); + if (error == EAGAIN) { + free(str, M_DEVBUF); + goto again; + } + + if (strcmp(str, "reboot") == 0) + howto = 0; + else if (strcmp(str, "poweroff") == 0) + howto |= (RB_POWEROFF | RB_HALT); + else if (strcmp(str, "halt") == 0) +#ifdef XENHVM + /* + * We rely on acpi powerdown to halt the VM. + */ + howto |= (RB_POWEROFF | RB_HALT); +#else + howto |= RB_HALT; +#endif + else if (strcmp(str, "suspend") == 0) + howto = -1; + else { + printf("Ignoring shutdown request: %s\n", str); + goto done; + } + + if (howto == -1) { + xen_suspend(); + goto done; + } + + shutdown_nice(howto); + done: + free(str, M_DEVBUF); +} + +#ifndef XENHVM + +/* + * In HV mode, we let acpi take care of halts and reboots. + */ + +static void +xen_shutdown_final(void *arg, int howto) +{ + + if (howto & (RB_HALT | RB_POWEROFF)) + HYPERVISOR_shutdown(SHUTDOWN_poweroff); + else + HYPERVISOR_shutdown(SHUTDOWN_reboot); +} + +#endif + +static struct xenbus_watch shutdown_watch = { + .node = "control/shutdown", + .callback = shutdown_handler +}; + +static void +setup_shutdown_watcher(void *unused) +{ + + if (register_xenbus_watch(&shutdown_watch)) + printf("Failed to set shutdown watcher\n"); +#ifndef XENHVM + EVENTHANDLER_REGISTER(shutdown_final, xen_shutdown_final, NULL, + SHUTDOWN_PRI_LAST); +#endif +} + +SYSINIT(shutdown, SI_SUB_PSEUDO, SI_ORDER_ANY, setup_shutdown_watcher, NULL); + +#ifndef XENHVM + +extern void xencons_suspend(void); +extern void xencons_resume(void); + +static void +xen_suspend() +{ + int i, j, k, fpp; + unsigned long max_pfn, start_info_mfn; + +#ifdef SMP + cpumask_t map; + /* + * Bind us to CPU 0 and stop any other VCPUs. + */ + mtx_lock_spin(&sched_lock); + sched_bind(curthread, 0); + mtx_unlock_spin(&sched_lock); + KASSERT(PCPU_GET(cpuid) == 0, ("xen_suspend: not running on cpu 0")); + + map = PCPU_GET(other_cpus) & ~stopped_cpus; + if (map) + stop_cpus(map); +#endif + + if (DEVICE_SUSPEND(root_bus) != 0) { + printf("xen_suspend: device_suspend failed\n"); + if (map) + restart_cpus(map); + return; + } + + local_irq_disable(); + + xencons_suspend(); + gnttab_suspend(); + + max_pfn = HYPERVISOR_shared_info->arch.max_pfn; + + void *shared_info = HYPERVISOR_shared_info; + HYPERVISOR_shared_info = NULL; + pmap_kremove((vm_offset_t) shared_info); + PT_UPDATES_FLUSH(); + + xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn); + xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn); + + /* + * We'll stop somewhere inside this hypercall. When it returns, + * we'll start resuming after the restore. 
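
The shutdown_handler/setup_shutdown_watcher pair above shows the general xenbus pattern: watch a node, then re-read and clear it inside a transaction, retrying on EAGAIN. As a hedged sketch of the same pattern for some other node (the node name, callback and M_DEVBUF usage here are illustrative assumptions, mirroring the code above):

	/* Hypothetical watch following the shutdown_handler pattern above. */
	static void
	example_node_changed(struct xenbus_watch *watch, const char **vec,
	    unsigned int len)
	{
		struct xenbus_transaction xbt;
		char *val;
		int error;

	again:
		error = xenbus_transaction_start(&xbt);
		if (error)
			return;

		error = xenbus_read(xbt, "device/example", "state", NULL,
		    (void **)&val);
		if (error) {
			xenbus_transaction_end(xbt, 1);
			return;
		}

		/* ... act on val ... */

		error = xenbus_transaction_end(xbt, 0);
		free(val, M_DEVBUF);
		if (error == EAGAIN)
			goto again;
	}

	static struct xenbus_watch example_watch = {
		.node = "device/example/state",
		.callback = example_node_changed
	};

	static void
	example_setup_watch(void *unused)
	{

		if (register_xenbus_watch(&example_watch))
			printf("Failed to set example watch\n");
	}

As with the shutdown watcher, registration would normally be deferred to a SYSINIT so that xenbus is up before the watch is installed.
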
+ */ + start_info_mfn = VTOMFN(xen_start_info); + pmap_suspend(); + HYPERVISOR_suspend(start_info_mfn); + pmap_resume(); + + pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info); + HYPERVISOR_shared_info = shared_info; + + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = + VTOMFN(xen_pfn_to_mfn_frame_list_list); + + fpp = PAGE_SIZE/sizeof(unsigned long); + for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) { + if ((j % fpp) == 0) { + k++; + xen_pfn_to_mfn_frame_list_list[k] = + VTOMFN(xen_pfn_to_mfn_frame_list[k]); + j = 0; + } + xen_pfn_to_mfn_frame_list[k][j] = + VTOMFN(&xen_phys_machine[i]); + } + HYPERVISOR_shared_info->arch.max_pfn = max_pfn; + + gnttab_resume(); + irq_resume(); + local_irq_enable(); + xencons_resume(); + +#ifdef CONFIG_SMP + for_each_cpu(i) + vcpu_prepare(i); + +#endif + /* + * Only resume xenbus /after/ we've prepared our VCPUs; otherwise + * the VCPU hotplug callback can race with our vcpu_prepare + */ + DEVICE_RESUME(root_bus); + +#ifdef SMP + sched_unbind(curthread); + if (map) + restart_cpus(map); +#endif +} + +#endif Property changes on: xen/reboot.c ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:mergeinfo Merged /stable/7/sys/i386/xen/xen_machdep.c:r172506,172810,175956,179044,179776,180149,182402 Merged /head/sys/i386/xen/xen_machdep.c:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628 Added: svn:eol-style + native Index: xen/xen_intr.h =================================================================== --- xen/xen_intr.h (.../stable/6/sys) (revision 0) +++ xen/xen_intr.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,102 @@ +/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- */ +#ifndef _XEN_INTR_H_ +#define _XEN_INTR_H_ + +/* +* The flat IRQ space is divided into two regions: +* 1. A one-to-one mapping of real physical IRQs. This space is only used +* if we have physical device-access privilege. 
This region is at the +* start of the IRQ space so that existing device drivers do not need +* to be modified to translate physical IRQ numbers into our IRQ space. +* 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These +* are bound using the provided bind/unbind functions. +* +* +* $FreeBSD$ +*/ + +#define PIRQ_BASE 0 +#define NR_PIRQS 128 + +#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) +#define NR_DYNIRQS 128 + +#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) + +#define pirq_to_irq(_x) ((_x) + PIRQ_BASE) +#define irq_to_pirq(_x) ((_x) - PIRQ_BASE) + +#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE) +#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE) + +/* + * Dynamic binding of event channels and VIRQ sources to guest IRQ space. + */ + +/* + * Bind a caller port event channel to an interrupt handler. If + * successful, the guest IRQ number is returned in *irqp. Return zero + * on success or errno otherwise. + */ +extern int bind_caller_port_to_irqhandler(unsigned int caller_port, + const char *devname, driver_intr_t handler, void *arg, + unsigned long irqflags, unsigned int *irqp); + +/* + * Bind a listening port to an interrupt handler. If successful, the + * guest IRQ number is returned in *irqp. Return zero on success or + * errno otherwise. + */ +extern int bind_listening_port_to_irqhandler(unsigned int remote_domain, + const char *devname, driver_intr_t handler, void *arg, + unsigned long irqflags, unsigned int *irqp); + +/* + * Bind a VIRQ to an interrupt handler. If successful, the guest IRQ + * number is returned in *irqp. Return zero on success or errno + * otherwise. + */ +extern int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, + const char *devname, driver_intr_t handler, + unsigned long irqflags, unsigned int *irqp); + +/* + * Bind an IPI to an interrupt handler. If successful, the guest + * IRQ number is returned in *irqp. Return zero on success or errno + * otherwise. + */ +extern int bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu, + const char *devname, driver_intr_t handler, + unsigned long irqflags, unsigned int *irqp); + +/* + * Bind an interdomain event channel to an interrupt handler. If + * successful, the guest IRQ number is returned in *irqp. Return zero + * on success or errno otherwise. + */ +extern int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, + unsigned int remote_port, const char *devname, driver_intr_t handler, + unsigned long irqflags, unsigned int *irqp); + +/* + * Unbind an interrupt handler using the guest IRQ number returned + * when it was bound. + */ +extern void unbind_from_irqhandler(unsigned int irq); + +static __inline__ int irq_cannonicalize(unsigned int irq) +{ + return (irq == 2) ? 
9 : irq; +} + +extern void disable_irq(unsigned int); +extern void disable_irq_nosync(unsigned int); +extern void enable_irq(unsigned int); + +extern void irq_suspend(void); +extern void irq_resume(void); + +extern void idle_block(void); +extern int ap_cpu_initclocks(int cpu); + +#endif /* _XEN_INTR_H_ */ Property changes on: xen/xen_intr.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:mergeinfo Merged /stable/7/sys/i386/include/xen/xen_intr.h:r172506,172810,175956,179044,179776,180149,182402 Merged /head/sys/i386/include/xen/xen_intr.h:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628 Added: svn:eol-style + native Index: xen/evtchn.h =================================================================== --- xen/evtchn.h (.../stable/6/sys) (revision 0) +++ xen/evtchn.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,94 @@ +/****************************************************************************** + * evtchn.h + * + * Communication via Xen event channels. + * Also definitions for the device that demuxes notifications to userspace. + * + * Copyright (c) 2004, K A Fraser + * + * $FreeBSD$ + */ + +#ifndef __ASM_EVTCHN_H__ +#define __ASM_EVTCHN_H__ +#include +#include +#include +#include + +/* + * LOW-LEVEL DEFINITIONS + */ + +/* + * Unlike notify_remote_via_evtchn(), this is safe to use across + * save/restore. Notifications on a broken connection are silently dropped. + */ +void notify_remote_via_irq(int irq); + + +/* Entry point for notifications into Linux subsystems. */ +void evtchn_do_upcall(struct intrframe *frame); + +/* Entry point for notifications into the userland character device. 
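
On the notification side, once an IRQ has been obtained from one of the bind_*_to_irqhandler() calls declared above, the guest kicks the remote end with notify_remote_via_irq(), and irq_to_evtchn_port() recovers the raw port when it must be advertised (for example through xenstore during the split-driver handshake). A minimal, hypothetical sketch; the function names and ring comment are illustrative only:

	/* Hypothetical producer path: publish a request, then notify the peer. */
	static void
	example_notify(int irq)		/* irq from a bind_*_to_irqhandler() call */
	{

		/* ... publish work on the shared ring first ... */

		/* Safe across save/restore; dropped if the connection is broken. */
		notify_remote_via_irq(irq);
	}

	static int
	example_evtchn_port(int irq)
	{

		/* Raw port, e.g. to advertise to the backend via xenstore. */
		return (irq_to_evtchn_port(irq));
	}
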
*/ +void evtchn_device_upcall(int port); + +void mask_evtchn(int port); + +void unmask_evtchn(int port); + +#ifdef SMP +void rebind_evtchn_to_cpu(int port, unsigned int cpu); +#else +#define rebind_evtchn_to_cpu(port, cpu) ((void)0) +#endif + +static inline +int test_and_set_evtchn_mask(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + return synch_test_and_set_bit(port, s->evtchn_mask); +} + +static inline void +clear_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_clear_bit(port, &s->evtchn_pending[0]); +} + +static inline void +notify_remote_via_evtchn(int port) +{ + struct evtchn_send send = { .port = port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); +} + +/* + * Use these to access the event channel underlying the IRQ handle returned + * by bind_*_to_irqhandler(). + */ +int irq_to_evtchn_port(int irq); + +void ipi_pcpu(unsigned int cpu, int vector); + +/* + * CHARACTER-DEVICE DEFINITIONS + */ + +#define PORT_NORMAL 0x0000 +#define PORT_EXCEPTION 0x8000 +#define PORTIDX_MASK 0x7fff + +/* /dev/xen/evtchn resides at device number major=10, minor=200 */ +#define EVTCHN_MINOR 200 + +/* /dev/xen/evtchn ioctls: */ +/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ +#define EVTCHN_RESET _IO('E', 1) +/* EVTCHN_BIND: Bind to the specified event-channel port. */ +#define EVTCHN_BIND _IO('E', 2) +/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ +#define EVTCHN_UNBIND _IO('E', 3) + +#endif /* __ASM_EVTCHN_H__ */ Property changes on: xen/evtchn.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:mergeinfo Merged /stable/7/sys/i386/include/xen/evtchn.h:r172506,172810,175956,179044,179776,180149,182402 Merged /head/sys/i386/include/xen/evtchn.h:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628 Added: svn:eol-style + native Index: amd64/include/pcpu.h =================================================================== --- amd64/include/pcpu.h (.../stable/6/sys) 
(revision 184012) +++ amd64/include/pcpu.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -35,6 +35,24 @@ #ifdef _KERNEL +#if defined(XEN) || defined(XENHVM) +#ifndef NR_VIRQS +#define NR_VIRQS 24 +#endif +#ifndef NR_IPIS +#define NR_IPIS 2 +#endif +#endif + +#ifdef XENHVM +#define PCPU_XEN_FIELDS \ + ; \ + unsigned int pc_last_processed_l1i; \ + unsigned int pc_last_processed_l2i +#else +#define PCPU_XEN_FIELDS +#endif + /* * The SMP parts are setup in pmap.c and locore.s for the BSP, and * mp_machdep.c sets up the data for the AP's to "see" when they awake. @@ -49,7 +67,8 @@ register_t pc_rsp0; \ register_t pc_scratch_rsp; /* User %rsp in syscall */ \ u_int pc_apic_id; \ - u_int pc_acpi_id /* ACPI CPU id */ + u_int pc_acpi_id /* ACPI CPU id */ \ + PCPU_XEN_FIELDS #if defined(lint) Index: amd64/include/xen/xen-os.h =================================================================== --- amd64/include/xen/xen-os.h (.../stable/6/sys) (revision 0) +++ amd64/include/xen/xen-os.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,296 @@ +/****************************************************************************** + * os.h + * + * random collection of macros and definition + */ + +#ifndef _XEN_OS_H_ +#define _XEN_OS_H_ + +#ifdef PAE +#define CONFIG_X86_PAE +#endif + +#if !defined(__XEN_INTERFACE_VERSION__) +/* + * Can update to a more recent version when we implement + * the hypercall page + */ +#define __XEN_INTERFACE_VERSION__ 0x00030204 +#endif + +#include + +/* Force a proper event-channel callback from Xen. */ +void force_evtchn_callback(void); + +extern int gdtset; + +extern shared_info_t *HYPERVISOR_shared_info; + +/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ +static inline void rep_nop(void) +{ + __asm__ __volatile__ ( "rep;nop" : : : "memory" ); +} +#define cpu_relax() rep_nop() + +/* crude memory allocator for memory allocation early in + * boot + */ +void *bootmem_alloc(unsigned int size); +void bootmem_free(void *ptr, unsigned int size); + + +/* Everything below this point is not included by assembler (.S) files. */ +#ifndef __ASSEMBLY__ + +void printk(const char *fmt, ...); + +/* some function prototypes */ +void trap_init(void); + +#define likely(x) __builtin_expect((x),1) +#define unlikely(x) __builtin_expect((x),0) + +#ifndef XENHVM + +/* + * STI/CLI equivalents. These basically set and clear the virtual + * event_enable flag in teh shared_info structure. Note that when + * the enable bit is set, there may be pending events to be handled. + * We may therefore call into do_hypervisor_callback() directly. + */ + +#define __cli() \ +do { \ + vcpu_info_t *_vcpu; \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ + _vcpu->evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + +#define __sti() \ +do { \ + vcpu_info_t *_vcpu; \ + barrier(); \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ + _vcpu->evtchn_upcall_mask = 0; \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ +} while (0) + +#define __restore_flags(x) \ +do { \ + vcpu_info_t *_vcpu; \ + barrier(); \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ + if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \ + barrier(); /* unmask then check (avoid races) */ \ + if ( unlikely(_vcpu->evtchn_upcall_pending) ) \ + force_evtchn_callback(); \ + } \ +} while (0) + +/* + * Add critical_{enter, exit}? 
+ * + */ +#define __save_and_cli(x) \ +do { \ + vcpu_info_t *_vcpu; \ + _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)]; \ + (x) = _vcpu->evtchn_upcall_mask; \ + _vcpu->evtchn_upcall_mask = 1; \ + barrier(); \ +} while (0) + + +#define cli() __cli() +#define sti() __sti() +#define save_flags(x) __save_flags(x) +#define restore_flags(x) __restore_flags(x) +#define save_and_cli(x) __save_and_cli(x) + +#define local_irq_save(x) __save_and_cli(x) +#define local_irq_restore(x) __restore_flags(x) +#define local_irq_disable() __cli() +#define local_irq_enable() __sti() + +#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));} +#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); } +#define spin_lock_irqsave mtx_lock_irqsave +#define spin_unlock_irqrestore mtx_unlock_irqrestore + +#else +#endif + +#ifndef mb +#define mb() __asm__ __volatile__("mfence":::"memory") +#endif +#ifndef rmb +#define rmb() __asm__ __volatile__("lfence":::"memory"); +#endif +#ifndef wmb +#define wmb() barrier() +#endif +#ifdef SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() wmb() +#define smp_read_barrier_depends() read_barrier_depends() +#define set_mb(var, value) do { xchg(&var, value); } while (0) +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while(0) +#define set_mb(var, value) do { var = value; barrier(); } while (0) +#endif + + +/* This is a barrier for the compiler only, NOT the processor! */ +#define barrier() __asm__ __volatile__("": : :"memory") + +#define LOCK_PREFIX "" +#define LOCK "" +#define ADDR (*(volatile long *) addr) +/* + * Make sure gcc doesn't try to be clever and move things around + * on us. We need to use _exactly_ the address the user gave us, + * not some alias that contains the same information. + */ +typedef struct { volatile int counter; } atomic_t; + + + +#define xen_xchg(ptr,v) \ + ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr)))) +struct __xchg_dummy { unsigned long a[100]; }; +#define __xg(x) ((volatile struct __xchg_dummy *)(x)) +static __inline unsigned long __xchg(unsigned long x, volatile void * ptr, + int size) +{ + switch (size) { + case 1: + __asm__ __volatile__("xchgb %b0,%1" + :"=q" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 2: + __asm__ __volatile__("xchgw %w0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + case 4: + __asm__ __volatile__("xchgl %0,%1" + :"=r" (x) + :"m" (*__xg(ptr)), "0" (x) + :"memory"); + break; + } + return x; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies a memory barrier. + */ +static __inline int test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( LOCK_PREFIX + "btrl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit),"=m" (ADDR) + :"Ir" (nr) : "memory"); + return oldbit; +} + +static __inline int constant_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline int variable_test_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__( + "btl %2,%1\n\tsbbl %0,%0" + :"=r" (oldbit) + :"m" (ADDR),"Ir" (nr)); + return oldbit; +} + +#define test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? 
\ + constant_test_bit((nr),(addr)) : \ + variable_test_bit((nr),(addr))) + + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btsl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() + * in order to ensure changes are visible on other processors. + */ +static __inline__ void clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__( LOCK_PREFIX + "btrl %1,%0" + :"=m" (ADDR) + :"Ir" (nr)); +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. Note that the guaranteed + * useful range of an atomic_t is only 24 bits. + */ +static __inline__ void atomic_inc(atomic_t *v) +{ + __asm__ __volatile__( + LOCK "incl %0" + :"=m" (v->counter) + :"m" (v->counter)); +} + + +#define rdtscll(val) \ + __asm__ __volatile__("rdtsc" : "=A" (val)) + +#endif /* !__ASSEMBLY__ */ + +#endif /* _OS_H_ */ Property changes on: amd64/include/xen/xen-os.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: amd64/include/xen/hypercall.h =================================================================== --- amd64/include/xen/hypercall.h (.../stable/6/sys) (revision 0) +++ amd64/include/xen/hypercall.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,415 @@ +/****************************************************************************** + * hypercall.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * 64-bit updates: + * Benjamin Liu + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
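
The __cli/__sti/__save_and_cli macros in the non-XENHVM branch above manipulate the per-VCPU evtchn_upcall_mask rather than the real EFLAGS.IF, so the usual consumer is the flag-preserving pair shown in this minimal sketch (PV case only; the function name is illustrative):

	/* Hypothetical PV-side critical section using the virtual-IF macros. */
	static void
	example_touch_vcpu_state(void)
	{
		unsigned long flags;

		local_irq_save(flags);		/* mask event delivery, remember old mask */

		/* ... touch state that must not race with an event upcall ... */

		local_irq_restore(flags);	/* restore mask; re-check pending events */
	}
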
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __MACHINE_XEN_HYPERCALL_H__ +#define __MACHINE_XEN_HYPERCALL_H__ + +#include + +#ifndef __XEN_HYPERVISOR_H__ +# error "please don't include this file directly" +#endif + +#define __STR(x) #x +#define STR(x) __STR(x) +#define ENOXENSYS 38 +#define CONFIG_XEN_COMPAT 0x030002 +#define __must_check + +#ifdef XEN +#define HYPERCALL_STR(name) \ + "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)" +#else +#define HYPERCALL_STR(name) \ + "mov $("STR(__HYPERVISOR_##name)" * 32),%%eax; "\ + "add hypercall_stubs(%%rip),%%rax; " \ + "call *%%rax" +#endif + +#define _hypercall0(type, name) \ +({ \ + type __res; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res) \ + : \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall1(type, name, a1) \ +({ \ + type __res; \ + long __ign1; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=D" (__ign1) \ + : "1" ((long)(a1)) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + type __res; \ + long __ign1, __ign2; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \ + : "1" ((long)(a1)), "2" ((long)(a2)) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + type __res; \ + long __ign1, __ign2, __ign3; \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + type __res; \ + long __ign1, __ign2, __ign3; \ + register long __arg4 __asm__("r10") = (long)(a4); \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3), "+r" (__arg4) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + type __res; \ + long __ign1, __ign2, __ign3; \ + register long __arg4 __asm__("r10") = (long)(a4); \ + register long __arg5 __asm__("r8") = (long)(a5); \ + __asm__ volatile ( \ + HYPERCALL_STR(name) \ + : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ + "=d" (__ign3), "+r" (__arg4), "+r" (__arg5) \ + : "1" ((long)(a1)), "2" ((long)(a2)), \ + "3" ((long)(a3)) \ + : "memory" ); \ + __res; \ +}) + +static inline int __must_check +HYPERVISOR_set_trap_table( + const trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int __must_check +HYPERVISOR_mmu_update( + mmu_update_t *req, unsigned int count, unsigned int *success_count, + domid_t domid) +{ + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int __must_check +HYPERVISOR_mmuext_op( + struct mmuext_op *op, unsigned int count, unsigned int *success_count, + domid_t domid) +{ + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int __must_check +HYPERVISOR_set_gdt( + unsigned long *frame_list, unsigned int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int __must_check +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, 
stack_switch, ss, esp); +} + +static inline int __must_check +HYPERVISOR_set_callbacks( + unsigned long event_address, unsigned long failsafe_address, + unsigned long syscall_address) +{ + return _hypercall3(int, set_callbacks, + event_address, failsafe_address, syscall_address); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +static inline int __must_check +HYPERVISOR_sched_op_compat( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op_compat, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline long __must_check +HYPERVISOR_set_timer_op( + uint64_t timeout) +{ + return _hypercall1(long, set_timer_op, timeout); +} + +static inline int __must_check +HYPERVISOR_platform_op( + struct xen_platform_op *platform_op) +{ + platform_op->interface_version = XENPF_INTERFACE_VERSION; + return _hypercall1(int, platform_op, platform_op); +} + +static inline int __must_check +HYPERVISOR_set_debugreg( + unsigned int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long __must_check +HYPERVISOR_get_debugreg( + unsigned int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int __must_check +HYPERVISOR_update_descriptor( + unsigned long ma, unsigned long word) +{ + return _hypercall2(int, update_descriptor, ma, word); +} + +static inline int __must_check +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_multicall( + multicall_entry_t *call_list, unsigned int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int __must_check +HYPERVISOR_update_va_mapping( + unsigned long va, uint64_t new_val, unsigned long flags) +{ + return _hypercall3(int, update_va_mapping, va, new_val, flags); +} + +static inline int __must_check +HYPERVISOR_event_channel_op( + int cmd, void *arg) +{ + int rc = _hypercall2(int, event_channel_op, cmd, arg); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (unlikely(rc == -ENOXENSYS)) { + struct evtchn_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, event_channel_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } +#endif + + return rc; +} + +static inline int __must_check +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_console_io( + int cmd, unsigned int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int __must_check +HYPERVISOR_physdev_op( + int cmd, void *arg) +{ + int rc = _hypercall2(int, physdev_op, cmd, arg); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (unlikely(rc == -ENOXENSYS)) { + struct physdev_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, physdev_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } +#endif + + return rc; +} + +static inline int __must_check +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int __must_check +HYPERVISOR_update_va_mapping_otherdomain( + unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid) +{ + return _hypercall4(int, update_va_mapping_otherdomain, va, + new_val, flags, 
domid); +} + +static inline int __must_check +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int __must_check +HYPERVISOR_vcpu_op( + int cmd, unsigned int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int __must_check +HYPERVISOR_set_segment_base( + int reg, unsigned long value) +{ + return _hypercall2(int, set_segment_base, reg, value); +} + +static inline int __must_check +HYPERVISOR_suspend( + unsigned long srec) +{ + struct sched_shutdown sched_shutdown = { + .reason = SHUTDOWN_suspend + }; + + int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, + &sched_shutdown, srec); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (rc == -ENOXENSYS) + rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +#endif + + return rc; +} + +#if CONFIG_XEN_COMPAT <= 0x030002 +static inline int +HYPERVISOR_nmi_op( + unsigned long op, void *arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} +#endif + +#ifndef CONFIG_XEN +static inline unsigned long __must_check +HYPERVISOR_hvm_op( + int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} +#endif + +static inline int __must_check +HYPERVISOR_callback_op( + int cmd, const void *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_xenoprof_op( + int op, void *arg) +{ + return _hypercall2(int, xenoprof_op, op, arg); +} + +static inline int __must_check +HYPERVISOR_kexec_op( + unsigned long op, void *args) +{ + return _hypercall2(int, kexec_op, op, args); +} + +#undef __must_check + +#endif /* __MACHINE_XEN_HYPERCALL_H__ */ Property changes on: amd64/include/xen/hypercall.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: amd64/include/xen/xenvar.h =================================================================== --- amd64/include/xen/xenvar.h (.../stable/6/sys) (revision 0) +++ amd64/include/xen/xenvar.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2008 Kip Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
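
The HYPERVISOR_* wrappers above are thin shims over the _hypercallN() macros. As one hedged example of how they are exercised, the version query below assumes the standard XENVER_version subcommand from the Xen interface headers, which takes no argument buffer and packs major/minor into the return value:

	/* Hypothetical probe of the running hypervisor's version. */
	static void
	example_print_xen_version(void)
	{
		int ver;

		ver = HYPERVISOR_xen_version(XENVER_version, NULL);
		printf("Xen version %d.%d\n", ver >> 16, ver & 0xffff);
	}
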
+ * + * + * $FreeBSD$ + */ +#ifndef XENVAR_H_ +#define XENVAR_H_ +#define XBOOTUP 0x1 +#define XPMAP 0x2 +extern int xendebug_flags; +#ifndef NOXENDEBUG +#define XENPRINTF printk +#else +#define XENPRINTF printf +#endif +#include + +#if 0 +#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__) +#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__) +#define TRACE_DEBUG(argflags, _f, _a...) \ +if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a); +#else +#define TRACE_ENTER +#define TRACE_EXIT +#define TRACE_DEBUG(argflags, _f, _a...) +#endif + +#ifdef XENHVM + +static inline vm_paddr_t +phystomach(vm_paddr_t pa) +{ + + return (pa); +} + +static inline vm_paddr_t +machtophys(vm_paddr_t ma) +{ + + return (ma); +} + +#define vtomach(va) pmap_kextract((vm_offset_t) (va)) +#define PFNTOMFN(pa) (pa) +#define MFNTOPFN(ma) (ma) + +#define set_phys_to_machine(pfn, mfn) ((void)0) +#define PT_UPDATES_FLUSH() ((void)0) + +#else + +extern xen_pfn_t *xen_phys_machine; + + +extern xen_pfn_t *xen_machine_phys; +/* Xen starts physical pages after the 4MB ISA hole - + * FreeBSD doesn't + */ + + +#undef ADD_ISA_HOLE /* XXX */ + +#ifdef ADD_ISA_HOLE +#define ISA_INDEX_OFFSET 1024 +#define ISA_PDR_OFFSET 1 +#else +#define ISA_INDEX_OFFSET 0 +#define ISA_PDR_OFFSET 0 +#endif + + +#define PFNTOMFN(i) (xen_phys_machine[(i)]) +#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)]) + +#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE) +#define PTOV(x) (((uintptr_t)(x)) + KERNBASE) + +#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT) +#define PFNTOV(x) PTOV((vm_paddr_t)(x) << PAGE_SHIFT) + +#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT) +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) + +#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT) +#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT) + +#endif + +void xpq_init(void); + +int xen_create_contiguous_region(vm_page_t pages, int npages); + +void xen_destroy_contiguous_region(void * addr, int npages); + +#endif Property changes on: amd64/include/xen/xenvar.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: amd64/include/xen/synch_bitops.h =================================================================== --- amd64/include/xen/synch_bitops.h (.../stable/6/sys) (revision 0) +++ amd64/include/xen/synch_bitops.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,129 @@ +#ifndef __XEN_SYNCH_BITOPS_H__ +#define __XEN_SYNCH_BITOPS_H__ + +/* + * Copyright 1992, Linus Torvalds. + * Heavily modified to provide guaranteed strong synchronisation + * when communicating with Xen or other guest OSes running on other CPUs. 
+ */ + + +#define ADDR (*(volatile long *) addr) + +static __inline__ void synch_set_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btsl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ void synch_clear_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btrl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ void synch_change_bit(int nr, volatile void * addr) +{ + __asm__ __volatile__ ( + "lock btcl %1,%0" + : "=m" (ADDR) : "Ir" (nr) : "memory" ); +} + +static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btsl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "lock btrl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr) +{ + int oldbit; + + __asm__ __volatile__ ( + "lock btcl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory"); + return oldbit; +} + +struct __synch_xchg_dummy { unsigned long a[100]; }; +#define __synch_xg(x) ((volatile struct __synch_xchg_dummy *)(x)) + +#define synch_cmpxchg(ptr, old, new) \ +((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\ + (unsigned long)(old), \ + (unsigned long)(new), \ + sizeof(*(ptr)))) + +static inline unsigned long __synch_cmpxchg(volatile void *ptr, + unsigned long old, + unsigned long new, int size) +{ + unsigned long prev; + switch (size) { + case 1: + __asm__ __volatile__("lock; cmpxchgb %b1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + case 2: + __asm__ __volatile__("lock; cmpxchgw %w1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + case 4: + __asm__ __volatile__("lock; cmpxchgl %k1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + case 8: + __asm__ __volatile__("lock; cmpxchgq %1,%2" + : "=a"(prev) + : "q"(new), "m"(*__synch_xg(ptr)), + "0"(old) + : "memory"); + return prev; + } + return old; +} + +static __inline__ int synch_const_test_bit(int nr, const volatile void * addr) +{ + return ((1UL << (nr & 31)) & + (((const volatile unsigned int *) addr)[nr >> 5])) != 0; +} + +static __inline__ int synch_var_test_bit(int nr, volatile void * addr) +{ + int oldbit; + __asm__ __volatile__ ( + "btl %2,%1\n\tsbbl %0,%0" + : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) ); + return oldbit; +} + +#define synch_test_bit(nr,addr) \ +(__builtin_constant_p(nr) ? \ + synch_const_test_bit((nr),(addr)) : \ + synch_var_test_bit((nr),(addr))) + +#endif /* __XEN_SYNCH_BITOPS_H__ */ Property changes on: amd64/include/xen/synch_bitops.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: amd64/include/xen/xenfunc.h =================================================================== --- amd64/include/xen/xenfunc.h (.../stable/6/sys) (revision 0) +++ amd64/include/xen/xenfunc.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,83 @@ +/* + * + * Copyright (c) 2004,2005 Kip Macy + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _XEN_XENFUNC_H_ +#define _XEN_XENFUNC_H_ + +#ifdef XENHVM +#include +#else +#include +#include +#endif + +#define BKPT __asm__("int3"); +#define XPQ_CALL_DEPTH 5 +#define XPQ_CALL_COUNT 2 +#define PG_PRIV PG_AVAIL3 +typedef struct { + unsigned long pt_ref; + unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH]; +} pteinfo_t; + +extern pteinfo_t *pteinfo_list; +#ifdef XENDEBUG_LOW +#define __PRINTK(x) printk x +#else +#define __PRINTK(x) +#endif + +char *xen_setbootenv(char *cmd_line); + +int xen_boothowto(char *envp); + +void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line); + +#ifdef INVARIANTS +#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), __FILE__, __LINE__) +#else +#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0) +#endif + +#ifndef XENHVM +void xen_update_descriptor(union descriptor *, union descriptor *); +#endif + +extern struct mtx balloon_lock; +#if 0 +#define balloon_lock(__flags) mtx_lock_irqsave(&balloon_lock, __flags) +#define balloon_unlock(__flags) mtx_unlock_irqrestore(&balloon_lock, __flags) +#else +#define balloon_lock(__flags) __flags = 1 +#define balloon_unlock(__flags) __flags = 0 +#endif + + + +#endif /* _XEN_XENFUNC_H_ */ Property changes on: amd64/include/xen/xenfunc.h ___________________________________________________________________ Added: svn:mime-type + text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Index: amd64/include/xen/xenpmap.h =================================================================== --- amd64/include/xen/xenpmap.h (.../stable/6/sys) (revision 0) +++ amd64/include/xen/xenpmap.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,227 @@ +/* + * + * Copyright (c) 2004 Christian Limpach. + * Copyright (c) 2004,2005 Kip Macy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Christian Limpach. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef _XEN_XENPMAP_H_ +#define _XEN_XENPMAP_H_ + +#include + +void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int); +void xen_pt_switch(vm_paddr_t); +void xen_set_ldt(vm_paddr_t, unsigned long); +void xen_pgdpt_pin(vm_paddr_t); +void xen_pgd_pin(vm_paddr_t); +void xen_pgd_unpin(vm_paddr_t); +void xen_pt_pin(vm_paddr_t); +void xen_pt_unpin(vm_paddr_t); +void xen_flush_queue(void); +void xen_check_queue(void); +#if 0 +void pmap_ref(pt_entry_t *pte, vm_paddr_t ma); +#endif + +#ifdef INVARIANTS +#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__) +#else +#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0) +#endif + +#ifdef PMAP_DEBUG +#define PMAP_REF pmap_ref +#define PMAP_DEC_REF_PAGE pmap_dec_ref_page +#define PMAP_MARK_PRIV pmap_mark_privileged +#define PMAP_MARK_UNPRIV pmap_mark_unprivileged +#else +#define PMAP_MARK_PRIV(a) +#define PMAP_MARK_UNPRIV(a) +#define PMAP_REF(a, b) +#define PMAP_DEC_REF_PAGE(a) +#endif + +#define ALWAYS_SYNC 0 + +#ifdef PT_DEBUG +#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__) +#else +#define PT_LOG() +#endif + +#define INVALID_P2M_ENTRY (~0UL) + +#define pmap_valid_entry(E) ((E) & PG_V) /* is PDE or PTE valid? */ + +#define SH_PD_SET_VA 1 +#define SH_PD_SET_VA_MA 2 +#define SH_PD_SET_VA_CLEAR 3 + +struct pmap; +void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type); +#ifdef notyet +static vm_paddr_t +vptetomachpte(vm_paddr_t *pte) +{ + vm_offset_t offset, ppte; + vm_paddr_t pgoffset, retval, *pdir_shadow_ptr; + int pgindex; + + ppte = (vm_offset_t)pte; + pgoffset = (ppte & PAGE_MASK); + offset = ppte - (vm_offset_t)PTmap; + pgindex = ppte >> PDRSHIFT; + + pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow); + retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset; + return (retval); +} +#endif +#define PT_GET(_ptp) \ + (pmap_valid_entry(*(_ptp)) ? 
xpmap_mtop(*(_ptp)) : (0)) + +#ifdef WRITABLE_PAGETABLES + +#define PT_SET_VA(_ptp,_npte,sync) do { \ + PMAP_REF((_ptp), xpmap_ptom(_npte)); \ + PT_LOG(); \ + *(_ptp) = xpmap_ptom((_npte)); \ +} while (/*CONSTCOND*/0) +#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ + PMAP_REF((_ptp), (_npte)); \ + PT_LOG(); \ + *(_ptp) = (_npte); \ +} while (/*CONSTCOND*/0) +#define PT_CLEAR_VA(_ptp, sync) do { \ + PMAP_REF((pt_entry_t *)(_ptp), 0); \ + PT_LOG(); \ + *(_ptp) = 0; \ +} while (/*CONSTCOND*/0) + +#define PD_SET_VA(_pmap, _ptp, _npte, sync) do { \ + PMAP_REF((_ptp), xpmap_ptom(_npte)); \ + pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do { \ + PMAP_REF((_ptp), (_npte)); \ + pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PD_CLEAR_VA(_pmap, _ptp, sync) do { \ + PMAP_REF((pt_entry_t *)(_ptp), 0); \ + pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) + +#else /* !WRITABLE_PAGETABLES */ + +#define PT_SET_VA(_ptp,_npte,sync) do { \ + PMAP_REF((_ptp), xpmap_ptom(_npte)); \ + xen_queue_pt_update(vtomach(_ptp), \ + xpmap_ptom(_npte)); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PT_SET_VA_MA(_ptp,_npte,sync) do { \ + PMAP_REF((_ptp), (_npte)); \ + xen_queue_pt_update(vtomach(_ptp), _npte); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PT_CLEAR_VA(_ptp, sync) do { \ + PMAP_REF((pt_entry_t *)(_ptp), 0); \ + xen_queue_pt_update(vtomach(_ptp), 0); \ + if (sync || ALWAYS_SYNC) \ + xen_flush_queue(); \ +} while (/*CONSTCOND*/0) + +#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do { \ + PMAP_REF((_ptp), xpmap_ptom(_npte)); \ + pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do { \ + PMAP_REF((_ptp), (_npte)); \ + pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) +#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do { \ + PMAP_REF((pt_entry_t *)(_ptp), 0); \ + pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR); \ + if (sync || ALWAYS_SYNC) xen_flush_queue(); \ +} while (/*CONSTCOND*/0) + +#endif + +#define PT_SET_MA(_va, _ma) \ +do { \ + PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\ + (_ma), \ + UVMF_INVLPG| UVMF_ALL) < 0); \ +} while (/*CONSTCOND*/0) + +#define PT_UPDATES_FLUSH() do { \ + xen_flush_queue(); \ +} while (/*CONSTCOND*/0) + +static __inline vm_paddr_t +xpmap_mtop(vm_paddr_t mpa) +{ + vm_paddr_t tmp = (mpa & PG_FRAME); + + return machtophys(tmp) | (mpa & ~PG_FRAME); +} + +static __inline vm_paddr_t +xpmap_ptom(vm_paddr_t ppa) +{ + vm_paddr_t tmp = (ppa & PG_FRAME); + + return phystomach(tmp) | (ppa & ~PG_FRAME); +} + +static __inline void +set_phys_to_machine(unsigned long pfn, unsigned long mfn) +{ +#ifdef notyet + PANIC_IF(max_mapnr && pfn >= max_mapnr); +#endif + if (xen_feature(XENFEAT_auto_translated_physmap)) { +#ifdef notyet + PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY)); +#endif + return; + } + xen_phys_machine[pfn] = mfn; +} + + + + +#endif /* _XEN_XENPMAP_H_ */ Property changes on: amd64/include/xen/xenpmap.h ___________________________________________________________________ Added: svn:mime-type + 
text/plain Added: svn:keywords + FreeBSD=%H Added: svn:eol-style + native Property changes on: amd64/include/xen ___________________________________________________________________ Added: svn:mergeinfo Merged /stable/7/sys/i386/include/xen:r172506,172810,175956,179044,179776,180149,182402 Merged /head/sys/i386/include/xen:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628 Index: amd64/conf/XENHVM =================================================================== --- amd64/conf/XENHVM (.../stable/6/sys) (revision 0) +++ amd64/conf/XENHVM (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -0,0 +1,143 @@ +# +# XENHVM -- Xen HVM kernel configuration file for FreeBSD/amd64 +# +# For more information on this file, please read the handbook section on +# Kernel Configuration Files: +# +# http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html +# +# The handbook is also available locally in /usr/share/doc/handbook +# if you've installed the doc distribution, otherwise always see the +# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the +# latest information. +# +# An exhaustive list of options and more detailed explanations of the +# device lines is also present in the ../../conf/NOTES and NOTES files. +# If you are in doubt as to the purpose or necessity of a line, check first +# in NOTES. +# +# $FreeBSD$ + +machine amd64 +cpu HAMMER +ident XENHVM + +# To statically compile in device wiring instead of /boot/device.hints +#hints "GENERIC.hints" # Default places to look for devices. 
+ +makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols +makeoptions MODULES_OVERRIDE="" + +options SCHED_4BSD # 4BSD scheduler +options PREEMPTION # Enable kernel thread preemption +options INET # InterNETworking +options INET6 # IPv6 communications protocols +options FFS # Berkeley Fast Filesystem +options SOFTUPDATES # Enable FFS soft updates support +options UFS_ACL # Support for access control lists +options UFS_DIRHASH # Improve performance on big directories +options MD_ROOT # MD is a potential root device +options NFSCLIENT # Network Filesystem Client +options NFSSERVER # Network Filesystem Server +options NFSLOCKD # Network Lock Manager +options NFS_ROOT # NFS usable as /, requires NFSCLIENT +options NTFS # NT File System +options MSDOSFS # MSDOS Filesystem +options CD9660 # ISO 9660 Filesystem +options PROCFS # Process filesystem (requires PSEUDOFS) +options PSEUDOFS # Pseudo-filesystem framework +options GEOM_GPT # GUID Partition Tables. +options COMPAT_43 # Needed by COMPAT_LINUX32 +options COMPAT_IA32 # Compatible with i386 binaries +options COMPAT_FREEBSD4 # Compatible with FreeBSD4 +options COMPAT_FREEBSD5 # Compatible with FreeBSD5 +options COMPAT_LINUX32 # Compatible with i386 linux binaries +options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI +options KTRACE # ktrace(1) support +options SYSVSHM # SYSV-style shared memory +options SYSVMSG # SYSV-style message queues +options SYSVSEM # SYSV-style semaphores +options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions +options KBD_INSTALL_CDEV # install a CDEV entry in /dev +options NO_ADAPTIVE_MUTEXES +#options ADAPTIVE_GIANT # Giant mutex is adaptive. + +options KDB +options DDB + +# Workarounds for some known-to-be-broken chipsets (nVidia nForce3-Pro150) +device atpic # 8259A compatability + +# Linux 32-bit ABI support +options LINPROCFS # Cannot be a module yet. +options SMP +options GDB + +# Bus support. +device acpi +device pci + +# Floppy drives +#device fdc + +# Xen HVM support +options XENHVM +device xenpci + +# ATA and ATAPI devices +device ata +device atadisk # ATA disk drives +device ataraid # ATA RAID drives +device atapicd # ATAPI CDROM drives +device atapifd # ATAPI floppy drives +device atapist # ATAPI tape drives +options ATA_STATIC_ID # Static device numbering + + +# SCSI peripherals +device scbus # SCSI bus (required for SCSI) +device da # Direct Access (disks) +device sa # Sequential Access (tape etc) +device cd # CD +device pass # Passthrough device (direct SCSI access) + +# atkbdc0 controls both the keyboard and the PS/2 mouse +device atkbdc # AT keyboard controller +device atkbd # AT keyboard +device psm # PS/2 mouse + +device kbdmux # keyboard multiplexer + +device vga # VGA video card driver + +device splash # Splash screen and screen saver support + +# syscons is the default console driver, resembling an SCO console +device sc + +device agp # support several AGP chipsets + +# Serial (COM) ports +device sio # 8250, 16[45]50 based serial ports + +# PCI Ethernet NICs that use the common MII bus controller code. +# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs! +device miibus # MII bus support +device re # RealTek 8139C+/8169/8169S/8110S + +# Pseudo devices. +device loop # Network loopback +device random # Entropy device +device ether # Ethernet support +device sl # Kernel SLIP +device ppp # Kernel PPP +device tun # Packet tunnel. 
+device pty # Pseudo-ttys (telnet etc) +device md # Memory "disks" +device gif # IPv6 and IPv4 tunneling +device faith # IPv6-to-IPv4 relaying (translation) + +# The `bpf' device enables the Berkeley Packet Filter. +# Be aware of the administrative consequences of enabling this! +# Note that 'bpf' is required for DHCP. +device bpf # Berkeley packet filter Index: amd64/amd64/pmap.c =================================================================== --- amd64/amd64/pmap.c (.../stable/6/sys) (revision 184012) +++ amd64/amd64/pmap.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -144,6 +144,11 @@ #ifdef SMP #include #endif +#ifdef XENHVM +#include +#include +#include +#endif #ifndef PMAP_SHPGPERPROC #define PMAP_SHPGPERPROC 200 @@ -793,8 +798,20 @@ sched_pin(); if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { - invltlb(); - smp_invltlb(); +#if defined(XENHVM) && defined(notdef) + /* + * As far as I can tell, this makes things slower, at + * least where there are only two physical cpus and + * the host is not overcommitted. + */ + if (is_running_on_xen()) { + HYPERVISOR_hvm_op(HVMOP_flush_tlbs, NULL); + } else +#endif + { + invltlb(); + smp_invltlb(); + } } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); Index: amd64/amd64/machdep.c =================================================================== --- amd64/amd64/machdep.c (.../stable/6/sys) (revision 184012) +++ amd64/amd64/machdep.c (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -1352,6 +1352,14 @@ if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); +#ifdef XENHVM + if (inw(0x10) == 0x49d2) { + if (bootverbose) + printf("Xen detected: disabling emulated block and network devices\n"); + outw(0x10, 3); + } +#endif + /* Location of kernel stack for locore */ return ((u_int64_t)thread0.td_pcb); } Index: sys/libkern.h =================================================================== --- sys/libkern.h (.../stable/6/sys) (revision 184012) +++ sys/libkern.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -98,6 +98,7 @@ int strcasecmp(const char *, const char *); char *strcat(char * __restrict, const char * __restrict); int strcmp(const char *, const char *); +size_t strcspn(const char *s, const char *charset); char *strcpy(char * __restrict, const char * __restrict); char *strdup(const char *__restrict, struct malloc_type *); size_t strlcat(char *, const char *, size_t); @@ -151,6 +152,18 @@ return (b); } +static __inline char * +strchr(const char *p, int ch) +{ + return (index(p, ch)); +} + +static __inline char * +strrchr(const char *p, int ch) +{ + return (rindex(p, ch)); +} + /* fnmatch() return values. */ #define FNM_NOMATCH 1 /* Match failed. 
*/ Index: sys/mutex.h =================================================================== --- sys/mutex.h (.../stable/6/sys) (revision 184012) +++ sys/mutex.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -325,6 +325,9 @@ #define mtx_trylock_flags(m, opts) \ _mtx_trylock((m), (opts), LOCK_FILE, LOCK_LINE) +#define mtx_sleep(chan, mtx, pri, wmesg, timo) \ + _sleep((chan), &(mtx)->mtx_object, (pri), (wmesg), (timo)) + #define mtx_initialized(m) lock_initalized(&(m)->mtx_object) #define mtx_owned(m) (((m)->mtx_lock & MTX_FLAGMASK) == (uintptr_t)curthread) Index: sys/sx.h =================================================================== --- sys/sx.h (.../stable/6/sys) (revision 184012) +++ sys/sx.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -257,6 +257,9 @@ sx_sunlock(sx); \ } while (0) +#define sx_sleep(chan, sx, pri, wmesg, timo) \ + _sleep((chan), &(sx)->lock_object, (pri), (wmesg), (timo)) + /* * Options passed to sx_init_flags(). */ Index: sys/systm.h =================================================================== --- sys/systm.h (.../stable/6/sys) (revision 184012) +++ sys/systm.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -113,6 +113,7 @@ * General function declarations. */ +struct lock_object; struct clockframe; struct malloc_type; struct mtx; @@ -295,6 +296,8 @@ * Common `proc' functions are declared here so that proc.h can be included * less often. */ +int _sleep(void *chan, struct lock_object *lock, int pri, const char *wmesg, + int timo) __nonnull(1); int msleep(void *chan, struct mtx *mtx, int pri, const char *wmesg, int timo); int msleep_spin(void *chan, struct mtx *mtx, const char *wmesg, int timo); Index: sys/sleepqueue.h =================================================================== --- sys/sleepqueue.h (.../stable/6/sys) (revision 184012) +++ sys/sleepqueue.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -83,11 +83,13 @@ #ifdef _KERNEL #define SLEEPQ_TYPE 0x0ff /* Mask of sleep queue types. */ -#define SLEEPQ_MSLEEP 0x00 /* Used by msleep/wakeup. */ +#define SLEEPQ_SLEEP 0x00 /* Used by msleep/wakeup. */ #define SLEEPQ_CONDVAR 0x01 /* Used for a cv. */ +#define SLEEPQ_PAUSE 0x02 /* Used by pause. */ #define SLEEPQ_SX 0x03 /* Used by an sx lock. */ #define SLEEPQ_INTERRUPTIBLE 0x100 /* Sleep is interruptible. */ + void init_sleepqueues(void); void sleepq_abort(struct thread *td, int intrval); void sleepq_add(void *, struct lock_object *, const char *, int, int); Index: sys/lock.h =================================================================== --- sys/lock.h (.../stable/6/sys) (revision 184012) +++ sys/lock.h (.../user/dfr/xenhvm/6/sys) (revision 190588) @@ -51,6 +51,8 @@ const char *lc_name; u_int lc_flags; void (*lc_ddb_show)(struct lock_object *lock); + void (*lc_lock)(struct lock_object *lock, int how); + int (*lc_unlock)(struct lock_object *lock); }; #define LC_SLEEPLOCK 0x00000001 /* Sleep lock. */ Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /projects/releng_6_xen/sys:r185181-186767
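
The mtx_sleep() and sx_sleep() macros added to sys/mutex.h and sys/sx.h above forward to the generalized _sleep() entry point declared in sys/systm.h, passing the lock's embedded struct lock_object so that code can sleep while holding either lock type. The fragment below is a minimal sketch and is not part of the patch: the foo_softc driver, its field names, and the wait channel are hypothetical, and the semantics assumed are the usual msleep(9) ones, i.e. the lock is released while the thread sleeps and reacquired before the call returns.

/*
 * Minimal sketch (not part of the patch): a hypothetical driver waiting
 * on a condition with the mtx_sleep() wrapper defined above.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>

struct foo_softc {
	struct mtx	sc_mtx;		/* protects sc_ready */
	int		sc_ready;	/* set when the device is ready */
};

static int
foo_wait_ready(struct foo_softc *sc)
{
	int error = 0;

	mtx_lock(&sc->sc_mtx);
	while (!sc->sc_ready && error == 0)
		/* Priority 0: leave unchanged; timeout 0: sleep until woken. */
		error = mtx_sleep(sc, &sc->sc_mtx, 0, "foordy", 0);
	mtx_unlock(&sc->sc_mtx);
	return (error);
}

static void
foo_ready(struct foo_softc *sc)
{

	mtx_lock(&sc->sc_mtx);
	sc->sc_ready = 1;
	wakeup(sc);			/* unblocks foo_wait_ready() */
	mtx_unlock(&sc->sc_mtx);
}

sx_sleep() is used the same way with an sx(9) lock. Both wrappers expand to _sleep() on the lock's embedded struct lock_object; the lc_lock/lc_unlock hooks added to struct lock_class in sys/lock.h give _sleep() a uniform way to drop and reacquire whichever lock it was handed.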