Index: conf/options.i386
===================================================================
--- conf/options.i386	(.../stable/6/sys)	(revision 184012)
+++ conf/options.i386	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -163,3 +163,6 @@
 # Debugging
 KDB_STOP_NMI		opt_kdb.h
 NPX_DEBUG		opt_npx.h
+
+NATIVE			opt_global.h
+XEN			opt_global.h
Index: conf/kern.pre.mk
===================================================================
--- conf/kern.pre.mk	(.../stable/6/sys)	(revision 184012)
+++ conf/kern.pre.mk	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -20,12 +20,12 @@
 COPTFLAGS?=	-O
 .else
 . if defined(DEBUG)
-_MINUS_O=	-O
+_MINUS_O=	-O -fno-optimize-sibling-calls
 . else
 _MINUS_O=	-O2
 . endif
 . if ${MACHINE_ARCH} == "amd64"
-COPTFLAGS?=-O2 -frename-registers -pipe
+COPTFLAGS?=${_MINUS_O} -frename-registers -pipe
 . else
 COPTFLAGS?=${_MINUS_O} -pipe
 . endif
@@ -70,6 +70,9 @@
 # .. and the same for em
 INCLUDES+= -I$S/dev/em
 
+INCLUDES+= -I$S/xen/interface -I$S/xen/interface/io -I$S/xen/interface/hvm
+
+
 CFLAGS=	${COPTFLAGS} ${CWARNFLAGS} ${DEBUG}
 CFLAGS+= ${INCLUDES} -D_KERNEL -DHAVE_KERNEL_OPTION_HEADERS -include opt_global.h
 .if ${CC} != "icc"
Index: conf/files.i386
===================================================================
--- conf/files.i386	(.../stable/6/sys)	(revision 184012)
+++ conf/files.i386	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -291,8 +291,8 @@
 i386/i386/atomic.c		standard				\
 	compile-with	"${CC} -c ${CFLAGS} ${DEFINED_PROF:S/^$/-fomit-frame-pointer/} ${.IMPSRC}"
 i386/i386/autoconf.c		standard
-i386/i386/bios.c		standard
-i386/i386/bioscall.s		standard
+i386/i386/bios.c		optional native
+i386/i386/bioscall.s		optional native	
 i386/i386/busdma_machdep.c	standard
 i386/i386/db_disasm.c		optional ddb
 i386/i386/db_interface.c	optional ddb
@@ -301,7 +301,8 @@
 i386/i386/elan-mmcr.c		optional cpu_elan
 i386/i386/elan-mmcr.c		optional cpu_soekris
 i386/i386/elf_machdep.c		standard
-i386/i386/exception.s		standard
+i386/i386/exception.s		optional native
+i386/xen/exception.s		optional xen
 i386/i386/gdb_machdep.c		optional gdb
 i386/i386/geode.c		optional cpu_geode
 i386/i386/i686_mem.c		optional mem
@@ -314,22 +315,27 @@
 i386/i386/k6_mem.c		optional mem
 i386/i386/legacy.c		standard
 i386/i386/local_apic.c		optional apic
-i386/i386/locore.s		standard	no-obj
+i386/i386/locore.s		optional native	no-obj
+i386/xen/locore.s		optional xen	no-obj
 i386/i386/longrun.c		optional cpu_enable_longrun
 i386/i386/machdep.c		standard
 i386/i386/mem.c			optional mem
 i386/i386/minidump_machdep.c	standard
 i386/i386/mp_clock.c		optional smp
-i386/i386/mp_machdep.c		optional smp
+i386/i386/mp_machdep.c		optional native smp
+i386/xen/mp_machdep.c		optional xen smp
 i386/i386/mp_watchdog.c		optional mp_watchdog smp
-i386/i386/mpboot.s		optional smp
-i386/i386/mptable.c		optional apic
+i386/i386/mpboot.s		optional native smp
+i386/xen/mptable.c		optional apic xen
+i386/i386/mptable.c		optional apic native
 i386/i386/mptable_pci.c		optional apic pci
 i386/i386/msi.c			optional apic pci
 i386/i386/nexus.c		standard
 i386/i386/perfmon.c		optional perfmon
 i386/i386/perfmon.c		optional perfmon	profiling-routine
-i386/i386/pmap.c		standard
+i386/i386/pmap.c		optional native
+i386/xen/pmap.c			optional xen
+i386/xen/xen_machdep.c		optional xen
 i386/i386/ptrace_machdep.c	standard
 i386/i386/support.s		standard
 i386/i386/swtch.s		standard
@@ -358,9 +364,10 @@
 i386/ibcs2/ibcs2_xenix.c	optional ibcs2
 i386/ibcs2/ibcs2_xenix_sysent.c	optional ibcs2
 i386/ibcs2/imgact_coff.c	optional ibcs2
-i386/isa/atpic.c		standard
+i386/isa/atpic.c		optional atpic
 #i386/isa/atpic_vector.s		standard
-i386/isa/clock.c		standard
+i386/isa/clock.c		optional native
+i386/xen/clock.c		optional xen
 i386/isa/elcr.c			standard
 i386/isa/elink.c		optional ep
 i386/isa/elink.c		optional ie
Index: conf/files
===================================================================
--- conf/files	(.../stable/6/sys)	(revision 184012)
+++ conf/files	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -1475,6 +1475,7 @@
 libkern/strcat.c		standard
 libkern/strcmp.c		standard
 libkern/strcpy.c		standard
+libkern/strcspn.c		standard
 libkern/strdup.c		standard
 libkern/strlcat.c		standard
 libkern/strlcpy.c		standard
@@ -2043,4 +2044,41 @@
 xdr/xdr_mbuf.c			optional nfslockd
 xdr/xdr_mem.c			optional nfslockd
 xdr/xdr_reference.c		optional nfslockd
-xdr/xdr_sizeof.c		optional nfslockd
\ No newline at end of file
+xdr/xdr_sizeof.c		optional nfslockd
+
+
+xen/gnttab.c                   optional xen
+xen/features.c                 optional xen
+xen/evtchn/evtchn.c            optional xen
+xen/evtchn/evtchn_dev.c                optional xen
+xen/reboot.c			optional xen
+xen/xenbus/xenbus_client.c     optional xen
+xen/xenbus/xenbus_comms.c      optional xen
+xen/xenbus/xenbus_dev.c                optional xen
+xen/xenbus/xenbus_if.m		optional xen
+xen/xenbus/xenbus_probe.c      optional xen
+#xen/xenbus/xenbus_probe_backend.c      optional xen
+xen/xenbus/xenbus_xs.c         optional xen
+dev/xen/balloon/balloon.c	optional xen
+dev/xen/balloon/balloon.c	optional xenhvm
+dev/xen/console/console.c      optional xen
+dev/xen/console/xencons_ring.c optional xen
+dev/xen/blkfront/blkfront.c    optional xen
+dev/xen/netfront/netfront.c    optional xen
+dev/xen/blkfront/blkfront.c    optional xenhvm
+dev/xen/netfront/netfront.c    optional xenhvm
+
+xen/gnttab.c                    optional xenhvm
+xen/features.c                  optional xenhvm
+dev/xen/xenpci/evtchn.c         optional xenhvm
+dev/xen/xenpci/machine_reboot.c optional xenhvm
+xen/evtchn/evtchn_dev.c         optional xenhvm
+xen/reboot.c			optional xenhvm
+xen/xenbus/xenbus_client.c      optional xenhvm
+xen/xenbus/xenbus_comms.c       optional xenhvm
+xen/xenbus/xenbus_dev.c         optional xenhvm
+xen/xenbus/xenbus_if.m		optional xenhvm
+xen/xenbus/xenbus_probe.c       optional xenhvm
+#xen/xenbus/xenbus_probe_backend.c optional xenhvm
+xen/xenbus/xenbus_xs.c          optional xenhvm
+dev/xen/xenpci/xenpci.c         optional xenpci
Index: conf/options.amd64
===================================================================
--- conf/options.amd64	(.../stable/6/sys)	(revision 184012)
+++ conf/options.amd64	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -58,3 +58,5 @@
 
 # Debugging
 KDB_STOP_NMI		opt_kdb.h
+
+XENHVM			opt_global.h
Index: kern/kern_timeout.c
===================================================================
--- kern/kern_timeout.c	(.../stable/6/sys)	(revision 184012)
+++ kern/kern_timeout.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -557,7 +557,7 @@
 				mtx_unlock_spin(&callout_lock);
 				sleepq_add(&callout_wait,
 				    &callout_lock.mtx_object, "codrain",
-				    SLEEPQ_MSLEEP, 0);
+				    SLEEPQ_SLEEP, 0);
 				sleepq_wait(&callout_wait);
 				sq_locked = 0;
 
Index: kern/kern_mutex.c
===================================================================
--- kern/kern_mutex.c	(.../stable/6/sys)	(revision 184012)
+++ kern/kern_mutex.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -92,25 +92,66 @@
 #ifdef DDB
 static void	db_show_mtx(struct lock_object *lock);
 #endif
+static void	lock_mtx(struct lock_object *lock, int how);
+static void	lock_spin(struct lock_object *lock, int how);
+static int	unlock_mtx(struct lock_object *lock);
+static int	unlock_spin(struct lock_object *lock);
 
+
 /*
  * Lock classes for sleep and spin mutexes.
  */
 struct lock_class lock_class_mtx_sleep = {
-	"sleep mutex",
-	LC_SLEEPLOCK | LC_RECURSABLE,
+	.lc_name = "sleep mutex",
+	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE,
 #ifdef DDB
-	db_show_mtx
+	.lc_ddb_show = db_show_mtx,
 #endif
+	.lc_lock = lock_mtx,
+	.lc_unlock = unlock_mtx,
 };
 struct lock_class lock_class_mtx_spin = {
-	"spin mutex",
-	LC_SPINLOCK | LC_RECURSABLE,
+	.lc_name = "spin mutex",
+	.lc_flags = LC_SPINLOCK | LC_RECURSABLE,
 #ifdef DDB
-	db_show_mtx
+	.lc_ddb_show = db_show_mtx,
 #endif
+	.lc_lock = lock_spin,
+	.lc_unlock = unlock_spin,
 };
 
+void
+lock_mtx(struct lock_object *lock, int how)
+{
+	/* lc_lock hook of lock_class_mtx_sleep; 'how' is not used here. */
+	mtx_lock((struct mtx *)lock);
+}
+
+void
+lock_spin(struct lock_object *lock, int how)
+{
+	/* lc_lock hook of lock_class_mtx_spin: panics unconditionally. */
+	panic("spin locks can only use msleep_spin");
+}
+
+int
+unlock_mtx(struct lock_object *lock)
+{
+	struct mtx *m;
+	/* lc_unlock hook of lock_class_mtx_sleep. */
+	m = (struct mtx *)lock;
+	mtx_assert(m, MA_OWNED | MA_NOTRECURSED);
+	mtx_unlock(m);
+	return (0);	/* _sleep() passes this back to lc_lock as 'how' */
+}
+
+int
+unlock_spin(struct lock_object *lock)
+{
+	/* lc_unlock hook of lock_class_mtx_spin: panics unconditionally. */
+	panic("spin locks can only use msleep_spin");
+}
+
 /*
  * System-wide mutexes
  */
Index: kern/kern_synch.c
===================================================================
--- kern/kern_synch.c	(.../stable/6/sys)	(revision 184012)
+++ kern/kern_synch.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -64,11 +64,18 @@
 
 #include <machine/cpu.h>
 
+#ifdef XEN
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#endif
+
 static void synch_setup(void *dummy);
 SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, NULL)
 
 int	hogticks;
 int	lbolt;
+static int pause_wchan;
 
 static struct callout loadav_callout;
 static struct callout lbolt_callout;
@@ -100,7 +107,144 @@
 	init_sleepqueues();
 }
 
+
 /*
+ * General sleep call.  Suspends the current thread until a wakeup is
+ * performed on the specified identifier.  The thread will then be made
+ * runnable with the specified priority.  Sleeps at most timo/hz seconds
+ * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
+ * before and after sleeping, else signals are not checked.  Returns 0 if
+ * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
+ * signal needs to be delivered, ERESTART is returned if the current system
+ * call should be restarted if possible, and EINTR is returned if the system
+ * call should be interrupted by the signal.
+ *
+ * The lock argument is unlocked before the caller is suspended, and
+ * re-locked before _sleep() returns.  If priority includes the PDROP
+ * flag the lock is not re-locked before returning.
+ */
+int
+_sleep(void *ident, struct lock_object *lock, int priority,
+    const char *wmesg, int timo)
+{
+	struct thread *td;
+	struct proc *p;
+	struct lock_class *class;
+	int catch, flags, lock_state, pri, rval;
+	WITNESS_SAVE_DECL(lock_witness);
+
+	td = curthread;
+	p = td->td_proc;
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(1, 0);
+#endif
+	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
+	    "Sleeping on \"%s\"", wmesg);
+	KASSERT(timo != 0 || mtx_owned(&Giant) || lock != NULL ||
+	    ident == &lbolt, ("sleeping without a lock"));
+	KASSERT(p != NULL, ("msleep1"));
+	KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep"));
+	if (lock != NULL)
+		class = LOCK_CLASS(lock);
+	else
+		class = NULL;
+
+	if (cold) {
+		/*
+		 * During autoconfiguration, just return;
+		 * don't run any other threads or panic below,
+		 * in case this is the idle thread and already asleep.
+		 * XXX: this used to do "s = splhigh(); splx(safepri);
+		 * splx(s);" to give interrupts a chance, but there is
+		 * no way to give interrupts a chance now.
+		 */
+		if (lock != NULL && priority & PDROP)
+			class->lc_unlock(lock);
+		return (0);
+	}
+	catch = priority & PCATCH;
+	rval = 0;
+
+	/*
+	 * If we are already on a sleep queue, then remove us from that
+	 * sleep queue first.  We have to do this to handle recursive
+	 * sleeps.
+	 */
+	if (TD_ON_SLEEPQ(td))
+		sleepq_remove(td, td->td_wchan);
+
+	if (ident == &pause_wchan)
+		flags = SLEEPQ_PAUSE;
+	else
+		flags = SLEEPQ_SLEEP;
+	if (catch)
+		flags |= SLEEPQ_INTERRUPTIBLE;
+
+	sleepq_lock(ident);
+	CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)",
+	    td->td_tid, p->p_pid, p->p_comm, wmesg, ident);
+
+	DROP_GIANT();
+	if (lock != NULL && !(class->lc_flags & LC_SLEEPABLE)) {
+		WITNESS_SAVE(lock, lock_witness);
+		lock_state = class->lc_unlock(lock);
+	} else
+		/* GCC needs to follow the Yellow Brick Road */
+		lock_state = -1;
+
+	/*
+	 * We put ourselves on the sleep queue and start our timeout
+	 * before calling thread_suspend_check, as we could stop there,
+	 * and a wakeup or a SIGCONT (or both) could occur while we were
+	 * stopped without resuming us.  Thus, we must be ready for sleep
+	 * when cursig() is called.  If the wakeup happens while we're
+	 * stopped, then td will no longer be on a sleep queue upon
+	 * return from cursig().
+	 */
+	sleepq_add(ident, ident == &lbolt ? NULL : lock, wmesg, flags, 0);
+	if (timo)
+		sleepq_set_timeout(ident, timo);
+	if (lock != NULL && class->lc_flags & LC_SLEEPABLE) {
+		sleepq_release(ident);
+		WITNESS_SAVE(lock, lock_witness);
+		lock_state = class->lc_unlock(lock);
+		sleepq_lock(ident);
+	}
+
+	/*
+	 * Adjust this thread's priority, if necessary.
+	 */
+	pri = priority & PRIMASK;
+	if (pri != 0 && pri != td->td_priority) {
+		mtx_lock_spin(&sched_lock);
+		sched_prio(td, pri);
+		mtx_unlock_spin(&sched_lock);
+	}
+
+	if (timo && catch)
+		rval = sleepq_timedwait_sig(ident);
+	else if (timo)
+		rval = sleepq_timedwait(ident);
+	else if (catch)
+		rval = sleepq_wait_sig(ident);
+	else {
+		sleepq_wait(ident);
+		rval = 0;
+	}
+#ifdef KTRACE
+	if (KTRPOINT(td, KTR_CSW))
+		ktrcsw(0, 0);
+#endif
+	PICKUP_GIANT();
+	if (lock != NULL && !(priority & PDROP)) {
+		class->lc_lock(lock, lock_state);
+		WITNESS_RESTORE(lock, lock_witness);
+	}
+	return (rval);
+}
+
+/*
  * General sleep call.  Suspends the current process until a wakeup is
  * performed on the specified identifier.  The process will then be made
  * runnable with the specified priority.  Sleeps at most timo/hz seconds
@@ -164,7 +308,7 @@
 	if (TD_ON_SLEEPQ(td))
 		sleepq_remove(td, td->td_wchan);
 
-	flags = SLEEPQ_MSLEEP;
+	flags = SLEEPQ_SLEEP;
 	if (catch)
 		flags |= SLEEPQ_INTERRUPTIBLE;
 
@@ -265,7 +409,7 @@
 	/*
 	 * We put ourselves on the sleep queue and start our timeout.
 	 */
-	sleepq_add(ident, &mtx->mtx_object, wmesg, SLEEPQ_MSLEEP, 0);
+	sleepq_add(ident, &mtx->mtx_object, wmesg, SLEEPQ_SLEEP, 0);
 	if (timo)
 		sleepq_set_timeout(ident, timo);
 
@@ -314,7 +458,7 @@
 {
 
 	sleepq_lock(ident);
-	sleepq_broadcast(ident, SLEEPQ_MSLEEP, -1, 0);
+	sleepq_broadcast(ident, SLEEPQ_SLEEP, -1, 0);
 }
 
 /*
@@ -328,7 +472,7 @@
 {
 
 	sleepq_lock(ident);
-	sleepq_signal(ident, SLEEPQ_MSLEEP, -1, 0);
+	sleepq_signal(ident, SLEEPQ_SLEEP, -1, 0);
 }
 
 /*
@@ -417,6 +561,9 @@
 		    td, td->td_proc->p_comm, td->td_priority,
 		    td->td_inhibitors, td->td_wmesg, td->td_lockname);
 #endif
+#ifdef XEN
+	PT_UPDATES_FLUSH();
+#endif
 	sched_switch(td, newtd, flags);
 	CTR3(KTR_SCHED, "mi_switch: running %p(%s) prio %d",
 	    td, td->td_proc->p_comm, td->td_priority);
Index: kern/subr_trap.c
===================================================================
--- kern/subr_trap.c	(.../stable/6/sys)	(revision 184012)
+++ kern/subr_trap.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -67,6 +67,12 @@
 #include <machine/cpu.h>
 #include <machine/pcb.h>
 
+#ifdef XEN
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#endif
+
 /*
  * Define the code needed before returning to user mode, for
  * trap and syscall.
@@ -139,6 +145,9 @@
 	sched_userret(td);
 	KASSERT(td->td_locks == 0,
 	    ("userret: Returning with %d locks held.", td->td_locks));
+#ifdef XEN
+	PT_UPDATES_FLUSH();
+#endif
 }
 
 /*
Index: kern/kern_rwlock.c
===================================================================
--- kern/kern_rwlock.c	(.../stable/6/sys)	(revision 184012)
+++ kern/kern_rwlock.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -60,13 +60,19 @@
 static void	db_show_rwlock(struct lock_object *lock);
 #endif
 
+static void	lock_rw(struct lock_object *lock, int how);
+static int	unlock_rw(struct lock_object *lock);
+
 struct lock_class lock_class_rw = {
 	.lc_name = "rw",
 	.lc_flags = LC_SLEEPLOCK | LC_RECURSABLE | LC_UPGRADABLE,
 #ifdef DDB
 	.lc_ddb_show = db_show_rwlock,
 #endif
+	.lc_lock = lock_rw,
+	.lc_unlock = unlock_rw
 };
+      
 
 /*
  * Return a pointer to the owning thread if the lock is write-locked or
@@ -99,6 +105,34 @@
 #endif
 
 void
+lock_rw(struct lock_object *lock, int how)
+{
+	struct rwlock *rw;
+	/* lc_lock hook: how != 0 write-locks, how == 0 read-locks. */
+	rw = (struct rwlock *)lock;
+	if (how)
+		rw_wlock(rw);
+	else
+		rw_rlock(rw);
+}
+
+int
+unlock_rw(struct lock_object *lock)
+{
+	struct rwlock *rw;
+	/* lc_unlock hook: returns 0 if it was read-locked, 1 if write-locked. */
+	rw = (struct rwlock *)lock;
+	rw_assert(rw, RA_LOCKED | LA_NOTRECURSED);
+	if (rw->rw_lock & RW_LOCK_READ) {
+		rw_runlock(rw);
+		return (0);	/* lock_rw(lock, 0) re-takes it as a reader */
+	} else {
+		rw_wunlock(rw);
+		return (1);	/* lock_rw(lock, 1) re-takes it as a writer */
+	}
+}
+
+void
 rw_init_flags(struct rwlock *rw, const char *name, int opts)
 {
 	int flags;
Index: kern/kern_sx.c
===================================================================
--- kern/kern_sx.c	(.../stable/6/sys)	(revision 184012)
+++ kern/kern_sx.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -108,12 +108,17 @@
 static void	db_show_sx(struct lock_object *lock);
 #endif
 
+static void	lock_sx(struct lock_object *lock, int how);
+static int	unlock_sx(struct lock_object *lock);
+
 struct lock_class lock_class_sx = {
 	.lc_name = "sx",
 	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
 #ifdef DDB
 	.lc_ddb_show = db_show_sx,
 #endif
+	.lc_lock = lock_sx,
+	.lc_unlock = unlock_sx,
 };
 
 #ifndef INVARIANTS
@@ -121,6 +126,34 @@
 #endif
 
 void
+lock_sx(struct lock_object *lock, int how)
+{
+	struct sx *sx;
+	/* lc_lock hook: how != 0 takes it exclusive, how == 0 shared. */
+	sx = (struct sx *)lock;
+	if (how)
+		sx_xlock(sx);
+	else
+		sx_slock(sx);
+}
+
+int
+unlock_sx(struct lock_object *lock)
+{
+	struct sx *sx;
+	/* lc_unlock hook: returns 1 if it was held exclusive, 0 if shared. */
+	sx = (struct sx *)lock;
+	sx_assert(sx, SA_LOCKED | SA_NOTRECURSED);
+	if (sx_xlocked(sx)) {
+		sx_xunlock(sx);
+		return (1);	/* lock_sx(lock, 1) re-takes it exclusive */
+	} else {
+		sx_sunlock(sx);
+		return (0);	/* lock_sx(lock, 0) re-takes it shared */
+	}
+}
+
+void
 sx_sysinit(void *arg)
 {
 	struct sx_args *sargs = arg;
@@ -845,6 +878,7 @@
 	}
 }
 
+#if 0
 /*
  * Atomically drop an sx lock while going to sleep.  This is just a hack
  * for 6.x.  In 7.0 and later this is done more cleanly.
@@ -961,6 +995,7 @@
 	}
 	return (rval);
 }
+#endif
 
 #ifdef INVARIANT_SUPPORT
 #ifndef INVARIANTS
Index: kern/kern_fork.c
===================================================================
--- kern/kern_fork.c	(.../stable/6/sys)	(revision 184012)
+++ kern/kern_fork.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -112,10 +112,15 @@
 	struct thread *td;
 	struct vfork_args *uap;
 {
-	int error;
+	int error, flags;
 	struct proc *p2;
 
-	error = fork1(td, RFFDG | RFPROC | RFPPWAIT | RFMEM, 0, &p2);
+#ifdef XEN
+	flags = RFFDG | RFPROC;
+#else
+	flags = RFFDG | RFPROC | RFPPWAIT | RFMEM;
+#endif	
+	error = fork1(td, flags, 0, &p2);
 	if (error == 0) {
 		td->td_retval[0] = p2->p_pid;
 		td->td_retval[1] = 0;
Index: kern/kern_lock.c
===================================================================
--- kern/kern_lock.c	(.../stable/6/sys)	(revision 184012)
+++ kern/kern_lock.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -62,11 +62,48 @@
 #include <ddb/ddb.h>
 #endif
 
+
+#ifdef DDB
+#include <ddb/ddb.h>
+static void	db_show_lockmgr(struct lock_object *lock);
+#endif
+static void	lock_lockmgr(struct lock_object *lock, int how);
+static int	unlock_lockmgr(struct lock_object *lock);
+
+struct lock_class lock_class_lockmgr = {
+	.lc_name = "lockmgr",
+	.lc_flags = LC_SLEEPLOCK | LC_SLEEPABLE | LC_RECURSABLE | LC_UPGRADABLE,
+#ifdef DDB
+	.lc_ddb_show = db_show_lockmgr,
+#endif
+	.lc_lock = lock_lockmgr,
+	.lc_unlock = unlock_lockmgr,
+};
+
 /*
  * Locking primitives implementation.
  * Locks provide shared/exclusive sychronization.
  */
 
+void
+lock_lockmgr(struct lock_object *lock, int how)
+{
+	/* lc_lock hook of lock_class_lockmgr: panics unconditionally. */
+	panic("lockmgr locks do not support sleep interlocking");
+}
+
+int
+unlock_lockmgr(struct lock_object *lock)
+{
+	/* lc_unlock hook of lock_class_lockmgr: panics unconditionally. */
+	panic("lockmgr locks do not support sleep interlocking");
+}
+
+/*
+ * Locking primitives implementation.
+ * Locks provide shared/exclusive synchronization.
+ */
+
 #define	COUNT(td, x)	if ((td)) (td)->td_locks += (x)
 #define LK_ALL (LK_HAVE_EXCL | LK_WANT_EXCL | LK_WANT_UPGRADE | \
 	LK_SHARE_NONZERO | LK_WAIT_NONZERO)
@@ -639,14 +676,13 @@
 	return (1);
 }
 
-DB_SHOW_COMMAND(lockmgr, db_show_lockmgr)
+void
+db_show_lockmgr(struct lock_object *lock)
 {
 	struct thread *td;
 	struct lock *lkp;
 
-	if (!have_addr)
-		return;
-	lkp = (struct lock *)addr;
+	lkp = (struct lock *)lock;
 
 	db_printf("lock type: %s\n", lkp->lk_wmesg);
 	db_printf("state: ");
Index: kern/kern_condvar.c
===================================================================
--- kern/kern_condvar.c	(.../stable/6/sys)	(revision 184012)
+++ kern/kern_condvar.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -124,8 +124,7 @@
 	DROP_GIANT();
 	mtx_unlock(mp);
 
-	sleepq_add(cvp, &mp->mtx_object, cvp->cv_description, SLEEPQ_CONDVAR,
-	    0);
+	sleepq_add(cvp, &mp->mtx_object, cvp->cv_description, SLEEPQ_CONDVAR, 0);
 	sleepq_wait(cvp);
 
 #ifdef KTRACE
@@ -232,8 +231,7 @@
 	DROP_GIANT();
 	mtx_unlock(mp);
 
-	sleepq_add(cvp, &mp->mtx_object, cvp->cv_description, SLEEPQ_CONDVAR,
-	    0);
+	sleepq_add(cvp, &mp->mtx_object, cvp->cv_description, SLEEPQ_CONDVAR, 0);
 	sleepq_set_timeout(cvp, timo);
 	rval = sleepq_timedwait(cvp);
 
Index: dev/xen/netfront/mbufq.h
===================================================================
--- dev/xen/netfront/mbufq.h	(.../stable/6/sys)	(revision 0)
+++ dev/xen/netfront/mbufq.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,123 @@
+/**************************************************************************
+
+Copyright (c) 2007, Chelsio Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+
+ 2. Neither the name of the Chelsio Corporation nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+$FreeBSD$
+
+***************************************************************************/
+
+#ifndef CXGB_MBUFQ_H_
+#define CXGB_MBUFQ_H_
+
+struct mbuf_head {
+	struct mbuf *head;	/* first queued packet, NULL when empty */
+	struct mbuf *tail;	/* last queued packet */
+	uint32_t     qlen;	/* number of packets queued */
+	uint32_t     qsize;	/* sum of m_pkthdr.len of queued packets */
+	struct mtx   lock;	/* not used by the mbufq_*() helpers below */
+};
+
+static __inline void
+mbufq_init(struct mbuf_head *l)
+{
+	l->head = l->tail = NULL;	/* start out empty */
+	l->qlen = l->qsize = 0;		/* note: l->lock is not initialized here */
+}
+
+static __inline int
+mbufq_empty(struct mbuf_head *l)
+{
+	return (l->head == NULL);	/* nonzero when nothing is queued */
+}
+
+static __inline int
+mbufq_len(struct mbuf_head *l)
+{
+	return (l->qlen);	/* packet count; uint32_t returned as int */
+}
+
+static __inline int
+mbufq_size(struct mbuf_head *l)
+{
+	return (l->qsize);	/* total m_pkthdr.len queued; uint32_t as int */
+}
+
+static __inline int
+mbufq_head_size(struct mbuf_head *l)
+{
+	return (l->head ? l->head->m_pkthdr.len : 0);	/* 0 when empty */
+}
+
+static __inline void
+mbufq_tail(struct mbuf_head *l, struct mbuf *m)
+{
+	l->qlen++;			/* enqueue packet m at the tail */
+	if (l->head == NULL)
+		l->head = m;		/* queue was empty */
+	else
+		l->tail->m_nextpkt = m;	/* link after the current tail */
+	l->tail = m;			/* m->m_nextpkt itself is not modified */
+	l->qsize += m->m_pkthdr.len;
+}
+
+static __inline struct mbuf *
+mbufq_dequeue(struct mbuf_head *l)
+{
+	struct mbuf *m;
+
+	m = l->head;			/* NULL when the queue is empty */
+	if (m) {
+		if (m == l->tail) 	/* removing the only packet */
+			l->head = l->tail = NULL;
+		else
+			l->head = m->m_nextpkt;
+		m->m_nextpkt = NULL;	/* detach the packet from the chain */
+		l->qlen--;
+		l->qsize -= m->m_pkthdr.len;
+	}
+
+	return (m);			/* removed packet, or NULL */
+}
+
+static __inline struct mbuf *
+mbufq_peek(struct mbuf_head *l)
+{
+	return (l->head);	/* first packet or NULL; queue is unchanged */
+}
+
+static __inline void
+mbufq_append(struct mbuf_head *a, struct mbuf_head *b)
+{
+	if (a->tail)
+		a->tail->m_nextpkt = b->head;	/* link b after a's last packet */
+	else
+		a->head = b->head;		/* a was empty: adopt b's head */
+	if (b->tail)
+		a->tail = b->tail;		/* an empty b leaves a's tail alone */
+	a->qlen += b->qlen;			/* b itself is not reset here */
+	a->qsize += b->qsize;
+}
+#endif  /* CXGB_MBUFQ_H_ */

Property changes on: dev/xen/netfront/mbufq.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: dev/xen/netfront/netfront.c
===================================================================
--- dev/xen/netfront/netfront.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/netfront/netfront.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,1988 @@
+/*
+ *
+ * Copyright (c) 2004-2006 Kip Macy
+ * All rights reserved.
+ *
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+#include <sys/queue.h>
+#include <sys/lock.h>
+#include <sys/sx.h>
+
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/ethernet.h>
+#include <net/if_dl.h>
+#include <net/if_media.h>
+
+#include <net/bpf.h>
+
+#include <net/if_types.h>
+#include <net/if.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/if_ether.h>
+#if __FreeBSD_version >= 700000
+#include <netinet/tcp.h>
+#include <netinet/tcp_lro.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/clock.h>      /* for DELAY */
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/frame.h>
+#include <machine/vmparam.h>
+
+#include <sys/bus.h>
+#include <sys/rman.h>
+
+#include <machine/intr_machdep.h>
+
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenfunc.h>
+#include <xen/hypervisor.h>
+#include <xen/xen_intr.h>
+#include <xen/evtchn.h>
+#include <xen/gnttab.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/io/netif.h>
+#include <xen/xenbus/xenbusvar.h>
+
+#include <dev/xen/netfront/mbufq.h>
+
+#include "xenbus_if.h"
+
+#define XN_CSUM_FEATURES	(CSUM_TCP | CSUM_UDP | CSUM_TSO)
+
+#define GRANT_INVALID_REF	0
+
+#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+
+#if __FreeBSD_version >= 700000
+/*
+ * Should the driver do LRO on the RX end
+ *  this can be toggled on the fly, but the
+ *  interface must be reset (down/up) for it
+ *  to take effect.
+ */
+static int xn_enable_lro = 1;
+TUNABLE_INT("hw.xn.enable_lro", &xn_enable_lro);
+#else
+
+#define IFCAP_TSO4	0
+#define CSUM_TSO	0
+
+#endif
+
+#ifdef CONFIG_XEN
+static int MODPARM_rx_copy = 0;
+module_param_named(rx_copy, MODPARM_rx_copy, bool, 0);
+MODULE_PARM_DESC(rx_copy, "Copy packets from network card (rather than flip)");
+static int MODPARM_rx_flip = 0;
+module_param_named(rx_flip, MODPARM_rx_flip, bool, 0);
+MODULE_PARM_DESC(rx_flip, "Flip packets from network card (rather than copy)");
+#else
+static const int MODPARM_rx_copy = 1;
+static const int MODPARM_rx_flip = 0;
+#endif
+
+#define MAX_SKB_FRAGS	(65536/PAGE_SIZE + 2)
+#define RX_COPY_THRESHOLD 256
+
+#define net_ratelimit() 0
+
+struct netfront_info;
+struct netfront_rx_info;
+
+static void xn_txeof(struct netfront_info *);
+static void xn_rxeof(struct netfront_info *);
+static void network_alloc_rx_buffers(struct netfront_info *);
+
+static void xn_tick_locked(struct netfront_info *);
+static void xn_tick(void *);
+
+static void xn_intr(void *);
+static void xn_start_locked(struct ifnet *);
+static void xn_start(struct ifnet *);
+static int  xn_ioctl(struct ifnet *, u_long, caddr_t);
+static void xn_ifinit_locked(struct netfront_info *);
+static void xn_ifinit(void *);
+static void xn_stop(struct netfront_info *);
+#ifdef notyet
+static void xn_watchdog(struct ifnet *);
+#endif
+
+static void show_device(struct netfront_info *sc);
+#ifdef notyet
+static void netfront_closing(device_t dev);
+#endif
+static void netif_free(struct netfront_info *info);
+static int netfront_detach(device_t dev);
+
+static int talk_to_backend(device_t dev, struct netfront_info *info);
+static int create_netdev(device_t dev);
+static void netif_disconnect_backend(struct netfront_info *info);
+static int setup_device(device_t dev, struct netfront_info *info);
+static void end_access(int ref, void *page);
+
+/* Xenolinux helper functions */
+int network_connect(struct netfront_info *);
+
+static void xn_free_rx_ring(struct netfront_info *);
+
+static void xn_free_tx_ring(struct netfront_info *);
+
+static int xennet_get_responses(struct netfront_info *np,
+	struct netfront_rx_info *rinfo, RING_IDX rp, struct mbuf **list,
+	int *pages_flipped_p);
+
+#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)
+
+#define INVALID_P2M_ENTRY (~0UL)
+
+/*
+ * Mbuf pointers. We need these to keep track of the virtual addresses
+ * of our mbuf chains since we can only convert from virtual to physical,
+ * not the other way around.  The size must track the free index arrays.
+ */
+struct xn_chain_data {
+		struct mbuf		*xn_tx_chain[NET_TX_RING_SIZE+1];
+		struct mbuf		*xn_rx_chain[NET_RX_RING_SIZE+1];
+};
+
+
+struct net_device_stats
+{
+	u_long	rx_packets;		/* total packets received	*/
+	u_long	tx_packets;		/* total packets transmitted	*/
+	u_long	rx_bytes;		/* total bytes received 	*/
+	u_long	tx_bytes;		/* total bytes transmitted	*/
+	u_long	rx_errors;		/* bad packets received		*/
+	u_long	tx_errors;		/* packet transmit problems	*/
+	u_long	rx_dropped;		/* no space in linux buffers	*/
+	u_long	tx_dropped;		/* no space available in linux	*/
+	u_long	multicast;		/* multicast packets received	*/
+	u_long	collisions;
+
+	/* detailed rx_errors: */
+	u_long	rx_length_errors;
+	u_long	rx_over_errors;		/* receiver ring buff overflow	*/
+	u_long	rx_crc_errors;		/* recved pkt with crc error	*/
+	u_long	rx_frame_errors;	/* recv'd frame alignment error */
+	u_long	rx_fifo_errors;		/* recv'r fifo overrun		*/
+	u_long	rx_missed_errors;	/* receiver missed packet	*/
+
+	/* detailed tx_errors */
+	u_long	tx_aborted_errors;
+	u_long	tx_carrier_errors;
+	u_long	tx_fifo_errors;
+	u_long	tx_heartbeat_errors;
+	u_long	tx_window_errors;
+	
+	/* for cslip etc */
+	u_long	rx_compressed;
+	u_long	tx_compressed;
+};
+
+struct netfront_info {
+	/* Driver state for a netfront device (see xn_ifp / xbdev). */
+	struct ifnet *xn_ifp;
+#if __FreeBSD_version >= 700000
+	struct lro_ctrl xn_lro;
+#endif
+
+	struct net_device_stats stats;
+	u_int tx_full;
+
+	netif_tx_front_ring_t tx;
+	netif_rx_front_ring_t rx;
+
+	struct mtx   tx_lock;	/* taken via XN_TX_LOCK() */
+	struct mtx   rx_lock;	/* taken via XN_RX_LOCK() */
+	struct sx    sc_lock;	/* taken via XN_LOCK() */
+
+	u_int handle;
+	u_int irq;
+	u_int copying_receiver;
+	u_int carrier;		/* set/cleared by netfront_carrier_on/off() */
+
+	/* Receive-ring batched refills. */
+#define RX_MIN_TARGET 32
+#define RX_MAX_TARGET NET_RX_RING_SIZE
+	int rx_min_target, rx_max_target, rx_target;
+
+	/*
+	 * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
+	 * array is an index into a chain of free entries.
+	 */
+
+	grant_ref_t gref_tx_head;
+	grant_ref_t grant_tx_ref[NET_TX_RING_SIZE + 1];
+	grant_ref_t gref_rx_head;
+	grant_ref_t grant_rx_ref[NET_RX_RING_SIZE + 1];	/* sized like xn_rx_chain[] */
+
+#define TX_MAX_TARGET min(NET_RX_RING_SIZE, 256)
+	device_t xbdev;
+	int tx_ring_ref;
+	int rx_ring_ref;
+	uint8_t mac[ETHER_ADDR_LEN];
+	struct xn_chain_data	xn_cdata;	/* mbufs */
+	struct mbuf_head xn_rx_batch;	/* head of the batch queue */
+
+	int			xn_if_flags;
+	struct callout	        xn_stat_ch;
+
+	u_long rx_pfn_array[NET_RX_RING_SIZE];
+	multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];
+	mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+};
+
+/* Shorthand member names for the mbuf tables inside a softc's xn_cdata. */
+#define rx_mbufs xn_cdata.xn_rx_chain
+#define tx_mbufs xn_cdata.xn_tx_chain
+
+#define XN_LOCK_INIT(_sc, _name) \
+        mtx_init(&(_sc)->tx_lock, #_name"_tx", "network transmit lock", MTX_DEF); \
+        mtx_init(&(_sc)->rx_lock, #_name"_rx", "network receive lock", MTX_DEF);  \
+        sx_init(&(_sc)->sc_lock, #_name"_rx")
+
+/* Receive-path mutex. */
+#define XN_RX_LOCK(_sc)           mtx_lock(&(_sc)->rx_lock)
+#define XN_RX_UNLOCK(_sc)         mtx_unlock(&(_sc)->rx_lock)
+
+/* Transmit-path mutex. */
+#define XN_TX_LOCK(_sc)           mtx_lock(&(_sc)->tx_lock)
+#define XN_TX_UNLOCK(_sc)         mtx_unlock(&(_sc)->tx_lock)
+
+/*
+ * Softc sx lock, taken exclusively.  Note that these expansions (and the
+ * assert/destroy macros below) already end in a semicolon.
+ */
+#define XN_LOCK(_sc)           sx_xlock(&(_sc)->sc_lock); 
+#define XN_UNLOCK(_sc)         sx_xunlock(&(_sc)->sc_lock); 
+
+/* Assertions that the corresponding lock is held. */
+#define XN_LOCK_ASSERT(_sc)    sx_assert(&(_sc)->sc_lock, SX_LOCKED); 
+#define XN_RX_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->rx_lock, MA_OWNED); 
+#define XN_TX_LOCK_ASSERT(_sc)    mtx_assert(&(_sc)->tx_lock, MA_OWNED); 
+/* Destroy all three locks; expands to three separate statements. */
+#define XN_LOCK_DESTROY(_sc)   mtx_destroy(&(_sc)->rx_lock); \
+                               mtx_destroy(&(_sc)->tx_lock); \
+                               sx_destroy(&(_sc)->sc_lock);
+
+/*
+ * Local copy of one rx response plus the extra-info records that came with
+ * it.  extras[] is indexed by (extra type - 1), see xennet_get_extras().
+ */
+struct netfront_rx_info {
+	struct netif_rx_response rx;
+	struct netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+};
+
+/* Set, clear and test the softc's 'carrier' flag. */
+#define netfront_carrier_on(netif)	((netif)->carrier = 1)
+#define netfront_carrier_off(netif)	((netif)->carrier = 0)
+#define netfront_carrier_ok(netif)	((netif)->carrier)
+
+/* Access macros for acquiring and freeing slots in xn_free_{tx,rx}_idxs[]. */
+
+
+
+/*
+ * Inline helpers for acquiring and freeing slots in the tx_mbufs[] free list.
+ */
+
+/*
+ * Return slot 'id' to the free list threaded through 'list'.  list[0]
+ * holds the id of the first free slot, stored as a pointer-sized integer;
+ * each free slot holds the id of the next one.
+ */
+static inline void
+add_id_to_freelist(struct mbuf **list, unsigned short id)
+{
+	struct mbuf **head = &list[0];
+
+	/* Push: the released slot links to the old head and becomes the head. */
+	list[id] = *head;
+	*head = (void *)(u_long)id;
+}
+
+/*
+ * Pop and return the id at the head of the free list threaded through
+ * 'list'; the slot's successor becomes the new head.
+ */
+static inline unsigned short
+get_id_from_freelist(struct mbuf **list)
+{
+	struct mbuf **head = list;
+	u_int first_free;
+
+	first_free = (u_int)(u_long)*head;
+	*head = list[first_free];
+	return (first_free);
+}
+
+/*
+ * Map a free-running rx ring index onto a slot number by masking it with
+ * (NET_RX_RING_SIZE - 1).
+ */
+static inline int
+xennet_rxidx(RING_IDX idx)
+{
+	const int slot = idx & (NET_RX_RING_SIZE - 1);
+
+	return (slot);
+}
+
+/*
+ * Take the mbuf recorded for rx ring index 'ri' out of the rx table,
+ * leaving the slot NULL, and return it.
+ */
+static inline struct mbuf *
+xennet_get_rx_mbuf(struct netfront_info *np,
+						RING_IDX ri)
+{
+	struct mbuf **slot = &np->rx_mbufs[xennet_rxidx(ri)];
+	struct mbuf *taken = *slot;
+
+	*slot = NULL;
+	return (taken);
+}
+
+/*
+ * Take the grant reference recorded for rx ring index 'ri', leaving
+ * GRANT_INVALID_REF in its slot, and return it.
+ */
+static inline grant_ref_t
+xennet_get_rx_ref(struct netfront_info *np, RING_IDX ri)
+{
+	grant_ref_t *slot = &np->grant_rx_ref[xennet_rxidx(ri)];
+	grant_ref_t taken = *slot;
+
+	*slot = GRANT_INVALID_REF;
+	return (taken);
+}
+
+/* Console message helpers.  The DEBUG section below is currently empty. */
+#ifdef DEBUG
+
+#endif
+/* Informational and warning messages; both print with a "[XEN] " prefix. */
+#define IPRINTK(fmt, args...) \
+    printf("[XEN] " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+    printf("[XEN] " fmt, ##args)
+/* Debug trace with the function name; expands to nothing while disabled. */
+#if 0
+#define DPRINTK(fmt, args...) \
+    printf("[XEN] %s: " fmt, __func__, ##args)
+#else
+#define DPRINTK(fmt, args...)
+#endif
+
+/**
+ * Read the 'mac' node at the given device's node in the store, and parse
+ * it as colon-separated hexadecimal octets into the given mac array.  mac
+ * must have room for ETHER_ADDR_LEN bytes.
+ *
+ * Every octet except the last must be followed by ':'; the last may be
+ * followed by ':' or the end of the string.  A string that ends early, or
+ * an octet larger than 0xff, is rejected rather than parsed past the
+ * terminator or silently truncated.
+ *
+ * Return 0 on success, the xenbus_read() error if the node cannot be
+ * read, or ENOENT if the string is malformed.
+ */
+static int 
+xen_net_read_mac(device_t dev, uint8_t mac[])
+{
+	int error, i, last;
+	char *s, *e, *macstr;
+	u_long octet;
+
+	error = xenbus_read(XBT_NIL, xenbus_get_node(dev), "mac", NULL,
+	    (void **) &macstr);
+	if (error)
+		return (error);
+
+	s = macstr;
+	for (i = 0; i < ETHER_ADDR_LEN; i++) {
+		last = (i == ETHER_ADDR_LEN - 1);
+		octet = strtoul(s, &e, 16);
+		if (s == e || octet > 0xff ||
+		    (e[0] != ':' && !(last && e[0] == 0))) {
+			error = ENOENT;
+			break;
+		}
+		mac[i] = (uint8_t)octet;
+		/* Only reached with e[0] == ':' unless this was the last octet. */
+		s = &e[1];
+	}
+	free(macstr, M_DEVBUF);
+	return (error);
+}
+
+/**
+ * Probe method.  Claims the device when its xenbus type is "vif" and sets
+ * its description; every other type is declined with ENXIO.
+ */
+static int 
+netfront_probe(device_t dev)
+{
+
+	if (strcmp(xenbus_get_type(dev), "vif") != 0)
+		return (ENXIO);
+
+	device_set_desc(dev, "Virtual Network Interface");
+	return (0);
+}
+
+/*
+ * Attach method.  Creates the network device via create_netdev(); on
+ * failure the error is reported through xenbus_dev_fatal() and returned.
+ * On FreeBSD 7 and later an "enable_lro" sysctl backed by xn_enable_lro is
+ * added under the device's sysctl tree.
+ */
+static int
+netfront_attach(device_t dev)
+{	
+	int error = create_netdev(dev);
+
+	if (error != 0) {
+		xenbus_dev_fatal(dev, error, "creating netdev");
+		return (error);
+	}
+
+#if __FreeBSD_version >= 700000
+	SYSCTL_ADD_INT(device_get_sysctl_ctx(dev),
+	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "enable_lro", CTLTYPE_INT|CTLFLAG_RW,
+	    &xn_enable_lro, 0, "Large Receive Offload");
+#endif
+
+	return (0);
+}
+
+
+/**
+ * Resume method, used when we are reconnecting to the backend after a
+ * suspend/resume or a backend driver restart.  The only work done here is
+ * disconnecting the softc from the old backend; always returns 0.
+ */
+static int
+netfront_resume(device_t dev)
+{
+	struct netfront_info *sc;
+
+	sc = device_get_softc(dev);
+	netif_disconnect_backend(sc);
+
+	return (0);
+}
+
+
+/*
+ * Common code used when first setting up, and when resuming.
+ *
+ * Reads the MAC address from the store, calls setup_device() to create the
+ * rings and bind the interrupt, then publishes the ring references, the
+ * event channel and the feature flags under this device's xenbus node in
+ * one transaction.  A transaction that ends with EAGAIN is restarted from
+ * the "again" label.
+ *
+ * Returns 0 on success, otherwise the error of the failing call.  Failures
+ * after setup_device() has succeeded also call netif_free() on info.
+ */
+static int 
+talk_to_backend(device_t dev, struct netfront_info *info)
+{
+	const char *message;
+	struct xenbus_transaction xbt;
+	const char *node = xenbus_get_node(dev);
+	int err;
+
+	err = xen_net_read_mac(dev, info->mac);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "parsing %s/mac", node);
+		goto out;
+	}
+
+	/* Create shared ring, alloc event channel. */
+	err = setup_device(dev, info);
+	if (err)
+		goto out;	/* setup_device() runs netif_free() on its own failures */
+	
+ again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "starting transaction");
+		goto destroy_ring;
+	}
+	err = xenbus_printf(xbt, node, "tx-ring-ref","%u",
+			    info->tx_ring_ref);
+	if (err) {
+		message = "writing tx ring-ref";
+		goto abort_transaction;
+	}
+	err = xenbus_printf(xbt, node, "rx-ring-ref","%u",
+			    info->rx_ring_ref);
+	if (err) {
+		message = "writing rx ring-ref";
+		goto abort_transaction;
+	}
+	err = xenbus_printf(xbt, node,
+		"event-channel", "%u", irq_to_evtchn_port(info->irq));
+	if (err) {
+		message = "writing event-channel";
+		goto abort_transaction;
+	}
+	err = xenbus_printf(xbt, node, "request-rx-copy", "%u",
+			    info->copying_receiver);
+	if (err) {
+		message = "writing request-rx-copy";
+		goto abort_transaction;
+	}
+	err = xenbus_printf(xbt, node, "feature-rx-notify", "%d", 1);
+	if (err) {
+		message = "writing feature-rx-notify";
+		goto abort_transaction;
+	}
+	err = xenbus_printf(xbt, node, "feature-sg", "%d", 1);
+	if (err) {
+		message = "writing feature-sg";
+		goto abort_transaction;
+	}
+#if __FreeBSD_version >= 700000
+	err = xenbus_printf(xbt, node, "feature-gso-tcpv4", "%d", 1);
+	if (err) {
+		message = "writing feature-gso-tcpv4";
+		goto abort_transaction;
+	}
+#endif
+
+	/* Commit; EAGAIN means the whole transaction must be redone. */
+	err = xenbus_transaction_end(xbt, 0);
+	if (err) {
+		if (err == EAGAIN)
+			goto again;
+		xenbus_dev_fatal(dev, err, "completing transaction");
+		goto destroy_ring;
+	}
+	
+	return 0;
+	
+	/* Error unwinding: each label falls through into the next one. */
+ abort_transaction:
+	xenbus_transaction_end(xbt, 1);
+	xenbus_dev_fatal(dev, err, "%s", message);
+ destroy_ring:
+	netif_free(info);
+ out:
+	return err;
+}
+
+
+/*
+ * Allocate and share the tx and rx ring pages and bind the interrupt.
+ *
+ * For each ring a zeroed page is allocated, initialised as a shared ring,
+ * attached to the softc's front ring and granted to the backend; the
+ * resulting grant references are stored in info->tx_ring_ref and
+ * info->rx_ring_ref.  xn_intr() is then bound as the handler for the
+ * listening port of the other end and the irq stored in info->irq.
+ *
+ * Returns 0 on success.  On any failure netif_free() is called on info and
+ * the error is returned.
+ */
+static int 
+setup_device(device_t dev, struct netfront_info *info)
+{
+	netif_tx_sring_t *txs;
+	netif_rx_sring_t *rxs;
+	int error;
+
+	/* Start from a known-empty state so the failure path sees what exists. */
+	info->tx_ring_ref = GRANT_INVALID_REF;
+	info->rx_ring_ref = GRANT_INVALID_REF;
+	info->rx.sring = NULL;
+	info->tx.sring = NULL;
+	info->irq = 0;
+
+	txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (!txs) {
+		error = ENOMEM;
+		xenbus_dev_fatal(dev, error, "allocating tx ring page");
+		goto fail;
+	}
+	SHARED_RING_INIT(txs);
+	FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE);
+	error = xenbus_grant_ring(dev, virt_to_mfn(txs), &info->tx_ring_ref);
+	if (error)
+		goto fail;
+
+	rxs = (netif_rx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (!rxs) {
+		error = ENOMEM;
+		xenbus_dev_fatal(dev, error, "allocating rx ring page");
+		goto fail;
+	}
+	SHARED_RING_INIT(rxs);
+	FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE);
+
+	error = xenbus_grant_ring(dev, virt_to_mfn(rxs), &info->rx_ring_ref);
+	if (error)
+		goto fail;
+
+	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
+	    "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq);
+
+	if (error) {
+		xenbus_dev_fatal(dev, error,
+				 "bind_listening_port_to_irqhandler failed");
+		goto fail;
+	}
+
+	show_device(info);
+	
+	return (0);
+	
+ fail:
+	netif_free(info);
+	return (error);
+}
+
+/**
+ * Call arp_ifinit() for every AF_INET address configured on the
+ * interface.  This helps to get the network going again after migrating
+ * hosts.
+ */
+static void
+netfront_send_fake_arp(device_t dev, struct netfront_info *info)
+{
+	struct ifnet *ifp = info->xn_ifp;
+	struct ifaddr *ifa;
+
+	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
+		/* Only IPv4 addresses are announced. */
+		if (ifa->ifa_addr->sa_family != AF_INET)
+			continue;
+		arp_ifinit(ifp, ifa);
+	}
+}
+
+/**
+ * Callback received when the backend's state changes.
+ *
+ * InitWait: if we are still Initialising and network_connect() succeeds,
+ * move to Connected and announce our IPv4 addresses.
+ * Closing: answer by moving to Closed.
+ * All other states are ignored.
+ */
+static void
+netfront_backend_changed(device_t dev, XenbusState newstate)
+{
+	struct netfront_info *sc = device_get_softc(dev);
+		
+	DPRINTK("newstate=%d\n", newstate);
+
+	switch (newstate) {
+	case XenbusStateClosing:
+		xenbus_set_state(dev, XenbusStateClosed);
+		break;
+	case XenbusStateInitWait:
+		if (xenbus_get_state(dev) == XenbusStateInitialising &&
+		    network_connect(sc) == 0) {
+			xenbus_set_state(dev, XenbusStateConnected);
+			netfront_send_fake_arp(dev, sc);
+		}
+		break;
+	case XenbusStateInitialising:
+	case XenbusStateInitialised:
+	case XenbusStateConnected:
+	case XenbusStateUnknown:
+	case XenbusStateClosed:
+	case XenbusStateReconfigured:
+	case XenbusStateReconfiguring:
+		/* Nothing to do. */
+		break;
+	}
+}
+
+/*
+ * Placeholder for releasing the rx ring's mbufs.  The whole body is
+ * compiled out with #if 0, so calling this currently does nothing.
+ */
+static void
+xn_free_rx_ring(struct netfront_info *sc)
+{
+#if 0
+	int i;
+	
+	for (i = 0; i < NET_RX_RING_SIZE; i++) {
+		if (sc->xn_cdata.xn_rx_chain[i] != NULL) {
+			m_freem(sc->xn_cdata.xn_rx_chain[i]);
+			sc->xn_cdata.xn_rx_chain[i] = NULL;
+		}
+	}
+	
+	sc->rx.rsp_cons = 0;
+	sc->xn_rx_if->req_prod = 0;
+	sc->xn_rx_if->event = sc->rx.rsp_cons ;
+#endif
+}
+
+/*
+ * Placeholder for releasing the tx ring's mbufs.  The whole body is
+ * compiled out with #if 0, so calling this currently does nothing.
+ */
+static void
+xn_free_tx_ring(struct netfront_info *sc)
+{
+#if 0
+	int i;
+	
+	for (i = 0; i < NET_TX_RING_SIZE; i++) {
+		if (sc->xn_cdata.xn_tx_chain[i] != NULL) {
+			m_freem(sc->xn_cdata.xn_tx_chain[i]);
+			sc->xn_cdata.xn_tx_chain[i] = NULL;
+		}
+	}
+	
+	return;
+#endif
+}
+
+/*
+ * Non-zero while the number of tx requests produced but not yet consumed
+ * as responses is below the driver's budget (TX_MAX_TARGET less 24 + 2
+ * slots).
+ */
+static inline int
+netfront_tx_slot_available(struct netfront_info *np)
+{
+	const RING_IDX in_flight = np->tx.req_prod_pvt - np->tx.rsp_cons;
+
+	return (in_flight < (TX_MAX_TARGET - /* MAX_SKB_FRAGS */ 24 - 2));
+}
+/*
+ * Release every mbuf still recorded in the tx table, together with its
+ * grant reference, and put its id back on the free list.
+ *
+ * Slots whose value is below KERNBASE are skipped: they hold free-list
+ * links (small integers) or NULL rather than mbuf pointers.
+ *
+ * Each mbuf of a transmitted chain occupies its own slot, so exactly one
+ * mbuf is freed per slot with m_free().  Freeing the whole chain from one
+ * slot would leave the remaining slots pointing at freed mbufs.
+ */
+static void
+netif_release_tx_bufs(struct netfront_info *np)
+{
+	struct mbuf *m;
+	int i;
+
+	for (i = 1; i <= NET_TX_RING_SIZE; i++) {
+		m = np->xn_cdata.xn_tx_chain[i];
+
+		if (((u_long)m) < KERNBASE)
+			continue;
+		/* NOTE(review): re-grants before releasing -- confirm intended. */
+		gnttab_grant_foreign_access_ref(np->grant_tx_ref[i],
+		    xenbus_get_otherend_id(np->xbdev),
+		    virt_to_mfn(mtod(m, vm_offset_t)),
+		    GNTMAP_readonly);
+		gnttab_release_grant_reference(&np->gref_tx_head,
+		    np->grant_tx_ref[i]);
+		np->grant_tx_ref[i] = GRANT_INVALID_REF;
+		add_id_to_freelist(np->tx_mbufs, i);
+		m_free(m);
+	}
+}
+
+/*
+ * Refill the receive ring with empty buffers for the backend.
+ *
+ * First tops up sc->xn_rx_batch with page-sized cluster mbufs.  When the
+ * batch is worth pushing, every queued mbuf is moved into the rx ring: it
+ * is recorded in xn_rx_chain[], given a grant reference and described by
+ * an rx request.  With copying_receiver == 0 the backing page is granted
+ * for transfer and handed back to Xen with XENMEM_decrease_reservation;
+ * otherwise it is granted for foreign access.  Finally the requests are
+ * pushed and the backend is notified if the ring macro asks for it.
+ *
+ * Does nothing while the carrier is down.
+ */
+static void
+network_alloc_rx_buffers(struct netfront_info *sc)
+{
+	int otherend_id = xenbus_get_otherend_id(sc->xbdev);
+	unsigned short id;
+	struct mbuf *m_new;
+	int i, batch_target, notify;
+	RING_IDX req_prod;
+	struct xen_memory_reservation reservation;
+	grant_ref_t ref;
+	int nr_flips;
+	netif_rx_request_t *req;
+	vm_offset_t vaddr;
+	u_long pfn;
+	
+	req_prod = sc->rx.req_prod_pvt;
+
+	if (unlikely(sc->carrier == 0))
+		return;
+	
+	/*
+	 * Allocate skbuffs greedily, even though we batch updates to the
+	 * receive ring. This creates a less bursty demand on the memory
+	 * allocator, so should reduce the chance of failed allocation
+	 * requests both for ourself and for other kernel subsystems.
+	 */
+	batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons);
+	for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) {
+		MGETHDR(m_new, M_DONTWAIT, MT_DATA);
+		if (m_new == NULL) 
+			goto no_mbuf;
+
+		m_cljget(m_new, M_DONTWAIT, MJUMPAGESIZE);
+		if ((m_new->m_flags & M_EXT) == 0) {
+			m_freem(m_new);
+
+			/*
+			 * Shared allocation-failure path; also entered by the
+			 * goto above.  Post what is already queued, if anything.
+			 */
+no_mbuf:
+			if (i != 0)
+				goto refill;
+			/*
+			 * XXX set timer
+			 */
+			break;
+		}
+		m_new->m_len = m_new->m_pkthdr.len = MJUMPAGESIZE;
+		
+		/* queue the mbufs allocated */
+		mbufq_tail(&sc->xn_rx_batch, m_new);
+	}
+	
+	/* Is the batch large enough to be worthwhile? */
+	if (i < (sc->rx_target/2)) {
+		/* Too small: only publish requests that are already pending. */
+		if (req_prod >sc->rx.sring->req_prod)
+			goto push;
+		return;
+	}
+	/* Adjust floating fill target if we risked running out of buffers. */
+	if ( ((req_prod - sc->rx.sring->rsp_prod) < (sc->rx_target / 4)) &&
+	     ((sc->rx_target *= 2) > sc->rx_max_target) )
+		sc->rx_target = sc->rx_max_target;
+
+refill:
+	/* Move every queued mbuf into the ring; i counts the requests built. */
+	for (nr_flips = i = 0; ; i++) {
+		if ((m_new = mbufq_dequeue(&sc->xn_rx_batch)) == NULL)
+			break;
+
+		/*
+		 * Stash the buffer's page frame number in ext_args;
+		 * xennet_get_responses() reads it back when remapping.
+		 */
+		m_new->m_ext.ext_args = (vm_paddr_t *)(uintptr_t)(
+				vtophys(m_new->m_ext.ext_buf) >> PAGE_SHIFT);
+
+		id = xennet_rxidx(req_prod + i);
+
+		KASSERT(sc->xn_cdata.xn_rx_chain[id] == NULL,
+		    ("non-NULL xm_rx_chain"));
+		sc->xn_cdata.xn_rx_chain[id] = m_new;
+
+		ref = gnttab_claim_grant_reference(&sc->gref_rx_head);
+		KASSERT((short)ref >= 0, ("negative ref"));
+		sc->grant_rx_ref[id] = ref;
+
+		vaddr = mtod(m_new, vm_offset_t);
+		pfn = vtophys(vaddr) >> PAGE_SHIFT;
+		req = RING_GET_REQUEST(&sc->rx, req_prod + i);
+
+		if (sc->copying_receiver == 0) {
+			/* Page-transfer mode: give the page itself away. */
+			gnttab_grant_foreign_transfer_ref(ref,
+			    otherend_id, pfn);
+			sc->rx_pfn_array[nr_flips] = PFNTOMFN(pfn);
+			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+				/* Remove this page before passing
+				 * back to Xen.
+				 */
+				set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+				MULTI_update_va_mapping(&sc->rx_mcl[i],
+				    vaddr, 0, 0);
+			}
+			nr_flips++;
+		} else {
+			/* Copy mode: the backend only gets access to the page. */
+			gnttab_grant_foreign_access_ref(ref,
+			    otherend_id,
+			    PFNTOMFN(pfn), 0);
+		}
+		req->id = id;
+		req->gref = ref;
+		
+		/*
+		 * NOTE(review): this unconditionally overwrites the
+		 * rx_pfn_array[] entry set in the transfer branch above
+		 * (nr_flips == i there) -- confirm which value is intended.
+		 */
+		sc->rx_pfn_array[i] =
+		    vtomach(mtod(m_new,vm_offset_t)) >> PAGE_SHIFT;
+	} 
+	
+	KASSERT(i, ("no mbufs processed")); /* should have returned earlier */
+	KASSERT(mbufq_len(&sc->xn_rx_batch) == 0, ("not all mbufs processed"));
+	/*
+	 * We may have allocated buffers which have entries outstanding
+	 * in the page * update queue -- make sure we flush those first!
+	 */
+	PT_UPDATES_FLUSH();
+	/* copying_receiver is fixed for the loop, so nr_flips is 0 or i. */
+	if (nr_flips != 0) {
+#ifdef notyet
+		/* Tell the ballon driver what is going on. */
+		balloon_update_driver_allowance(i);
+#endif
+		set_xen_guest_handle(reservation.extent_start, sc->rx_pfn_array);
+		reservation.nr_extents   = i;
+		reservation.extent_order = 0;
+		reservation.address_bits = 0;
+		reservation.domid        = DOMID_SELF;
+
+		if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+
+			/* After all PTEs have been zapped, flush the TLB. */
+			sc->rx_mcl[i-1].args[MULTI_UVMFLAGS_INDEX] =
+			    UVMF_TLB_FLUSH|UVMF_ALL;
+	
+			/* Give away a batch of pages. */
+			sc->rx_mcl[i].op = __HYPERVISOR_memory_op;
+			sc->rx_mcl[i].args[0] = XENMEM_decrease_reservation;
+			sc->rx_mcl[i].args[1] =  (u_long)&reservation;
+			/* Zap PTEs and give away pages in one big multicall. */
+			(void)HYPERVISOR_multicall(sc->rx_mcl, i+1);
+
+			/* Check return status of HYPERVISOR_dom_mem_op(). */
+			if (unlikely(sc->rx_mcl[i].result != i))
+				panic("Unable to reduce memory reservation\n");
+			/* The brace below closes the !xen_feature() branch. */
+			} else {
+				if (HYPERVISOR_memory_op(
+				    XENMEM_decrease_reservation, &reservation)
+				    != i)
+					panic("Unable to reduce memory "
+					    "reservation\n");
+		}
+	} else {
+		wmb();
+	}
+			
+	/* Above is a suitable barrier to ensure backend will see requests. */
+	sc->rx.req_prod_pvt = req_prod + i;
+push:
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify);
+	if (notify)
+		notify_remote_via_irq(sc->irq);
+}
+
+/*
+ * Receive completion: drain the rx responses, hand the packets to the
+ * stack and refill the ring.
+ *
+ * Must be called with the rx lock held (asserted on every pass).  The
+ * lock is dropped around each call into the stack and re-taken
+ * afterwards.  Returns immediately while the carrier is down.
+ *
+ * Each pass gathers complete packets with xennet_get_responses() into a
+ * local queue (failed ones go to an error queue and are freed), submits
+ * the page remapping queued for transferred pages, passes the packets up,
+ * then calls network_alloc_rx_buffers().  The pass repeats while
+ * RING_FINAL_CHECK_FOR_RESPONSES() reports more work.
+ */
+static void
+xn_rxeof(struct netfront_info *np)
+{
+	struct ifnet *ifp;
+#if __FreeBSD_version >= 700000
+	struct lro_ctrl *lro = &np->xn_lro;
+	struct lro_entry *queued;
+#endif
+	struct netfront_rx_info rinfo;
+	struct netif_rx_response *rx = &rinfo.rx;
+	struct netif_extra_info *extras = rinfo.extras;
+	RING_IDX i, rp;
+	multicall_entry_t *mcl;
+	struct mbuf *m;
+	struct mbuf_head rxq, errq;
+	int err, pages_flipped = 0, work_to_do;
+
+	do {
+		XN_RX_LOCK_ASSERT(np);
+		if (!netfront_carrier_ok(np))
+			return;
+
+		mbufq_init(&errq);
+		mbufq_init(&rxq);
+
+		ifp = np->xn_ifp;
+	
+		rp = np->rx.sring->rsp_prod;
+		rmb();	/* Ensure we see queued responses up to 'rp'. */
+
+		i = np->rx.rsp_cons;
+		while ((i != rp)) {
+			/* Work on a private copy of the first response. */
+			memcpy(rx, RING_GET_RESPONSE(&np->rx, i), sizeof(*rx));
+			memset(extras, 0, sizeof(rinfo.extras));
+
+			m = NULL;
+			err = xennet_get_responses(np, &rinfo, rp, &m,
+			    &pages_flipped);
+
+			if (unlikely(err)) {
+				if (m)
+					mbufq_tail(&errq, m);
+				np->stats.rx_errors++;
+				/* Resume where xennet_get_responses() left off. */
+				i = np->rx.rsp_cons;
+				continue;
+			}
+
+			m->m_pkthdr.rcvif = ifp;
+			if ( rx->flags & NETRXF_data_validated ) {
+				/* Tell the stack the checksums are okay */
+				/*
+				 * XXX this isn't necessarily the case - need to add
+				 * check
+				 */
+				
+				m->m_pkthdr.csum_flags |=
+					(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID
+					    | CSUM_PSEUDO_HDR);
+				m->m_pkthdr.csum_data = 0xffff;
+			}
+
+			np->stats.rx_packets++;
+			np->stats.rx_bytes += m->m_pkthdr.len;
+
+			mbufq_tail(&rxq, m);
+			np->rx.rsp_cons = ++i;
+		}
+
+		if (pages_flipped) {
+			/* Some pages are no longer absent... */
+#ifdef notyet
+			balloon_update_driver_allowance(-pages_flipped);
+#endif
+			/* Do all the remapping work, and M->P updates, in one big
+			 * hypercall.
+			 *
+			 * xennet_get_responses() only queues entries in
+			 * rx_mcl/rx_mmu when the guest is NOT auto-translated,
+			 * so that is the case in which they must be submitted.
+			 */
+			if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+				mcl = np->rx_mcl + pages_flipped;
+				mcl->op = __HYPERVISOR_mmu_update;
+				mcl->args[0] = (u_long)np->rx_mmu;
+				mcl->args[1] = pages_flipped;
+				mcl->args[2] = 0;
+				mcl->args[3] = DOMID_SELF;
+				(void)HYPERVISOR_multicall(np->rx_mcl,
+				    pages_flipped + 1);
+			}
+		}
+	
+		/* Discard the packets that failed in xennet_get_responses(). */
+		while ((m = mbufq_dequeue(&errq)))
+			m_freem(m);
+
+		/* 
+		 * Process all the mbufs after the remapping is complete.
+		 * Break the mbuf chain first though.
+		 */
+		while ((m = mbufq_dequeue(&rxq)) != NULL) {
+			ifp->if_ipackets++;
+			
+			/*
+			 * Do we really need to drop the rx lock?
+			 */
+			XN_RX_UNLOCK(np);
+#if __FreeBSD_version >= 700000
+			/* Use LRO if possible */
+			if ((ifp->if_capenable & IFCAP_LRO) == 0 ||
+			    lro->lro_cnt == 0 || tcp_lro_rx(lro, m, 0)) {
+				/*
+				 * If LRO fails, pass up to the stack
+				 * directly.
+				 */
+				(*ifp->if_input)(ifp, m);
+			}
+#else
+			(*ifp->if_input)(ifp, m);
+#endif
+			XN_RX_LOCK(np);
+		}
+	
+		np->rx.rsp_cons = i;
+
+#if __FreeBSD_version >= 700000
+		/*
+		 * Flush any outstanding LRO work
+		 */
+		while (!SLIST_EMPTY(&lro->lro_active)) {
+			queued = SLIST_FIRST(&lro->lro_active);
+			SLIST_REMOVE_HEAD(&lro->lro_active, next);
+			tcp_lro_flush(lro, queued);
+		}
+#endif
+
+#if 0
+		/* If we get a callback with very few responses, reduce fill target. */
+		/* NB. Note exponential increase, linear decrease. */
+		if (((np->rx.req_prod_pvt - np->rx.sring->rsp_prod) > 
+			((3*np->rx_target) / 4)) && (--np->rx_target < np->rx_min_target))
+			np->rx_target = np->rx_min_target;
+#endif
+	
+		network_alloc_rx_buffers(np);
+
+		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, work_to_do);
+	} while (work_to_do);
+}
+
+/*
+ * Transmit completion: reclaim the mbufs and grant references of the tx
+ * requests the backend has answered.
+ *
+ * Must be called with the tx lock held (asserted).  Returns immediately
+ * while the carrier is down.  For every non-NULL-status response the
+ * slot's mbuf is freed with m_free() (each mbuf of a chain has its own
+ * slot), its grant is ended and released, and its id goes back on the free
+ * list.  After each sweep rsp_event is re-armed and the sweep repeats if
+ * more responses arrived in the meantime.
+ *
+ * If the backend still holds a grant, the sweep stops at that response.
+ * The consumer index is advanced to it first so that the slots already
+ * reclaimed are not processed a second time on the next call.
+ */
+static void 
+xn_txeof(struct netfront_info *np)
+{
+	RING_IDX i, prod;
+	unsigned short id;
+	struct ifnet *ifp;
+	netif_tx_response_t *txr;
+	struct mbuf *m;
+	
+	XN_TX_LOCK_ASSERT(np);
+	
+	if (!netfront_carrier_ok(np))
+		return;
+	
+	ifp = np->xn_ifp;
+	ifp->if_timer = 0;
+	
+	do {
+		prod = np->tx.sring->rsp_prod;
+		rmb(); /* Ensure we see responses up to 'rp'. */
+		
+		for (i = np->tx.rsp_cons; i != prod; i++) {
+			txr = RING_GET_RESPONSE(&np->tx, i);
+			if (txr->status == NETIF_RSP_NULL)
+				continue;
+
+			id = txr->id;
+			m = np->xn_cdata.xn_tx_chain[id]; 
+
+			/* Validate the slot before the mbuf is dereferenced. */
+			KASSERT(m != NULL, ("mbuf not found in xn_tx_chain"));
+			M_ASSERTVALID(m);
+			if (unlikely(gnttab_query_foreign_access(
+			    np->grant_tx_ref[id]) != 0)) {
+				printf("network_tx_buf_gc: warning "
+				    "-- grant still in use by backend "
+				    "domain.\n");
+				/* Keep the progress made so far; retry from here. */
+				np->tx.rsp_cons = i;
+				goto out; 
+			}
+
+			/*
+			 * Increment packet count if this is the last
+			 * mbuf of the chain.
+			 */
+			if (!m->m_next)
+				ifp->if_opackets++;
+			gnttab_end_foreign_access_ref(
+				np->grant_tx_ref[id]);
+			gnttab_release_grant_reference(
+				&np->gref_tx_head, np->grant_tx_ref[id]);
+			np->grant_tx_ref[id] = GRANT_INVALID_REF;
+			
+			np->xn_cdata.xn_tx_chain[id] = NULL;
+			add_id_to_freelist(np->xn_cdata.xn_tx_chain, id);
+			m_free(m);
+		}
+		np->tx.rsp_cons = prod;
+		
+		/*
+		 * Set a new event, then check for race with update of
+		 * tx_cons. Note that it is essential to schedule a
+		 * callback, no matter how few buffers are pending. Even if
+		 * there is space in the transmit ring, higher layers may
+		 * be blocked because too much data is outstanding: in such
+		 * cases notification from Xen is likely to be the only kick
+		 * that we'll get.
+		 */
+		np->tx.sring->rsp_event =
+		    prod + ((np->tx.sring->req_prod - prod) >> 1) + 1;
+
+		mb();
+		
+	} while (prod != np->tx.sring->rsp_prod);
+	
+ out: 
+	if (np->tx_full &&
+	    ((np->tx.sring->req_prod - prod) < NET_TX_RING_SIZE)) {
+		np->tx_full = 0;
+#if 0
+		if (np->user_state == UST_OPEN)
+			netif_wake_queue(dev);
+#endif
+	}
+
+}
+
+/*
+ * Interrupt handler bound in setup_device(); xsc is the softc.
+ *
+ * Runs tx completion (only if tx responses are pending) and rx
+ * completion, each under its own lock, then restarts transmission when
+ * the interface is running and its send queue is not empty.
+ */
+static void
+xn_intr(void *xsc)
+{
+	struct netfront_info *np = xsc;
+	struct ifnet *ifp = np->xn_ifp;
+	int tx_pending;
+
+#if 0
+	if (!(np->rx.rsp_cons != np->rx.sring->rsp_prod &&
+	    likely(netfront_carrier_ok(np)) &&
+	    ifp->if_drv_flags & IFF_DRV_RUNNING))
+		return;
+#endif
+	tx_pending = (np->tx.rsp_cons != np->tx.sring->rsp_prod);
+	if (tx_pending) {
+		XN_TX_LOCK(np);
+		xn_txeof(np);
+		XN_TX_UNLOCK(np);
+	}
+
+	XN_RX_LOCK(np);
+	xn_rxeof(np);
+	XN_RX_UNLOCK(np);
+
+	/* Nothing to send unless the interface is up and has queued packets. */
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
+	    IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+		return;
+
+	xn_start(ifp);
+}
+
+
+/*
+ * Re-post an rx buffer: record mbuf 'm' and grant 'ref' in the slot for
+ * the current private request producer, fill in the matching request and
+ * advance the producer by one.
+ */
+static void
+xennet_move_rx_slot(struct netfront_info *np, struct mbuf *m,
+	grant_ref_t ref)
+{
+	const RING_IDX prod = np->rx.req_prod_pvt;
+	const int slot = xennet_rxidx(prod);
+	netif_rx_request_t *req;
+
+	KASSERT(np->rx_mbufs[slot] == NULL, ("rx_mbufs != NULL"));
+	np->rx_mbufs[slot] = m;
+	np->grant_rx_ref[slot] = ref;
+
+	req = RING_GET_REQUEST(&np->rx, prod);
+	req->id = slot;
+	req->gref = ref;
+
+	np->rx.req_prod_pvt = prod + 1;
+}
+
+/*
+ * Consume the extra-info slots that follow the response at
+ * np->rx.rsp_cons.
+ *
+ * Each extra slot is copied into extras[type - 1] when its type is valid,
+ * and the buffer and grant that occupied the slot are re-posted with
+ * xennet_move_rx_slot().  Continues while XEN_NETIF_EXTRA_FLAG_MORE is set
+ * and stops early if the next slot would reach rp.
+ *
+ * On return np->rx.rsp_cons is the index of the last slot consumed.
+ * Returns 0, or -EINVAL if an extra was missing or had an invalid type.
+ */
+static int
+xennet_get_extras(struct netfront_info *np,
+    struct netif_extra_info *extras, RING_IDX rp)
+{
+	struct netif_extra_info *extra;
+	RING_IDX cons = np->rx.rsp_cons;
+
+	int err = 0;
+
+	do {
+		struct mbuf *m;
+		grant_ref_t ref;
+
+		/* The response announcing the extra must not be the last one. */
+		if (unlikely(cons + 1 == rp)) {
+#if 0			
+			if (net_ratelimit())
+				WPRINTK("Missing extra info\n");
+#endif			
+			err = -EINVAL;
+			break;
+		}
+
+		extra = (struct netif_extra_info *)
+		RING_GET_RESPONSE(&np->rx, ++cons);
+
+		if (unlikely(!extra->type ||
+			extra->type >= XEN_NETIF_EXTRA_TYPE_MAX)) {
+#if 0				
+			if (net_ratelimit())
+				WPRINTK("Invalid extra type: %d\n",
+					extra->type);
+#endif			
+			err = -EINVAL;
+		} else {
+			memcpy(&extras[extra->type - 1], extra, sizeof(*extra));
+		}
+
+		/* The slot carried no packet data: hand its buffer back. */
+		m = xennet_get_rx_mbuf(np, cons);
+		ref = xennet_get_rx_ref(np, cons);
+		xennet_move_rx_slot(np, m, ref);
+	} while (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE);
+
+	np->rx.rsp_cons = cons;
+	return err;
+}
+
+/*
+ * Collect the response(s) that make up one received packet, starting at
+ * np->rx.rsp_cons.
+ *
+ * With NETRXF_extra_info set, the extra slots are consumed first via
+ * xennet_get_extras().  The fragment mbufs are then linked together while
+ * NETRXF_more_data is set.  For each fragment the grant is ended (as a
+ * transfer or as an access grant, depending on np->copying_receiver) and
+ * released.  In the transfer case on a non-auto-translated guest, a
+ * va-mapping multicall entry and a machine-to-physical update are queued
+ * in np->rx_mcl / np->rx_mmu for the caller to submit.
+ *
+ * rinfo            in: rinfo->rx holds the caller's copy of the first
+ *                  response; rinfo->extras receives any extra info
+ * rp               response producer snapshot; slots at or beyond it are
+ *                  not read
+ * list             out: head mbuf of the packet
+ * pages_flipped_p  in/out: running count of transferred pages
+ *
+ * Returns 0 on success or a negative errno value.  On error
+ * np->rx.rsp_cons is advanced past the fragments that were examined.
+ *
+ * NOTE(review): several paths look fragile and should be confirmed -- on
+ * the error paths that call xennet_move_rx_slot() the mbuf is re-posted
+ * yet stays linked in the chain returned through 'list', and the fragment
+ * linking below dereferences m and m_prev without a NULL check.
+ */
+static int
+xennet_get_responses(struct netfront_info *np,
+	struct netfront_rx_info *rinfo, RING_IDX rp,
+	struct mbuf  **list,
+	int *pages_flipped_p)
+{
+	int pages_flipped = *pages_flipped_p;
+	struct mmu_update *mmu;
+	struct multicall_entry *mcl;
+	struct netif_rx_response *rx = &rinfo->rx;
+	struct netif_extra_info *extras = rinfo->extras;
+	RING_IDX cons = np->rx.rsp_cons;
+	struct mbuf *m, *m0, *m_prev;
+	grant_ref_t ref = xennet_get_rx_ref(np, cons);
+	int max = 5 /* MAX_SKB_FRAGS + (rx->status <= RX_COPY_THRESHOLD) */;
+	int frags = 1;
+	int err = 0;
+	u_long ret;
+
+	m0 = m = m_prev = xennet_get_rx_mbuf(np, cons);
+
+	
+	if (rx->flags & NETRXF_extra_info) {
+		err = xennet_get_extras(np, extras, rp);
+		/* xennet_get_extras() moved rsp_cons past the extra slots. */
+		cons = np->rx.rsp_cons;
+	}
+
+
+	if (m0 != NULL) {
+			m0->m_pkthdr.len = 0;
+			m0->m_next = NULL;
+	}
+	
+	/* One iteration per fragment; rx, m and ref describe the current one. */
+	for (;;) {
+		u_long mfn;
+
+#if 0		
+		printf("rx->status=%hd rx->offset=%hu frags=%u\n",
+			rx->status, rx->offset, frags);
+#endif
+		/* A negative status or data running past the page is invalid. */
+		if (unlikely(rx->status < 0 ||
+			rx->offset + rx->status > PAGE_SIZE)) {
+#if 0						
+			if (net_ratelimit())
+				WPRINTK("rx->offset: %x, size: %u\n",
+					rx->offset, rx->status);
+#endif						
+			xennet_move_rx_slot(np, m, ref);
+			err = -EINVAL;
+			goto next;
+		}
+		
+		/*
+		 * This definitely indicates a bug, either in this driver or in
+		 * the backend driver. In future this should flag the bad
+		 * situation to the system controller to reboot the backed.
+		 */
+		if (ref == GRANT_INVALID_REF) {
+#if 0 				
+			if (net_ratelimit())
+				WPRINTK("Bad rx response id %d.\n", rx->id);
+#endif			
+			err = -EINVAL;
+			goto next;
+		}
+
+		if (!np->copying_receiver) {
+			/* Memory pressure, insufficient buffer
+			 * headroom, ...
+			 */
+			if (!(mfn = gnttab_end_foreign_transfer_ref(ref))) {
+				if (net_ratelimit())
+					WPRINTK("Unfulfilled rx req "
+						"(id=%d, st=%d).\n",
+						rx->id, rx->status);
+				xennet_move_rx_slot(np, m, ref);
+				err = -ENOMEM;
+				goto next;
+			}
+
+			if (!xen_feature( XENFEAT_auto_translated_physmap)) {
+				/* Remap the page. */
+				void *vaddr = mtod(m, void *);
+				uint32_t pfn;
+
+				mcl = np->rx_mcl + pages_flipped;
+				mmu = np->rx_mmu + pages_flipped;
+
+				MULTI_update_va_mapping(mcl, (u_long)vaddr,
+				    (((vm_paddr_t)mfn) << PAGE_SHIFT) | PG_RW |
+				    PG_V | PG_M | PG_A, 0);
+				/* pfn stashed by network_alloc_rx_buffers(). */
+				pfn = (uintptr_t)m->m_ext.ext_args;
+				mmu->ptr = ((vm_paddr_t)mfn << PAGE_SHIFT) |
+				    MMU_MACHPHYS_UPDATE;
+				mmu->val = pfn;
+
+				set_phys_to_machine(pfn, mfn);
+			}
+			pages_flipped++;
+		} else {
+			ret = gnttab_end_foreign_access_ref(ref);
+			KASSERT(ret, ("ret != 0"));
+		}
+
+		gnttab_release_grant_reference(&np->gref_rx_head, ref);
+
+		/* Reached on success and from the error paths above. */
+next:
+		if (m != NULL) {
+				m->m_len = rx->status;
+				m->m_data += rx->offset;
+				m0->m_pkthdr.len += rx->status;
+		}
+		
+		if (!(rx->flags & NETRXF_more_data))
+			break;
+
+		/* More data was promised but the next slot is not there yet. */
+		if (cons + frags == rp) {
+			if (net_ratelimit())
+				WPRINTK("Need more frags\n");
+			err = -ENOENT;
+				break;
+		}
+		m_prev = m;
+		
+		/* Step to the next fragment and append its mbuf to the chain. */
+		rx = RING_GET_RESPONSE(&np->rx, cons + frags);
+		m = xennet_get_rx_mbuf(np, cons + frags);
+
+		m_prev->m_next = m;
+		m->m_next = NULL;
+		ref = xennet_get_rx_ref(np, cons + frags);
+		frags++;
+	}
+	*list = m0;
+
+	if (unlikely(frags > max)) {
+		if (net_ratelimit())
+			WPRINTK("Too many frags\n");
+		err = -E2BIG;
+	}
+
+	if (unlikely(err))
+		np->rx.rsp_cons = cons + frags;
+
+	*pages_flipped_p = pages_flipped;
+
+	return err;
+}
+
+/*
+ * Periodic tick body; the rx lock must be held (asserted).  Re-arms the
+ * statistics callout so that xn_tick() runs again in hz ticks.
+ */
+static void
+xn_tick_locked(struct netfront_info *sc) 
+{
+	struct callout *timer;
+
+	XN_RX_LOCK_ASSERT(sc);
+
+	timer = &sc->xn_stat_ch;
+	callout_reset(timer, hz, xn_tick, sc);
+
+	/* XXX placeholder for printing debug information */
+}
+
+
+/*
+ * Callout entry point; xsc is the softc.  Runs xn_tick_locked() with the
+ * rx lock held.
+ */
+static void
+xn_tick(void *xsc) 
+{
+	struct netfront_info *sc = xsc;
+
+	XN_RX_LOCK(sc);
+	xn_tick_locked(sc);
+	XN_RX_UNLOCK(sc);
+}
+/*
+ * Transmit path body: move packets from the interface send queue onto the
+ * tx ring.  xn_start() calls this with the tx lock held.
+ *
+ * For each dequeued packet, every mbuf of the chain gets its own tx
+ * request: a free id is taken, the mbuf is recorded under that id, and
+ * its page is granted read-only to the backend.  The first request
+ * carries the total packet length and the checksum flags, and may be
+ * followed by a GSO extra-info slot.  Once the queue is empty, or no slot
+ * is available, the requests are pushed, the backend is notified if
+ * needed, and completed transmissions are reclaimed with xn_txeof().
+ *
+ * Does nothing while the carrier is down.
+ */
+static void
+xn_start_locked(struct ifnet *ifp) 
+{
+	int otherend_id;
+	unsigned short id;
+	struct mbuf *m_head, *m;
+	struct netfront_info *sc;
+	netif_tx_request_t *tx;
+	netif_extra_info_t *extra;
+	RING_IDX i;
+	grant_ref_t ref;
+	u_long mfn, tx_bytes;
+	int notify, nfrags;
+
+	sc = ifp->if_softc;
+	otherend_id = xenbus_get_otherend_id(sc->xbdev);
+	tx_bytes = 0;	/* assigned here only; not read in this function */
+
+	if (!netfront_carrier_ok(sc))
+		return;
+	
+	/* i is the ring index of the next request slot to fill. */
+	for (i = sc->tx.req_prod_pvt; TRUE; i++) {
+		IF_DEQUEUE(&ifp->if_snd, m_head);
+		if (m_head == NULL) 
+			break;
+		
+		/*
+		 * NOTE(review): the check reads sc->tx.req_prod_pvt, which is
+		 * only updated after this loop, so slots filled earlier in the
+		 * same call are not counted -- confirm this is acceptable.
+		 */
+		if (!netfront_tx_slot_available(sc)) {
+			IF_PREPEND(&ifp->if_snd, m_head);
+			ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+			break;
+		}
+		
+
+		/*
+		 * Defragment the mbuf if necessary.
+		 */
+		for (m = m_head, nfrags = 0; m; m = m->m_next)
+			nfrags++;
+		if (nfrags > MAX_SKB_FRAGS) {
+			m = m_defrag(m_head, M_DONTWAIT);
+			if (!m) {
+				m_freem(m_head);
+				break;
+			}
+			m_head = m;
+		}
+
+		/*
+		 * Start packing the mbufs in this chain into
+		 * the fragment pointers. Stop when we run out
+		 * of fragments or hit the end of the mbuf chain.
+		 */
+		m = m_head;
+		extra = NULL;
+		for (m = m_head; m; m = m->m_next) {
+			tx = RING_GET_REQUEST(&sc->tx, i);
+			/* Record this mbuf under a free id for xn_txeof(). */
+			id = get_id_from_freelist(sc->xn_cdata.xn_tx_chain);
+			sc->xn_cdata.xn_tx_chain[id] = m;
+			tx->id = id;
+			ref = gnttab_claim_grant_reference(&sc->gref_tx_head);
+			KASSERT((short)ref >= 0, ("Negative ref"));
+			mfn = virt_to_mfn(mtod(m, vm_offset_t));
+			gnttab_grant_foreign_access_ref(ref, otherend_id,
+			    mfn, GNTMAP_readonly);
+			tx->gref = sc->grant_tx_ref[id] = ref;
+			tx->offset = mtod(m, vm_offset_t) & (PAGE_SIZE - 1);
+			tx->flags = 0;
+			if (m == m_head) {
+				/*
+				 * The first fragment has the entire packet
+				 * size, subsequent fragments have just the
+				 * fragment size. The backend works out the
+				 * true size of the first fragment by
+				 * subtracting the sizes of the other
+				 * fragments.
+				 */
+				tx->size = m->m_pkthdr.len;
+
+				/*
+				 * The first fragment contains the
+				 * checksum flags and is optionally
+				 * followed by extra data for TSO etc.
+				 */
+				if (m->m_pkthdr.csum_flags
+				    & CSUM_DELAY_DATA) {
+					tx->flags |= (NETTXF_csum_blank
+					    | NETTXF_data_validated);
+				}
+#if __FreeBSD_version >= 700000
+				if (m->m_pkthdr.csum_flags & CSUM_TSO) {
+					/* The GSO descriptor takes the next ring slot. */
+					struct netif_extra_info *gso =
+						(struct netif_extra_info *)
+						RING_GET_REQUEST(&sc->tx, ++i);
+
+					if (extra)
+						extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+					else
+						tx->flags |= NETTXF_extra_info;
+
+					gso->u.gso.size = m->m_pkthdr.tso_segsz;
+					gso->u.gso.type =
+						XEN_NETIF_GSO_TYPE_TCPV4;
+					gso->u.gso.pad = 0;
+					gso->u.gso.features = 0;
+
+					gso->type = XEN_NETIF_EXTRA_TYPE_GSO;
+					gso->flags = 0;
+					extra = gso;
+				}
+#endif
+			} else {
+				tx->size = m->m_len;
+			}
+			/* Another fragment follows: flag it and take the next slot. */
+			if (m->m_next) {
+				tx->flags |= NETTXF_more_data;
+				i++;
+			}
+		}
+
+		BPF_MTAP(ifp, m_head);
+
+		sc->stats.tx_bytes += m_head->m_pkthdr.len;
+		sc->stats.tx_packets++;
+	}
+
+	/* Publish everything filled above and kick the backend if needed. */
+	sc->tx.req_prod_pvt = i;
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify);
+	if (notify)
+		notify_remote_via_irq(sc->irq);
+
+	xn_txeof(sc);
+
+	if (RING_FULL(&sc->tx)) {
+		sc->tx_full = 1;
+#if 0
+		netif_stop_queue(dev);
+#endif
+	}
+
+	return;
+}    
+
+/*
+ * Interface start routine: runs xn_start_locked() with the softc's tx lock
+ * held.
+ */
+static void
+xn_start(struct ifnet *ifp)
+{
+	struct netfront_info *sc = ifp->if_softc;
+
+	XN_TX_LOCK(sc);
+	xn_start_locked(ifp);
+	XN_TX_UNLOCK(sc);
+}
+
+/*
+ * Bring the interface up; equivalent of network_open() in Linux.  The
+ * softc lock must be held (asserted).
+ *
+ * Does nothing if the interface is already marked running.  Otherwise it
+ * stops the interface, refills the rx ring, asks for an event on the next
+ * rx response, marks the interface running and not output-active, and
+ * starts the periodic xn_tick() callout.
+ */
+static void 
+xn_ifinit_locked(struct netfront_info *sc) 
+{
+	struct ifnet *ifp;
+
+	XN_LOCK_ASSERT(sc);
+
+	ifp = sc->xn_ifp;
+	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) != 0)
+		return;
+
+	xn_stop(sc);
+
+	network_alloc_rx_buffers(sc);
+	sc->rx.sring->rsp_event = sc->rx.rsp_cons + 1;
+
+	ifp->if_drv_flags |= IFF_DRV_RUNNING;
+	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+
+	callout_reset(&sc->xn_stat_ch, hz, xn_tick, sc);
+}
+
+
+/*
+ * Interface init routine; xsc is the softc.  Runs xn_ifinit_locked() with
+ * the softc lock held.
+ */
+static void 
+xn_ifinit(void *xsc)
+{
+	struct netfront_info *sc;
+
+	sc = xsc;
+
+	XN_LOCK(sc);
+	xn_ifinit_locked(sc);
+	XN_UNLOCK(sc);
+}
+
+/* Interface ioctl handler; returns 0 or an errno value. */
+static int
+xn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct netfront_info *sc = ifp->if_softc;
+	struct ifreq *ifr = (struct ifreq *) data;
+	struct ifaddr *ifa = (struct ifaddr *)data;
+	int mask, error = 0;
+
+	switch(cmd) {
+	case SIOCSIFADDR:
+	case SIOCGIFADDR:
+		XN_LOCK(sc);
+		if (ifa->ifa_addr->sa_family == AF_INET) {
+			ifp->if_flags |= IFF_UP;
+			if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
+				xn_ifinit_locked(sc);
+			arp_ifinit(ifp, ifa);
+			XN_UNLOCK(sc);
+		} else {
+			XN_UNLOCK(sc);
+			error = ether_ioctl(ifp, cmd, data);
+		}
+		break;
+	case SIOCSIFMTU:
+		/* XXX can we alter the MTU on a VN ?*/
+#ifdef notyet
+		if (ifr->ifr_mtu > XN_JUMBO_MTU)
+			error = EINVAL;
+		else
+#endif
+		{
+			ifp->if_mtu = ifr->ifr_mtu;
+			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
+			xn_ifinit(sc);
+		}
+		break;
+	case SIOCSIFFLAGS:
+		XN_LOCK(sc);
+		if (ifp->if_flags & IFF_UP) {
+			/*
+			 * If only the state of the PROMISC flag changed,
+			 * then just use the 'set promisc mode' command
+			 * instead of reinitializing the entire NIC. Doing
+			 * a full re-init means reloading the firmware and
+			 * waiting for it to start up, which may take a
+			 * second or two.
+			 */
+#ifdef notyet
+			/* No promiscuous mode with Xen */
+			if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
+			    ifp->if_flags & IFF_PROMISC &&
+			    !(sc->xn_if_flags & IFF_PROMISC)) {
+				XN_SETBIT(sc, XN_RX_MODE,
+					  XN_RXMODE_RX_PROMISC);
+			} else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
+				   !(ifp->if_flags & IFF_PROMISC) &&
+				   sc->xn_if_flags & IFF_PROMISC) {
+				XN_CLRBIT(sc, XN_RX_MODE,
+					  XN_RXMODE_RX_PROMISC);
+			} else
+#endif
+				xn_ifinit_locked(sc);
+		} else {
+			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+				xn_stop(sc);
+			}
+		}
+		sc->xn_if_flags = ifp->if_flags;
+		XN_UNLOCK(sc);
+		error = 0;
+		break;
+	case SIOCSIFCAP:
+		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
+		if (mask & IFCAP_TXCSUM) {
+			if (IFCAP_TXCSUM & ifp->if_capenable) {
+				ifp->if_capenable &= ~(IFCAP_TXCSUM|IFCAP_TSO4);
+				ifp->if_hwassist &= ~(CSUM_TCP | CSUM_UDP
+				    | CSUM_IP | CSUM_TSO);
+			} else {
+				ifp->if_capenable |= IFCAP_TXCSUM;
+				ifp->if_hwassist |= (CSUM_TCP | CSUM_UDP
+				    | CSUM_IP);
+			}
+		}
+		if (mask & IFCAP_RXCSUM) {
+			ifp->if_capenable ^= IFCAP_RXCSUM;
+		}
+#if __FreeBSD_version >= 700000
+		if (mask & IFCAP_TSO4) {
+			if (IFCAP_TSO4 & ifp->if_capenable) {
+				ifp->if_capenable &= ~IFCAP_TSO4;
+				ifp->if_hwassist &= ~CSUM_TSO;
+			} else if (IFCAP_TXCSUM & ifp->if_capenable) {
+				ifp->if_capenable |= IFCAP_TSO4;
+				ifp->if_hwassist |= CSUM_TSO;
+			} else {
+				DPRINTK("Xen requires tx checksum offload"
+				    " be enabled to use TSO\n");
+				error = EINVAL;
+			}
+		}
+		if (mask & IFCAP_LRO)
+			ifp->if_capenable ^= IFCAP_LRO;
+#endif
+		/*
+		 * error is left alone so a refused TSO request returns EINVAL.
+		 */
+		break;
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+#ifdef notyet
+		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
+			XN_LOCK(sc);
+			xn_setmulti(sc);
+			XN_UNLOCK(sc);
+			error = 0;
+		}
+#endif
+		/* FALLTHROUGH */
+	case SIOCSIFMEDIA:
+	case SIOCGIFMEDIA:
+		error = EINVAL;
+		break;
+	default:
+		error = ether_ioctl(ifp, cmd, data);
+	}
+
+	return (error);
+}
+
+/*
+ * Stop the interface: cancel the xn_stat_ch callout, call the rx/tx
+ * ring free routines and clear the RUNNING/OACTIVE driver flags.
+ */
+static void
+xn_stop(struct netfront_info *sc)
+{
+	struct ifnet *ifp = sc->xn_ifp;
+
+	XN_LOCK_ASSERT(sc);
+
+	callout_stop(&sc->xn_stat_ch);
+	xn_free_rx_ring(sc);
+	xn_free_tx_ring(sc);
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+}
+
+/* START of Xenolinux helper functions adapted to FreeBSD */
+/* (Re)connect to the backend; returns 0 or the talk_to_backend() error. */
+int
+network_connect(struct netfront_info *np)
+{
+	int i, requeue_idx, error;
+	grant_ref_t ref;
+	netif_rx_request_t *req;
+	u_int feature_rx_copy, feature_rx_flip;
+
+	error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev),
+	    "feature-rx-copy", NULL, "%u", &feature_rx_copy);
+	if (error)
+		feature_rx_copy = 0;
+	error = xenbus_scanf(XBT_NIL, xenbus_get_otherend_path(np->xbdev),
+	    "feature-rx-flip", NULL, "%u", &feature_rx_flip);
+	if (error)
+		feature_rx_flip = 1;
+
+	/*
+	 * Copy packets on receive path if:
+	 *  (a) This was requested by user, and the backend supports it; or
+	 *  (b) Flipping was requested, but this is unsupported by the backend.
+	 */
+	np->copying_receiver = ((MODPARM_rx_copy && feature_rx_copy) ||
+				(MODPARM_rx_flip && !feature_rx_flip));
+
+	XN_LOCK(np);
+	/* Recovery procedure: */
+	error = talk_to_backend(np->xbdev, np);
+	if (error) {
+		XN_UNLOCK(np);	/* never return with the softc lock held */
+		return (error);
+	}
+
+	/* Step 1: Reinitialise variables. */
+	netif_release_tx_bufs(np);
+
+	/* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
+	for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
+		struct mbuf *m;
+		u_long pfn;
+
+		if (np->rx_mbufs[i] == NULL)
+			continue;
+
+		m = np->rx_mbufs[requeue_idx] = xennet_get_rx_mbuf(np, i);
+		ref = np->grant_rx_ref[requeue_idx] = xennet_get_rx_ref(np, i);
+		req = RING_GET_REQUEST(&np->rx, requeue_idx);
+		pfn = vtophys(mtod(m, vm_offset_t)) >> PAGE_SHIFT;
+
+		if (!np->copying_receiver) {
+			gnttab_grant_foreign_transfer_ref(ref,
+			    xenbus_get_otherend_id(np->xbdev),
+			    pfn);
+		} else {
+			gnttab_grant_foreign_access_ref(ref,
+			    xenbus_get_otherend_id(np->xbdev),
+			    PFNTOMFN(pfn), 0);
+		}
+		req->gref = ref;
+		req->id   = requeue_idx;
+		requeue_idx++;
+	}
+
+	np->rx.req_prod_pvt = requeue_idx;
+
+	/* Step 3: All public and private state should now be sane.  Get
+	 * ready to send and receive packets and kick the driver domain,
+	 * because we've probably just requeued some packets. */
+	netfront_carrier_on(np);
+	notify_remote_via_irq(np->irq);
+	XN_TX_LOCK(np);
+	xn_txeof(np);
+	XN_TX_UNLOCK(np);
+	network_alloc_rx_buffers(np);
+	XN_UNLOCK(np);
+
+	return (0);
+}
+
+/* Debug aid: with DEBUG defined, print a one-line summary of sc. */
+static void
+show_device(struct netfront_info *sc)
+{
+#ifdef DEBUG
+	if (sc == NULL) {
+		IPRINTK("<vif NULL>\n");
+		return;
+	}
+	IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
+		sc->xn_ifno,
+		be_state_name[sc->xn_backend_state],
+		sc->xn_user_state ? "open" : "closed",
+		sc->xn_evtchn, sc->xn_irq,
+		sc->xn_tx_if,
+		sc->xn_rx_if);
+#endif
+}
+
+/** Create a network device.
+ * @param dev xenbus device whose softc is the netfront_info to set up
+ * @return 0 on success, otherwise an errno value
+ */
+int
+create_netdev(device_t dev)
+{
+	int i;
+	struct netfront_info *np;
+	int err;
+	struct ifnet *ifp;
+
+	np = device_get_softc(dev);
+	np->xbdev         = dev;
+
+	XN_LOCK_INIT(np, xennetif);
+	np->rx_target     = RX_MIN_TARGET;
+	np->rx_min_target = RX_MIN_TARGET;
+	np->rx_max_target = RX_MAX_TARGET;
+
+	/* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
+	for (i = 0; i <= NET_TX_RING_SIZE; i++) {
+		np->tx_mbufs[i] = (void *) ((u_long) i+1);
+		np->grant_tx_ref[i] = GRANT_INVALID_REF;
+	}
+	for (i = 0; i <= NET_RX_RING_SIZE; i++) {
+		np->rx_mbufs[i] = NULL;
+		np->grant_rx_ref[i] = GRANT_INVALID_REF;
+	}
+	/* A grant for every tx ring slot */
+	if (gnttab_alloc_grant_references(TX_MAX_TARGET,
+					  &np->gref_tx_head) < 0) {
+		printf("#### netfront can't alloc tx grant refs\n");
+		err = ENOMEM;
+		goto out;		/* nothing has been allocated yet */
+	}
+	/* A grant for every rx ring slot */
+	if (gnttab_alloc_grant_references(RX_MAX_TARGET,
+					  &np->gref_rx_head) < 0) {
+		printf("#### netfront can't alloc rx grant refs\n");
+		err = ENOMEM;
+		goto free_tx;		/* the tx refs are released exactly once */
+	}
+
+	err = xen_net_read_mac(dev, np->mac);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "parsing %s/mac",
+		    xenbus_get_node(dev));
+		goto free_rx;
+	}
+
+	/* Set up ifnet structure */
+	ifp = np->xn_ifp = if_alloc(IFT_ETHER);
+	ifp->if_softc = np;
+	if_initname(ifp, "xn",  device_get_unit(dev));
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_ioctl = xn_ioctl;
+	ifp->if_output = ether_output;
+	ifp->if_start = xn_start;
+#ifdef notyet
+	ifp->if_watchdog = xn_watchdog;
+#endif
+	ifp->if_init = xn_ifinit;
+	ifp->if_mtu = ETHERMTU;
+	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
+
+	ifp->if_hwassist = XN_CSUM_FEATURES;
+	ifp->if_capabilities = IFCAP_HWCSUM;
+#if __FreeBSD_version >= 700000
+	ifp->if_capabilities |= IFCAP_TSO4;
+	if (xn_enable_lro) {
+		err = tcp_lro_init(&np->xn_lro);
+		if (err) {
+			device_printf(dev, "LRO initialization failed\n");
+			if_free(ifp);	/* not attached yet, just release it */
+			goto free_rx;
+		}
+		np->xn_lro.ifp = ifp;
+		ifp->if_capabilities |= IFCAP_LRO;
+	}
+#endif
+	ifp->if_capenable = ifp->if_capabilities;
+	ether_ifattach(ifp, np->mac);
+	callout_init(&np->xn_stat_ch, CALLOUT_MPSAFE);
+	netfront_carrier_off(np);
+
+	return (0);
+
+free_rx:
+	gnttab_free_grant_references(np->gref_rx_head);
+free_tx:
+	gnttab_free_grant_references(np->gref_tx_head);
+out:
+	return (err);
+}
+
+/**
+ * Handle the change of state of the backend to Closing.  We must delete our
+ * device-layer structures now, to ensure that writes are flushed through to
+ * the backend.  Once this is done, we can switch to Closed in
+ * acknowledgement.
+ */
+#if 0
+static void netfront_closing(device_t dev)
+{
+#if 0
+	struct netfront_info *info = dev->dev_driver_data;
+
+	DPRINTK("netfront_closing: %s removed\n", dev->nodename);
+
+	close_netdev(info);
+#endif
+	xenbus_switch_state(dev, XenbusStateClosed);
+}
+#endif
+
+/* device_detach method: log the node and release the backend link. */
+static int
+netfront_detach(device_t dev)
+{
+	struct netfront_info *sc = device_get_softc(dev);
+
+	DPRINTK("%s\n", xenbus_get_node(dev));
+	netif_free(sc);
+	return (0);
+}
+
+
+/* Release a device's backend connection. */
+static void
+netif_free(struct netfront_info *info)
+{
+	netif_disconnect_backend(info);
+	/* XXX close_netdev(info) is still disabled (was under #if 0). */
+}
+
+/* Carrier off, end access to both shared rings, unbind the interrupt. */
+static void
+netif_disconnect_backend(struct netfront_info *info)
+{
+	XN_RX_LOCK(info);
+	XN_TX_LOCK(info);
+	netfront_carrier_off(info);
+	XN_TX_UNLOCK(info);
+	XN_RX_UNLOCK(info);
+
+	end_access(info->tx_ring_ref, info->tx.sring);
+	info->tx_ring_ref = GRANT_INVALID_REF;
+	info->tx.sring = NULL;
+	end_access(info->rx_ring_ref, info->rx.sring);
+	info->rx_ring_ref = GRANT_INVALID_REF;
+	info->rx.sring = NULL;
+	if (info->irq != 0)
+		unbind_from_irqhandler(info->irq);
+	info->irq = 0;
+}
+
+/* Calls gnttab_end_foreign_access() unless ref is GRANT_INVALID_REF. */
+static void end_access(int ref, void *page)
+{
+	if (ref != GRANT_INVALID_REF)
+		gnttab_end_foreign_access(ref, page);
+}
+
+/* ** Driver registration: method table, driver description, module ** */
+static device_method_t netfront_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,         netfront_probe),
+	DEVMETHOD(device_attach,        netfront_attach),
+	DEVMETHOD(device_detach,        netfront_detach),
+	DEVMETHOD(device_shutdown,      bus_generic_shutdown),
+	DEVMETHOD(device_suspend,       bus_generic_suspend),
+	DEVMETHOD(device_resume,        netfront_resume),
+
+	/* Xenbus interface */
+	DEVMETHOD(xenbus_backend_changed, netfront_backend_changed),
+
+	{ 0, 0 }	/* end of table */
+};
+
+static driver_t netfront_driver = {
+	"xn",		/* same name that create_netdev() gives the ifnet */
+	netfront_methods,
+	sizeof(struct netfront_info),	/* the type device_get_softc() yields */
+};
+devclass_t netfront_devclass;
+
+DRIVER_MODULE(xe, xenbus, netfront_driver, netfront_devclass, 0, 0);

Property changes on: dev/xen/netfront/netfront.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native


Property changes on: dev/xen/netfront
___________________________________________________________________
Added: svn:mergeinfo
   Merged /stable/7/sys/dev/xen/netfront:r172506,172810,175956,179044,179776,180149,182402
   Merged /head/sys/dev/xen/netfront:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628
   Merged /user/dfr/xenhvm/7/sys/dev/xen/netfront:r188754,188757,188991,188996

Index: dev/xen/blkfront/block.h
===================================================================
--- dev/xen/blkfront/block.h	(.../stable/6/sys)	(revision 0)
+++ dev/xen/blkfront/block.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,97 @@
+/*
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+
+
+#ifndef __XEN_DRIVERS_BLOCK_H__
+#define __XEN_DRIVERS_BLOCK_H__
+#include <xen/interface/io/blkif.h>
+
+struct xlbd_type_info
+{
+	int partn_shift;
+	int disks_per_major;
+	char *devname;
+	char *diskname;
+};	/* NOTE(review): only referenced by xlbd_major_info here - confirm use */
+
+struct xlbd_major_info
+{
+	int major;
+	int index;
+	int usage;
+	struct xlbd_type_info *type;
+};	/* reached through blkfront_info.mi, which blkfront_closing() tests */
+
+struct blk_shadow {
+	blkif_request_t req;	/* req.id is the free-list link while unused */
+	unsigned long request;	/* reset to 0 by ADD_ID_TO_FREELIST() */
+	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+
+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
+
+
+struct xb_softc {
+	device_t		  xb_dev;
+	struct disk		  *xb_disk;		/* disk params */
+	struct bio_queue_head     xb_bioq;		/* sort queue */
+	int			  xb_unit;		/* unit chosen in xlvbd_add() */
+	int			  xb_flags;		/* presumably XB_* bits below */
+	struct blkfront_info      *xb_info;		/* owning blkfront_info */
+	LIST_ENTRY(xb_softc)      entry;
+#define XB_OPEN	(1<<0)		/* drive is open (can't shut down) */
+};
+
+
+/*
+ * We have one of these per vbd, whether ide, scsi or 'other'.  It is
+ * the softc that blkfront.c obtains with device_get_softc().
+ * NOTE(review): gd/dev/mi look inherited from the Linux driver - confirm.
+ */
+struct blkfront_info
+{
+	device_t xbdev;		/* set in blkfront_attach() */
+	dev_t dev;
+ 	struct gendisk *gd;
+	int vdevice;		/* "virtual-device" value read from xenbus */
+	blkif_vdev_t handle;	/* trailing number of the xenbus node path */
+	int connected;		/* one of BLKIF_STATE_* */
+	int ring_ref;		/* filled in by xenbus_grant_ring() */
+	blkif_front_ring_t ring;
+	unsigned int irq;	/* from bind_listening_port_to_irqhandler() */
+	struct xlbd_major_info *mi;
+#if 0
+	request_queue_t *rq;
+	struct work_struct work;
+#endif
+	struct gnttab_free_callback callback;
+	struct blk_shadow shadow[BLK_RING_SIZE];
+	unsigned long shadow_free;	/* index of the first free shadow[] slot */
+	struct xb_softc *sc;	/* set by xlvbd_add() */
+	int feature_barrier;	/* backend "feature-barrier", 0 if unreadable */
+	int is_ready;		/* set to 1 at the end of connect() */
+	/**
+	 * The number of people holding this device open.  We won't allow a
+	 * hot-unplug unless this is 0.
+	 */
+	int users;
+};
+/* Note that xlvbd_add doesn't call add_disk for you: you're expected
+   to call add_disk on info->gd once the disk is properly connected
+   up. */
+int xlvbd_add(device_t, blkif_sector_t capacity, int device,
+	      uint16_t vdisk_info, uint16_t sector_size, struct blkfront_info *info);
+void xlvbd_del(struct blkfront_info *info);
+
+#endif /* __XEN_DRIVERS_BLOCK_H__ */
+

Property changes on: dev/xen/blkfront/block.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: dev/xen/blkfront/blkfront.c
===================================================================
--- dev/xen/blkfront/blkfront.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/blkfront/blkfront.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,1119 @@
+/*-
+ * All rights reserved.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * XenoBSD block device driver
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <sys/bio.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/module.h>
+
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <machine/resource.h>
+#include <machine/intr_machdep.h>
+#include <machine/vmparam.h>
+
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenfunc.h>
+#include <xen/hypervisor.h>
+#include <xen/xen_intr.h>
+#include <xen/evtchn.h>
+#include <xen/gnttab.h>
+#include <xen/interface/grant_table.h>
+#include <xen/interface/io/protocols.h>
+#include <xen/xenbus/xenbusvar.h>
+
+#include <geom/geom_disk.h>
+
+#include <dev/xen/blkfront/block.h>
+
+#include "xenbus_if.h"
+
+#define    ASSERT(S)       KASSERT(S, (#S))
+/* prototypes */
+struct xb_softc;
+static void xb_startio(struct xb_softc *sc);
+static void connect(device_t, struct blkfront_info *);
+static void blkfront_closing(device_t);
+static int blkfront_detach(device_t);
+static int talk_to_backend(device_t, struct blkfront_info *);
+static int setup_blkring(device_t, struct blkfront_info *);
+static void blkif_int(void *);
+#if 0
+static void blkif_restart_queue(void *arg);
+#endif
+static void blkif_recover(struct blkfront_info *);
+static void blkif_completion(struct blk_shadow *);
+static void blkif_free(struct blkfront_info *, int);
+
+#define GRANT_INVALID_REF 0
+#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, PAGE_SIZE)
+
+LIST_HEAD(xb_softc_list_head, xb_softc) xbsl_head;
+
+/* Control whether runtime update of vbds is enabled. */
+#define ENABLE_VBD_UPDATE 0
+
+#if ENABLE_VBD_UPDATE
+static void vbd_update(void);
+#endif
+
+
+#define BLKIF_STATE_DISCONNECTED 0
+#define BLKIF_STATE_CONNECTED    1
+#define BLKIF_STATE_SUSPENDED    2
+
+#ifdef notyet
+static char *blkif_state_name[] = {
+	[BLKIF_STATE_DISCONNECTED] = "disconnected",
+	[BLKIF_STATE_CONNECTED]    = "connected",
+	[BLKIF_STATE_SUSPENDED]    = "closed",
+};
+
+static char * blkif_status_name[] = {
+	[BLKIF_INTERFACE_STATUS_CLOSED]       = "closed",
+	[BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
+	[BLKIF_INTERFACE_STATUS_CONNECTED]    = "connected",
+	[BLKIF_INTERFACE_STATUS_CHANGED]      = "changed",
+};
+#endif
+#define WPRINTK(fmt, args...) printf("[XEN] " fmt, ##args)
+#if 0
+#define DPRINTK(fmt, args...) printf("[XEN] %s:%d: " fmt ".\n", __func__, __LINE__, ##args)
+#else
+#define DPRINTK(fmt, args...) 
+#endif
+
+static grant_ref_t gref_head;
+#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
+    (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE)
+
+static void kick_pending_request_queues(struct blkfront_info *);
+static int blkif_open(struct disk *dp);
+static int blkif_close(struct disk *dp);
+static int blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td);
+static int blkif_queue_request(struct bio *bp);
+static void xb_strategy(struct bio *bp);
+
+
+
+/* XXX move to xb_vbd.c when VBD update support is added */
+#define MAX_VBDS 64
+
+#define XBD_SECTOR_SIZE		512	/* XXX: assume for now */
+#define XBD_SECTOR_SHFT		9
+
+static struct mtx blkif_io_lock;
+
+static vm_paddr_t	/* frame number -> address -> phystomach() -> frame number */
+pfn_to_mfn(vm_paddr_t pfn)
+{
+	return (phystomach(pfn << PAGE_SHIFT) >> PAGE_SHIFT);
+}
+
+/*
+ * Translate a Xen virtual device number into a disk name and unit.
+ * Extended ids (bit 28 set) and unknown majors map to "xbd"; legacy
+ * Linux IDE/SCSI majors map to "ad"/"da" to match the emulated drives.
+ */
+static void
+blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
+{
+	static const struct vdev_info {
+		int major;
+		int shift;
+		int base;
+		const char *name;
+	} info[] = {
+		{3,	6,	0,	"ad"},	/* ide0 */
+		{22,	6,	2,	"ad"},	/* ide1 */
+		{33,	6,	4,	"ad"},	/* ide2 */
+		{34,	6,	6,	"ad"},	/* ide3 */
+		{56,	6,	8,	"ad"},	/* ide4 */
+		{57,	6,	10,	"ad"},	/* ide5 */
+		{88,	6,	12,	"ad"},	/* ide6 */
+		{89,	6,	14,	"ad"},	/* ide7 */
+		{90,	6,	16,	"ad"},	/* ide8 */
+		{91,	6,	18,	"ad"},	/* ide9 */
+
+		{8,	4,	0,	"da"},	/* scsi disk0 */
+		{65,	4,	16,	"da"},	/* scsi disk1 */
+		{66,	4,	32,	"da"},	/* scsi disk2 */
+		{67,	4,	48,	"da"},	/* scsi disk3 */
+		{68,	4,	64,	"da"},	/* scsi disk4 */
+		{69,	4,	80,	"da"},	/* scsi disk5 */
+		{70,	4,	96,	"da"},	/* scsi disk6 */
+		{71,	4,	112,	"da"},	/* scsi disk7 */
+		{128,	4,	128,	"da"},	/* scsi disk8 */
+		{129,	4,	144,	"da"},	/* scsi disk9 */
+		{130,	4,	160,	"da"},	/* scsi disk10 */
+		{131,	4,	176,	"da"},	/* scsi disk11 */
+		{132,	4,	192,	"da"},	/* scsi disk12 */
+		{133,	4,	208,	"da"},	/* scsi disk13 */
+		{134,	4,	224,	"da"},	/* scsi disk14 */
+		{135,	4,	240,	"da"},	/* scsi disk15 */
+
+		{202,	4,	0,	"xbd"},	/* xbd */
+		{0,	0,	0,	NULL},
+	};
+	int major = vdevice >> 8;
+	int minor = vdevice & 0xff;
+	int i;
+
+	if (vdevice & (1 << 28)) {	/* extended id: unit is bits 8..27 */
+		*unit = (vdevice & ((1 << 28) - 1)) >> 8;
+		*name = "xbd";
+		return;		/* do not fall into the legacy decoding */
+	}
+
+	for (i = 0; info[i].major; i++) {
+		if (info[i].major == major) {
+			*unit = info[i].base + (minor >> info[i].shift);
+			*name = info[i].name;
+			return;
+		}
+	}
+
+	*unit = minor >> 4;
+	*name = "xbd";
+}
+
+/* Allocate the softc and disk for a connected device; returns 0. */
+int
+xlvbd_add(device_t dev, blkif_sector_t capacity,
+    int vdevice, uint16_t vdisk_info, uint16_t sector_size,
+    struct blkfront_info *info)
+{
+	struct xb_softc	*sc;
+	int	unit;
+	const char *name;
+
+	blkfront_vdevice_to_unit(vdevice, &unit, &name);
+	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+	sc->xb_unit = unit;
+	sc->xb_info = info;
+	info->sc = sc;
+
+	if (strcmp(name, "xbd"))
+		device_printf(dev, "attaching as %s%d\n", name, unit);
+
+	/* Set the queue up before disk_create() can hand us any bio. */
+	bioq_init(&sc->xb_bioq);
+	sc->xb_disk = disk_alloc();
+	sc->xb_disk->d_unit = sc->xb_unit;
+	sc->xb_disk->d_open = blkif_open;
+	sc->xb_disk->d_close = blkif_close;
+	sc->xb_disk->d_ioctl = blkif_ioctl;
+	sc->xb_disk->d_strategy = xb_strategy;
+	sc->xb_disk->d_name = name;
+	sc->xb_disk->d_drv1 = sc;
+	sc->xb_disk->d_sectorsize = sector_size;
+
+	/* XXX */
+	sc->xb_disk->d_mediasize = capacity << XBD_SECTOR_SHFT;
+#if 0
+	sc->xb_disk->d_maxsize = DFLTPHYS;
+#else /* XXX: xen can't handle large single i/o requests */
+	sc->xb_disk->d_maxsize = 4096;
+#endif
+#ifdef notyet
+	XENPRINTF("attaching device 0x%x unit %d capacity %llu\n",
+		  xb_diskinfo[sc->xb_unit].device, sc->xb_unit,
+		  sc->xb_disk->d_mediasize);
+#endif
+	sc->xb_disk->d_flags = 0;
+	disk_create(sc->xb_disk, DISK_VERSION_00);
+
+	return (0);
+}
+
+/* Destroy the disk that xlvbd_add() created for this device. */
+void
+xlvbd_del(struct blkfront_info *info)
+{
+	struct xb_softc *sc = info->sc;
+
+	disk_destroy(sc->xb_disk);
+}
+/************************ end VBD support *****************/
+
+/*
+ * Strategy routine for the disk: place the bio on the softc's sort
+ * queue and start I/O, all under blkif_io_lock.
+ *
+ * A bio whose disk carries no softc is completed immediately with
+ * EINVAL and nothing transferred.
+ */
+static void
+xb_strategy(struct bio *bp)
+{
+	struct xb_softc *sc;
+
+	sc = (struct xb_softc *)bp->bio_disk->d_drv1;
+	if (sc == NULL) {
+		/*
+		 * Bogus disk: flag the error and report the whole
+		 * request as residual before completing it.
+		 */
+		bp->bio_error = EINVAL;
+		bp->bio_flags |= BIO_ERROR;
+		bp->bio_resid = bp->bio_bcount;
+		biodone(bp);
+		return;
+	}
+
+	DPRINTK("");
+
+	/*
+	 * Place it in the queue of disk activities for this disk and
+	 * start processing the queue.
+	 */
+	mtx_lock(&blkif_io_lock);
+	bioq_disksort(&sc->xb_bioq, bp);
+	xb_startio(sc);
+	mtx_unlock(&blkif_io_lock);
+}
+
+/* Probe: accept xenbus devices whose type is "vbd", reject the rest. */
+static int
+blkfront_probe(device_t dev)
+{
+
+	if (strcmp(xenbus_get_type(dev), "vbd") != 0)
+		return (ENXIO);
+
+	device_set_desc(dev, "Virtual Block Device");
+	device_quiet(dev);
+	return (0);
+}
+
+/*
+ * Attach: read "virtual-device" from the xenbus node, pick the unit,
+ * initialise the softc and its shadow free list, then negotiate the
+ * ring and event channel with the backend via talk_to_backend().
+ */
+static int
+blkfront_attach(device_t dev)
+{
+	int error, vdevice, i, unit;
+	struct blkfront_info *info;
+	const char *name;
+
+	/* FIXME: Use dynamic device id if this is not set. */
+	error = xenbus_scanf(XBT_NIL, xenbus_get_node(dev),
+	    "virtual-device", NULL, "%i", &vdevice);
+	if (error) {
+		xenbus_dev_fatal(dev, error, "reading virtual-device");
+		printf("couldn't find virtual device\n");
+		return (error);
+	}
+
+	blkfront_vdevice_to_unit(vdevice, &unit, &name);
+	if (!strcmp(name, "xbd"))
+		device_set_unit(dev, unit);
+
+	info = device_get_softc(dev);
+
+	/*
+	 * XXX debug only: the softc is expected to arrive zero-filled.
+	 */
+	for (i = 0; i < (int)sizeof(*info); i++)
+			if (((uint8_t *)info)[i] != 0)
+					panic("non-null memory");
+
+	info->shadow_free = 0;
+	info->xbdev = dev;
+	info->vdevice = vdevice;
+	info->connected = BLKIF_STATE_DISCONNECTED;
+
+	/* Chain every shadow entry into the free list; the last one ends it. */
+	for (i = 0; i < BLK_RING_SIZE; i++)
+		info->shadow[i].req.id = i+1;
+	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+	/* Front end dir is a number, which is used as the id. */
+	info->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);
+
+	error = talk_to_backend(dev, info);
+	if (error)
+		return (error);
+
+	return (0);
+}
+
+static int
+blkfront_suspend(device_t dev)
+{
+	struct blkfront_info *info;
+
+	info = device_get_softc(dev);
+	/* Block new requests until resume has fixed things up. */
+	mtx_lock(&blkif_io_lock);
+	info->connected = BLKIF_STATE_SUSPENDED;
+	mtx_unlock(&blkif_io_lock);
+	return (0);
+}
+
+static int
+blkfront_resume(device_t dev)
+{
+	struct blkfront_info *info = device_get_softc(dev);
+	int err;
+
+	DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
+
+	/* Free via blkif_free(info, 1), then renegotiate with the backend. */
+	blkif_free(info, 1);
+	err = talk_to_backend(dev, info);
+	if (err == 0 && info->connected == BLKIF_STATE_SUSPENDED)
+		blkif_recover(info);
+	return (err);
+}
+
+/* Common code for first setup and resume: publish ring details via xenbus. */
+static int
+talk_to_backend(device_t dev, struct blkfront_info *info)
+{
+	const char *message = NULL;
+	struct xenbus_transaction xbt;
+	int err;
+
+	/* Create shared ring, alloc event channel (setup_blkring cleans up). */
+	err = setup_blkring(dev, info);
+	if (err)
+		goto out;
+
+ again:
+	err = xenbus_transaction_start(&xbt);
+	if (err) {
+		xenbus_dev_fatal(dev, err, "starting transaction");
+		goto destroy_blkring;
+	}
+
+	err = xenbus_printf(xbt, xenbus_get_node(dev),
+			    "ring-ref","%u", info->ring_ref);
+	if (err) {
+		message = "writing ring-ref";
+		goto abort_transaction;
+	}
+	err = xenbus_printf(xbt, xenbus_get_node(dev),
+		"event-channel", "%u", irq_to_evtchn_port(info->irq));
+	if (err) {
+		message = "writing event-channel";
+		goto abort_transaction;
+	}
+	err = xenbus_printf(xbt, xenbus_get_node(dev),
+		"protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
+	if (err) {
+		message = "writing protocol";
+		goto abort_transaction;
+	}
+	/* End the transaction; EAGAIN restarts it from "again". */
+	err = xenbus_transaction_end(xbt, 0);
+	if (err) {
+		if (err == EAGAIN)
+			goto again;
+		xenbus_dev_fatal(dev, err, "completing transaction");
+		goto destroy_blkring;
+	}
+	xenbus_set_state(dev, XenbusStateInitialised);
+
+	return 0;
+
+ abort_transaction:
+	xenbus_transaction_end(xbt, 1);	/* second argument 1: abort */
+	if (message)
+		xenbus_dev_fatal(dev, err, "%s", message);
+ destroy_blkring:
+	blkif_free(info, 0);
+ out:
+	return err;
+}
+
+static int	/* 0, or an errno value after blkif_free(info, 0) was called */
+setup_blkring(device_t dev, struct blkfront_info *info)
+{
+	blkif_sring_t *sring;
+	int error;
+
+	info->ring_ref = GRANT_INVALID_REF;
+
+	sring = (blkif_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (sring == NULL) {
+		xenbus_dev_fatal(dev, ENOMEM, "allocating shared ring");
+		return ENOMEM;
+	}
+	SHARED_RING_INIT(sring);
+	FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
+
+	error = xenbus_grant_ring(dev,
+	    (vtomach(info->ring.sring) >> PAGE_SHIFT), &info->ring_ref);
+	if (error) {
+		/* Free the page here and clear the pointer before blkif_free(). */
+		free(sring, M_DEVBUF);
+		info->ring.sring = NULL;
+		goto fail;
+	}
+	/* Register blkif_int() as the handler, with info as its argument. */
+	error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev),
+	    "xbd", (driver_intr_t *)blkif_int, info,
+	    INTR_TYPE_BIO | INTR_MPSAFE, &info->irq);
+	if (error) {
+		xenbus_dev_fatal(dev, error,
+		    "bind_evtchn_to_irqhandler failed");
+		goto fail;
+	}
+
+	return (0);
+ fail:
+	blkif_free(info, 0);
+	return (error);
+}
+
+
+/**
+ * Callback received when the backend's state changes.
+ */
+static void
+blkfront_backend_changed(device_t dev, XenbusState backend_state)
+{
+	struct blkfront_info *info = device_get_softc(dev);
+
+	DPRINTK("backend_state=%d\n", backend_state);
+
+	switch (backend_state) {
+	case XenbusStateUnknown:
+	case XenbusStateInitialising:
+	case XenbusStateInitWait:
+	case XenbusStateInitialised:
+	case XenbusStateClosed:
+	case XenbusStateReconfigured:
+	case XenbusStateReconfiguring:
+		break;
+
+	case XenbusStateConnected:
+		connect(dev, info);
+		break;
+
+	case XenbusStateClosing:
+		if (info->users > 0)	/* positive errno, as elsewhere here */
+			xenbus_dev_error(dev, EBUSY,
+					 "Device in use; refusing to close");
+		else
+			blkfront_closing(dev);
+#ifdef notyet
+		bd = bdget(info->dev);
+		if (bd == NULL)
+			xenbus_dev_fatal(dev, -ENODEV, "bdget failed");
+
+		down(&bd->bd_sem);
+		if (info->users > 0)
+			xenbus_dev_error(dev, -EBUSY,
+					 "Device in use; refusing to close");
+		else
+			blkfront_closing(dev);
+		up(&bd->bd_sem);
+		bdput(bd);
+#endif
+	}
+}
+
+/*
+ * Invoked when the backend is finally 'ready' (and has produced
+ * the details about the physical device - #sectors, size, etc).
+ */
+static void
+connect(device_t dev, struct blkfront_info *info)
+{
+	unsigned long sectors, sector_size, barrier;
+	unsigned int binfo;
+	int err;
+
+	if ((info->connected == BLKIF_STATE_CONNECTED) ||
+	    (info->connected == BLKIF_STATE_SUSPENDED))
+		return;
+
+	DPRINTK("blkfront.c:connect:%s.\n", xenbus_get_otherend_path(dev));
+
+	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
+			    "sectors", "%lu", &sectors,
+			    "info", "%u", &binfo,
+			    "sector-size", "%lu", &sector_size,
+			    NULL);
+	if (err) {
+		xenbus_dev_fatal(dev, err,
+		    "reading backend fields at %s",
+		    xenbus_get_otherend_path(dev));
+		return;
+	}
+	/* feature_barrier is an int; scan "%lu" into a real unsigned long. */
+	err = xenbus_gather(XBT_NIL, xenbus_get_otherend_path(dev),
+			    "feature-barrier", "%lu", &barrier,
+			    NULL);
+	info->feature_barrier = err ? 0 : (int)barrier;
+
+	device_printf(dev, "%juMB <%s> at %s",
+	    ((uintmax_t)sectors * sector_size) >> 20,	/* no divide by 0 */
+	    device_get_desc(dev),
+	    xenbus_get_node(dev));
+	bus_print_child_footer(device_get_parent(dev), dev);
+
+	xlvbd_add(dev, sectors, info->vdevice, binfo, sector_size, info);
+
+	(void)xenbus_set_state(dev, XenbusStateConnected);
+
+	/* Kick pending requests. */
+	mtx_lock(&blkif_io_lock);
+	info->connected = BLKIF_STATE_CONNECTED;
+	kick_pending_request_queues(info);
+	mtx_unlock(&blkif_io_lock);
+	info->is_ready = 1;
+
+#if 0
+	add_disk(info->gd);
+#endif
+}
+
+/**
+ * Handle the change of state of the backend to Closing.  We must delete our
+ * device-layer structures now, to ensure that writes are flushed through to
+ * the backend.  Once this is done, we can switch to Closed in
+ * acknowledgement.
+ */
+static void
+blkfront_closing(device_t dev)
+{
+	struct blkfront_info *info;
+
+	info = device_get_softc(dev);
+	DPRINTK("blkfront_closing: %s removed\n", xenbus_get_node(dev));
+
+	if (info->mi != NULL) {
+		DPRINTK("Calling xlvbd_del\n");
+		xlvbd_del(info);
+		info->mi = NULL;
+	}
+	xenbus_set_state(dev, XenbusStateClosed);
+}
+
+
+/* Newbus detach method: releases ring and interrupt via blkif_free(). */
+static int
+blkfront_detach(device_t dev)
+{
+
+	DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));
+
+	/* suspend == 0: the device ends up in the disconnected state. */
+	blkif_free(device_get_softc(dev), 0);
+	return (0);
+}
+
+
+static inline int 
+GET_ID_FROM_FREELIST(struct blkfront_info *info)
+{
+	unsigned long nfree = info->shadow_free;
+	/* Pop the list head; valid ids index shadow[], i.e. [0, BLK_RING_SIZE). */
+	KASSERT(nfree < BLK_RING_SIZE, ("free %lu >= RING_SIZE", nfree));
+	info->shadow_free = info->shadow[nfree].req.id;	/* next free id */
+	info->shadow[nfree].req.id = 0x0fffffee; /* debug */
+	return nfree;
+}
+
+static inline void 
+ADD_ID_TO_FREELIST(struct blkfront_info *info, unsigned long id)
+{
+	info->shadow[id].request = 0;	/* slot no longer tracks a bio */
+	info->shadow[id].req.id = info->shadow_free; /* chain to old head */
+	info->shadow_free = id;		/* freed slot is the new list head */
+}
+
+/* Push privately queued requests; notify the backend if the ring asks. */
+static inline void 
+flush_requests(struct blkfront_info *info)
+{
+	int need_kick;
+
+	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&info->ring, need_kick);
+	if (need_kick != 0)
+		notify_remote_via_irq(info->irq);
+}
+
+static void 
+kick_pending_request_queues(struct blkfront_info *info)
+{
+	/* XXX check if we can't simplify */
+#if 0
+	if (!RING_FULL(&info->ring)) {
+		/* Re-enable calldowns. */
+		blk_start_queue(info->rq);
+		/* Kick things off immediately. */
+		do_blkif_request(info->rq);
+	}
+#endif
+	if (RING_FULL(&info->ring))	/* no free slot: nothing to submit */
+		return;
+#if 0
+	sc = LIST_FIRST(&xbsl_head);
+	LIST_REMOVE(sc, entry);
+	/* Re-enable calldowns. */
+	blk_start_queue(di->rq);
+#endif
+	/* Kick things off immediately. */
+	xb_startio(info->sc);
+}
+
+#if 0
+/* XXX compiled out: would re-kick the request queue under blkif_io_lock */
+static void blkif_restart_queue(void *arg)
+{
+	struct blkfront_info *info = (struct blkfront_info *)arg;
+
+	mtx_lock(&blkif_io_lock);
+	kick_pending_request_queues(info);
+	mtx_unlock(&blkif_io_lock);
+}
+#endif
+
+static void blkif_restart_queue_callback(void *arg)
+{
+#if 0
+	struct blkfront_info *info = (struct blkfront_info *)arg;
+	/* XXX BSD equiv of schedule_work()? Until then this callback is a no-op. */
+
+	schedule_work(&info->work);
+#endif
+}
+
+static int
+blkif_open(struct disk *dp)
+{
+	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;
+
+	if (sc == NULL) {	/* no softc: take the unit from the disk itself */
+		printf("xb%u: not found\n", dp->d_unit);
+		return (ENXIO);
+	}
+	/* Record the open and count the user; blkif_close() undoes both. */
+	sc->xb_flags |= XB_OPEN;
+	sc->xb_info->users++;
+	return (0);
+}
+
+static int
+blkif_close(struct disk *dp)
+{
+	struct xb_softc	*sc = (struct xb_softc *)dp->d_drv1;
+	struct blkfront_info *info;
+	device_t dev;
+
+	if (sc == NULL)
+		return (ENXIO);
+	info = sc->xb_info;
+	sc->xb_flags &= ~XB_OPEN;
+	if (--info->users != 0)
+		return (0);
+	/* Last user gone: honour a close request that had to be ignored
+	   earlier because the device was still in use. */
+	dev = info->xbdev;
+	if (xenbus_read_driver_state(xenbus_get_otherend_path(dev)) ==
+	    XenbusStateClosing)
+		blkfront_closing(dev);
+	return (0);
+}
+
+/*
+ * Disk ioctl method.  No device-specific commands are implemented: the
+ * result is ENXIO when no softc is attached and ENOTTY otherwise.
+ */
+static int
+blkif_ioctl(struct disk *dp, u_long cmd, void *addr, int flag, struct thread *td)
+{
+
+	return (dp->d_drv1 == NULL ? ENXIO : ENOTTY);
+}
+
+
+/*
+ * blkif_queue_request
+ *
+ * Place one bio on the shared ring as a single-segment read or write.
+ * A bio_data buffer that is not page aligned is bounced through a
+ * temporary aligned copy, which blkif_int() frees on completion.
+ *
+ * Returns 0 when the request was queued and 1 when it could not be
+ * queued right now, in which case the caller keeps the bio for a retry.
+ */
+static int blkif_queue_request(struct bio *bp)
+{
+	caddr_t alignbuf;
+	vm_paddr_t buffer_ma;
+	blkif_request_t     *ring_req;
+	unsigned long id;
+	uint64_t fsect, lsect;
+	struct xb_softc *sc = (struct xb_softc *)bp->bio_disk->d_drv1;
+	struct blkfront_info *info = sc->xb_info;
+	int ref;
+
+	if (unlikely(sc->xb_info->connected != BLKIF_STATE_CONNECTED))
+		return 1;
+
+	if (gnttab_alloc_grant_references(
+		    BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
+		gnttab_request_free_callback(
+			&info->callback,
+			blkif_restart_queue_callback,
+			info,
+			BLKIF_MAX_SEGMENTS_PER_REQUEST);
+		return 1;
+	}
+
+	/* Check if the buffer is properly aligned */
+	if ((vm_offset_t)bp->bio_data & PAGE_MASK) {
+		int align = (bp->bio_bcount < PAGE_SIZE/2) ? XBD_SECTOR_SIZE : 
+			PAGE_SIZE;
+		caddr_t newbuf = malloc(bp->bio_bcount + align, M_DEVBUF, 
+					M_NOWAIT);
+
+		/* M_NOWAIT may fail: give the grant refs back and retry later. */
+		if (newbuf == NULL) {
+			gnttab_free_grant_references(gref_head);
+			return 1;
+		}
+		alignbuf = (char *)roundup2((u_long)newbuf, align);
+		/* remember the allocation (freed later) and the aligned address */
+		bp->bio_driver1 = newbuf;
+		bp->bio_driver2 = alignbuf;
+		/* Copy the data for a write */
+		if (bp->bio_cmd == BIO_WRITE)
+			bcopy(bp->bio_data, alignbuf, bp->bio_bcount);
+	} else
+		alignbuf = bp->bio_data;
+	
+	/* Fill out a communications ring structure. */
+	ring_req 	         = RING_GET_REQUEST(&info->ring, 
+						    info->ring.req_prod_pvt);
+	id		         = GET_ID_FROM_FREELIST(info);
+	info->shadow[id].request = (unsigned long)bp;
+	
+	ring_req->id 	         = id;
+	ring_req->operation 	 = (bp->bio_cmd == BIO_READ) ? BLKIF_OP_READ :
+		BLKIF_OP_WRITE;
+	
+	ring_req->sector_number= (blkif_sector_t)bp->bio_pblkno;
+	ring_req->handle 	  = (blkif_vdev_t)(uintptr_t)sc->xb_disk;
+	
+	ring_req->nr_segments  = 0;	/* XXX not doing scatter/gather since buffer
+					 * chaining is not supported.
+					 */
+
+	buffer_ma = vtomach(alignbuf);
+	fsect = (buffer_ma & PAGE_MASK) >> XBD_SECTOR_SHFT;
+	lsect = fsect + (bp->bio_bcount >> XBD_SECTOR_SHFT) - 1;
+	/* install a grant reference. */
+	ref = gnttab_claim_grant_reference(&gref_head);
+	KASSERT( ref != -ENOSPC, ("grant_reference failed") );
+	gnttab_grant_foreign_access_ref(
+		ref,
+		xenbus_get_otherend_id(info->xbdev),
+		buffer_ma >> PAGE_SHIFT,
+		ring_req->operation & 1 ); /* ??? */
+	info->shadow[id].frame[ring_req->nr_segments] = 
+		buffer_ma >> PAGE_SHIFT;
+
+	ring_req->seg[ring_req->nr_segments] =
+		(struct blkif_request_segment) {
+			.gref       = ref,
+			.first_sect = fsect, 
+			.last_sect  = lsect };
+
+	ring_req->nr_segments++;
+	KASSERT((buffer_ma & (XBD_SECTOR_SIZE-1)) == 0,
+		("XEN buffer must be sector aligned"));
+	KASSERT(lsect <= 7, 
+		("XEN disk driver data cannot cross a page boundary"));
+	
+	info->ring.req_prod_pvt++;
+
+	/* Keep a private copy so we can reissue requests when recovering. */
+	info->shadow[id].req = *ring_req;
+
+	gnttab_free_grant_references(gref_head);
+
+	return 0;
+}
+
+
+
+/*
+ * Move bios from the softc's queue onto the shared communication ring.
+ * The loop ends when the queue is empty, the ring is full, or a request
+ * cannot be queued; in the latter two cases the bio is put back at the
+ * head of the queue.
+ *
+ * The backend is signalled if at least one request was placed.
+ */
+static void
+xb_startio(struct xb_softc *sc)
+{
+	struct blkfront_info *info = sc->xb_info;
+	struct bio *bp;
+	int nqueued = 0;
+
+	DPRINTK("");
+	mtx_assert(&blkif_io_lock, MA_OWNED);
+
+	for (;;) {
+		bp = bioq_takefirst(&sc->xb_bioq);
+		if (bp == NULL)
+			break;
+		/* || short-circuits: no queue attempt is made on a full ring. */
+		if (RING_FULL(&info->ring) || blkif_queue_request(bp)) {
+			bioq_insert_head(&sc->xb_bioq, bp);
+			break;
+		}
+		nqueued++;
+	}
+
+	if (nqueued != 0)
+		flush_requests(info);
+}
+
+static void
+blkif_int(void *xsc)
+{
+	struct xb_softc *sc = NULL;
+	struct bio *bp;
+	blkif_response_t *bret;
+	RING_IDX i, rp;
+	struct blkfront_info *info = xsc;
+	DPRINTK("");
+	/* Interrupt handler: completes the bio behind every ring response. */
+	TRACE_ENTER;
+	/* The lock is held for the whole response scan. */
+	mtx_lock(&blkif_io_lock);
+	/* Responses are ignored unless the device is connected. */
+	if (unlikely(info->connected != BLKIF_STATE_CONNECTED)) {
+		mtx_unlock(&blkif_io_lock);
+		return;
+	}
+
+ again:
+	rp = info->ring.sring->rsp_prod;
+	rmb(); /* Ensure we see queued responses up to 'rp'. */
+
+	for (i = info->ring.rsp_cons; i != rp; i++) {
+		unsigned long id;
+
+		bret = RING_GET_RESPONSE(&info->ring, i);
+		id   = bret->id;
+		bp   = (struct bio *)info->shadow[id].request;
+		/* NOTE(review): 'id' comes from the response and is not range-checked. */
+		blkif_completion(&info->shadow[id]);
+		/* bp was read above, so the shadow slot can be recycled now. */
+		ADD_ID_TO_FREELIST(info, id);
+
+		switch (bret->operation) {
+		case BLKIF_OP_READ:
+			/* had an unaligned buffer that needs to be copied */
+			if (bp->bio_driver1)
+				bcopy(bp->bio_driver2, bp->bio_data, bp->bio_bcount);
+			/* FALLTHROUGH */
+		case BLKIF_OP_WRITE:
+
+			/* free the copy buffer */
+			if (bp->bio_driver1) {
+				free(bp->bio_driver1, M_DEVBUF);
+				bp->bio_driver1 = NULL;
+			}
+
+			if ( unlikely(bret->status != BLKIF_RSP_OKAY) ) {
+					printf("Bad return from blkdev data request: %x\n", 
+					  bret->status);
+				bp->bio_flags |= BIO_ERROR;
+			}
+			/* NOTE(review): sc is assigned here but not used afterwards. */
+			sc = (struct xb_softc *)bp->bio_disk->d_drv1;
+
+			if (bp->bio_flags & BIO_ERROR)
+				bp->bio_error = EIO;
+			else
+				bp->bio_resid = 0;
+
+			biodone(bp);
+			break;
+		default:
+			panic("received invalid operation");
+			break;
+		}
+	}
+	/* Record how far the response ring has been consumed. */
+	info->ring.rsp_cons = i;
+	/* Requests still outstanding: re-check the ring before leaving. */
+	if (i != info->ring.req_prod_pvt) {
+		int more_to_do;
+		RING_FINAL_CHECK_FOR_RESPONSES(&info->ring, more_to_do);
+		if (more_to_do)
+			goto again;
+	} else {
+		info->ring.sring->rsp_event = i + 1;
+	}
+	/* Completions may have freed ring slots; try to submit queued bios. */
+	kick_pending_request_queues(info);
+
+	mtx_unlock(&blkif_io_lock);
+}
+
+static void 
+blkif_free(struct blkfront_info *info, int suspend)
+{
+	/* Prevent new requests being issued until we fix things up. */
+	mtx_lock(&blkif_io_lock);
+	if (suspend)
+		info->connected = BLKIF_STATE_SUSPENDED;
+	else
+		info->connected = BLKIF_STATE_DISCONNECTED;
+	mtx_unlock(&blkif_io_lock);
+
+	/* Free resources associated with old device channel. */
+	if (info->ring_ref != GRANT_INVALID_REF) {
+		gnttab_end_foreign_access(info->ring_ref, info->ring.sring);
+		info->ring_ref = GRANT_INVALID_REF;
+		info->ring.sring = NULL;
+	}
+	if (info->irq != 0) {
+		unbind_from_irqhandler(info->irq);
+		info->irq = 0;
+	}
+}
+
+static void 
+blkif_completion(struct blk_shadow *s)
+{
+	int seg;	/* index into the request's segment array */
+
+	for (seg = 0; seg < s->req.nr_segments; seg++)	/* one grant each */
+		gnttab_end_foreign_access(s->req.seg[seg].gref, 0UL);
+}
+
+static void 
+blkif_recover(struct blkfront_info *info)
+{
+	int i, j;
+	blkif_request_t *req;
+	struct blk_shadow *copy;
+
+	if (!info->sc)
+		return;
+
+	/* Stage 1: Make a safe copy of the shadow state. */
+	copy = (struct blk_shadow *)malloc(sizeof(info->shadow), M_DEVBUF, M_NOWAIT|M_ZERO);
+	if (copy == NULL) {	/* M_NOWAIT may fail; nothing can be requeued */
+		xenbus_dev_fatal(info->xbdev, ENOMEM,
+		    "allocating shadow copy for recovery");
+		return;
+	}
+	memcpy(copy, info->shadow, sizeof(info->shadow));
+
+	/* Stage 2: Set up free list. */
+	memset(&info->shadow, 0, sizeof(info->shadow));
+	for (i = 0; i < BLK_RING_SIZE; i++)
+		info->shadow[i].req.id = i+1;
+	info->shadow_free = info->ring.req_prod_pvt;
+	info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+	/* Stage 3: Find pending requests and requeue them. */
+	for (i = 0; i < BLK_RING_SIZE; i++) {
+		/* Not in use? */
+		if (copy[i].request == 0)
+			continue;
+
+		/* Grab a request slot and copy shadow state into it. */
+		req = RING_GET_REQUEST(
+			&info->ring, info->ring.req_prod_pvt);
+		*req = copy[i].req;
+
+		/* We get a new request id, and must reset the shadow state. */
+		req->id = GET_ID_FROM_FREELIST(info);
+		memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
+
+		/* Rewrite any grant references invalidated by suspend/resume. */
+		for (j = 0; j < req->nr_segments; j++)
+			gnttab_grant_foreign_access_ref(
+				req->seg[j].gref,
+				xenbus_get_otherend_id(info->xbdev),
+				pfn_to_mfn(info->shadow[req->id].frame[j]),
+				0 /* assume not readonly */);
+		info->shadow[req->id].req = *req;
+		info->ring.req_prod_pvt++;
+	}
+
+	free(copy, M_DEVBUF);
+	xenbus_set_state(info->xbdev, XenbusStateConnected); 
+
+	/* Now safe for us to use the shared ring */
+	mtx_lock(&blkif_io_lock);
+	info->connected = BLKIF_STATE_CONNECTED;
+
+	/* Send off requeued requests */
+	flush_requests(info);
+
+	/* Kick any other new requests queued since we resumed */
+	kick_pending_request_queues(info);
+	mtx_unlock(&blkif_io_lock);
+}
+
+/* ** Driver registration: newbus method table, driver and module glue ** */
+static device_method_t blkfront_methods[] = { 
+	/* Device interface */ 
+	DEVMETHOD(device_probe,         blkfront_probe), 
+	DEVMETHOD(device_attach,        blkfront_attach), 
+	DEVMETHOD(device_detach,        blkfront_detach), 
+	DEVMETHOD(device_shutdown,      bus_generic_shutdown), 
+	DEVMETHOD(device_suspend,       blkfront_suspend), 
+	DEVMETHOD(device_resume,        blkfront_resume), 
+ 
+	/* Xenbus interface */
+	DEVMETHOD(xenbus_backend_changed, blkfront_backend_changed),
+
+	{ 0, 0 } 	/* end of table */
+}; 
+
+static driver_t blkfront_driver = { 
+	"xbd", 		/* driver name */
+	blkfront_methods, 
+	sizeof(struct blkfront_info),                      /* softc size */
+}; 
+devclass_t blkfront_devclass; 
+ 
+DRIVER_MODULE(xbd, xenbus, blkfront_driver, blkfront_devclass, 0, 0); 
+/* blkif_io_lock is set up through MTX_SYSINIT rather than in attach. */
+MTX_SYSINIT(ioreq, &blkif_io_lock, "BIO LOCK", MTX_NOWITNESS); /* XXX how does one enroll a lock? */
+

Property changes on: dev/xen/blkfront/blkfront.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: dev/xen/console/xencons_ring.h
===================================================================
--- dev/xen/console/xencons_ring.h	(.../stable/6/sys)	(revision 0)
+++ dev/xen/console/xencons_ring.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,20 @@
+/*
+ * $FreeBSD$
+ *
+ */
+#ifndef _XENCONS_RING_H
+#define _XENCONS_RING_H
+
+int xencons_ring_init(void);	/* bind the console event channel handler */
+int xencons_ring_send(const char *data, unsigned len);	/* returns bytes queued */
+void xencons_rx(char *buf, unsigned len);	/* deliver len received bytes */
+void xencons_tx(void);		/* flush buffered console output */
+
+/* Callback type accepted by xencons_ring_register_receiver(). */
+typedef void (xencons_receiver_func)(char *buf, unsigned len);
+void xencons_ring_register_receiver(xencons_receiver_func *f);
+
+void xencons_handle_input(void *unused);	/* drain the input ring */
+int xencons_has_input(void);	/* non-zero when the input ring is not empty */
+
+#endif /* _XENCONS_RING_H */

Property changes on: dev/xen/console/xencons_ring.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: dev/xen/console/console.c
===================================================================
--- dev/xen/console/console.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/console/console.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,569 @@
+#include <sys/cdefs.h>
+
+
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/systm.h>
+#include <sys/consio.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/tty.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <machine/stdarg.h>
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/xen_intr.h>
+#include <sys/cons.h>
+#include <sys/proc.h>
+#include <sys/kdb.h>
+
+#include <dev/xen/console/xencons_ring.h>
+#include <xen/interface/io/console.h>
+
+
+#include "opt_ddb.h"
+#ifdef DDB
+#include <ddb/ddb.h>
+#endif
+
+static char driver_name[] = "xc";
+devclass_t xc_devclass; /* do not make static */
+static void	xcstart (struct tty *);
+static int	xcparam (struct tty *, struct termios *);
+static void	xcstop (struct tty *, int);
+static void	xc_timeout(void *);
+static void __xencons_tx_flush(void);
+static boolean_t xcons_putc(int c);
+
+/* switch console so that shutdown can occur gracefully */
+static void xc_shutdown(void *arg, int howto);
+static int xc_mute;	/* set by xc_shutdown(); input paths then return 0 */
+
+static void xcons_force_flush(void);
+static void xencons_priv_interrupt(void *);
+
+static cn_probe_t       xccnprobe;
+static cn_init_t        xccninit;
+static cn_getc_t        xccngetc;
+static cn_putc_t        xccnputc;
+static cn_putc_t        xccnputc_dom0;
+static cn_checkc_t      xccncheckc;
+
+#define XC_POLLTIME 	(hz/10)	/* interval used for tsleep() and the callout */
+
+CONS_DRIVER(xc, xccnprobe, xccninit, NULL, xccngetc, 
+	    xccncheckc, xccnputc, NULL);
+
+static int xen_console_up;	/* set in xcopen(), cleared in xcclose() */
+static boolean_t xc_start_needed;	/* set by xcstart(), consumed by xc_timeout() */
+static struct callout xc_callout;	/* drives xc_timeout() */
+struct mtx              cn_mtx;	/* also referenced from xencons_ring.c */
+
+#define RBUF_SIZE     1024	/* power of two: RBUF_MASK relies on it */
+#define RBUF_MASK(_i) ((_i)&(RBUF_SIZE-1))
+#define WBUF_SIZE     4096	/* power of two: WBUF_MASK relies on it */
+#define WBUF_MASK(_i) ((_i)&(WBUF_SIZE-1))
+static char wbuf[WBUF_SIZE];	/* output ring, indexed by wc/wp */
+static char rbuf[RBUF_SIZE];	/* input ring, indexed by rc/rp */
+static int rc, rp;		/* read_cons, read_prod */
+static unsigned int cnsl_evt_reg;	/* set to 1 in xc_attach() */
+static unsigned int wc, wp; /* write_cons, write_prod */
+
+#define CDEV_MAJOR 12
+#define	XCUNIT(x)	(minor(x))
+#define ISTTYOPEN(tp)	((tp) && ((tp)->t_state & TS_ISOPEN))
+#define CN_LOCK_INIT(x, _name) \
+        mtx_init(&x, _name, NULL, MTX_DEF|MTX_RECURSE)
+/* CN_LOCK/CN_UNLOCK do nothing once the kernel has panicked (panicstr set). */
+#define CN_LOCK(l)        								\
+		do {											\
+				if (panicstr == NULL)					\
+                        mtx_lock(&(l));			\
+		} while (0)
+#define CN_UNLOCK(l)        							\
+		do {											\
+				if (panicstr == NULL)					\
+                        mtx_unlock(&(l));			\
+		} while (0)
+#define CN_LOCK_ASSERT(x)    mtx_assert(&x, MA_OWNED)
+#define CN_LOCK_DESTROY(x)   mtx_destroy(&x)
+
+
+static struct tty *xccons;	/* the console tty, allocated in xc_attach() */
+
+struct xc_softc {
+	int    xc_unit;		/* unit number, filled in by xc_probe() */
+	struct cdev *xc_dev;	/* device node created in xc_attach() */
+};
+
+/* Entry points implemented in this file; the rest use the tty defaults. */
+static d_open_t  xcopen;
+static d_close_t xcclose;
+static d_ioctl_t xcioctl;
+
+static struct cdevsw xc_cdevsw = {
+	.d_version =    D_VERSION,
+        .d_flags =      D_TTY | D_NEEDGIANT,
+        .d_name =       driver_name,
+        .d_open =       xcopen,
+        .d_close =      xcclose,
+        .d_read =       ttyread,
+        .d_write =      ttywrite,
+        .d_ioctl =      xcioctl,
+        .d_poll =       ttypoll,
+        .d_kqfilter =   ttykqfilter,
+};
+
+static void
+xccnprobe(struct consdev *cp)
+{
+	sprintf(cp->cn_name, "%s0", driver_name);	/* console name, unit 0 */
+	cp->cn_tp = xccons;		/* tty backing the console */
+	cp->cn_pri = CN_REMOTE;		/* priority handed to the console layer */
+}
+
+
+/* Console init hook: creates the console mutex; cp is not used. */
+static void
+xccninit(struct consdev *cp)
+{
+	mtx_init(&cn_mtx, "XCONS LOCK", NULL, MTX_DEF|MTX_RECURSE);
+}
+int
+xccngetc(struct consdev *dev)
+{
+	int ch;
+
+	if (xc_mute)
+		return 0;
+	for (;;) {
+		ch = xccncheckc(dev);
+		if (ch != -1)
+			return ch;
+		/*
+		 * Polling without sleeping doesn't work well in Xen; the
+		 * sleep gives things like the clock a chance to run.
+		 */
+#ifdef KDB
+		if (kdb_active)
+			continue;
+#endif
+		tsleep(&cn_mtx, PWAIT | PCATCH,
+		    "console sleep", XC_POLLTIME);
+	}
+}
+
+int
+xccncheckc(struct consdev *dev)
+{
+	int ch;
+
+	ch = xc_mute ? 0 : -1;	/* value reported when nothing is buffered */
+	if (xencons_has_input())
+		xencons_handle_input(NULL);
+	CN_LOCK(cn_mtx);
+	if (rp != rc) {
+		if (kdb_active) printf("%s:%d\n", __func__, __LINE__);
+		/* hand back a single character per call */
+		ch = (int)rbuf[RBUF_MASK(rc)];
+		rc++;
+	}
+	CN_UNLOCK(cn_mtx);
+	return (ch);
+}
+
+static void
+xccnputc(struct consdev *dev, int c)
+{
+	(void)xcons_putc(c);	/* the "nearly full" result is not needed here */
+}
+
+static void
+xccnputc_dom0(struct consdev *dev, int c)
+{
+	HYPERVISOR_console_io(CONSOLEIO_write, 1, (char *)&c);	/* NOTE(review): first byte of the int; little-endian only */
+}
+
+extern int db_active;
+static boolean_t
+xcons_putc(int c)
+{
+	int force_flush = xc_mute ||
+#ifdef DDB
+		db_active ||
+#endif
+		panicstr;	/* we're not gonna recover, so force
+				 * flush 
+				 */
+	/* Two free bytes are required because '\n' is followed by '\r'. */
+	if ((wp-wc) < (WBUF_SIZE-1)) {
+		if ((wbuf[WBUF_MASK(wp++)] = c) == '\n') {
+        		wbuf[WBUF_MASK(wp++)] = '\r';
+#ifdef notyet
+			if (force_flush)
+				xcons_force_flush();
+#endif
+		}
+	} else if (force_flush) {
+#ifdef notyet
+		xcons_force_flush();
+#endif	    	
+	}
+	if (cnsl_evt_reg)
+		__xencons_tx_flush();
+	/* NOTE(review): when the buffer was full, c has been dropped. */
+	/* inform start path that we're pretty full (<= 100 bytes free) */
+	return ((wp - wc) >= WBUF_SIZE - 100) ? TRUE : FALSE;
+}
+
+static void
+xc_identify(driver_t *driver, device_t parent)
+{
+	device_t child = BUS_ADD_CHILD(parent, 0, driver_name, 0);
+	if (child == NULL || device_set_driver(child, driver) != 0)
+		return;		/* no child, or no driver bound to it */
+	device_set_desc(child, "Xen Console");
+}
+
+/* Probe always succeeds; the unit number is cached in the softc. */
+static int
+xc_probe(device_t dev)
+{
+	((struct xc_softc *)device_get_softc(dev))->xc_unit =
+	    device_get_unit(dev);
+	return (0);
+}
+
+static int
+xc_attach(device_t dev) 
+{
+	int error;
+	struct xc_softc *sc = (struct xc_softc *)device_get_softc(dev);
+
+	/* In the initial domain console output goes via the hypervisor call. */
+	if (xen_start_info->flags & SIF_INITDOMAIN) {
+		xc_consdev.cn_putc = xccnputc_dom0;
+	} 
+
+	sc->xc_dev = make_dev(&xc_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "xc%r", 0);
+	xccons = ttyalloc();
+
+	sc->xc_dev->si_drv1 = (void *)sc;
+	sc->xc_dev->si_tty = xccons;
+
+	xccons->t_oproc = xcstart;
+	xccons->t_param = xcparam;
+	xccons->t_stop = xcstop;
+	xccons->t_dev = sc->xc_dev;
+
+	callout_init(&xc_callout, 0);
+
+	xencons_ring_init();
+	/* From now on xcons_putc() flushes output as it is produced. */
+	cnsl_evt_reg = 1;
+	callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, xccons);
+
+	if (xen_start_info->flags & SIF_INITDOMAIN) {
+		error = bind_virq_to_irqhandler(
+		    VIRQ_CONSOLE,
+		    0,
+		    "console",
+		    xencons_priv_interrupt,
+		    INTR_TYPE_TTY, NULL);
+
+		KASSERT(error >= 0, ("can't register console interrupt"));
+	}
+
+	/* register handler to flush console on shutdown */
+	if ((EVENTHANDLER_REGISTER(shutdown_post_sync, xc_shutdown,
+				   NULL, SHUTDOWN_PRI_DEFAULT)) == NULL)
+		printf("xencons: shutdown event registration failed!\n");
+
+	return (0);
+}
+
+/*
+ * Shutdown hook: mute input (reads yield 0) and force-flush all output.
+ */
+static void
+xc_shutdown(void *arg, int howto)
+{
+	xc_mute = 1;		/* checked by xccngetc() and xccncheckc() */
+	xcons_force_flush();	/* arg and howto are not used */
+}
+
+void 
+xencons_rx(char *buf, unsigned len)
+{
+	int           i;
+	struct tty *tp = xccons;
+	/* Debug aid: a leading backquote prints event-channel pending/mask word 0. */
+#if 1
+	if (len > 0 && buf[0] == '`')
+			printf("%08lx %08lx\r",
+				HYPERVISOR_shared_info->evtchn_pending[0],
+				HYPERVISOR_shared_info->evtchn_mask[0]);
+#endif
+	for (i = 0; i < len; i++) {
+		if (xen_console_up
+#ifdef DDB
+			&& !kdb_active
+#endif
+			) 
+			(*linesw[tp->t_line]->l_rint)(buf[i], tp);	/* tty is open: hand over directly */
+		else
+			rbuf[RBUF_MASK(rp++)] = buf[i];	/* otherwise buffer for xccncheckc() */
+	}
+}
+
+static void 
+__xencons_tx_flush(void)
+{
+	int chunk, n, progressed;
+
+	progressed = 0;
+	CN_LOCK(cn_mtx);
+	for (; wc != wp; progressed = 1) {
+		/* Contiguous run: pending bytes, clipped at the buffer end. */
+		chunk = wp - wc;
+		if (chunk > (WBUF_SIZE - WBUF_MASK(wc)))
+			chunk = WBUF_SIZE - WBUF_MASK(wc);
+		if (xen_start_info->flags & SIF_INITDOMAIN) {
+			HYPERVISOR_console_io(CONSOLEIO_write, chunk, &wbuf[WBUF_MASK(wc)]);
+			n = chunk;
+		} else {
+			n = xencons_ring_send(&wbuf[WBUF_MASK(wc)], chunk);
+			if (n == 0)
+				break;	/* nothing accepted: leave the rest queued */
+		}
+		wc += n;
+	}
+	CN_UNLOCK(cn_mtx);
+
+	/*
+	 * ttwakeup calls routines using blocking locks, so it is called
+	 * after the console lock is dropped and not in a critical section.
+	 */
+	if (progressed && xen_console_up && curthread->td_critnest == 0)
+		ttwakeup(xccons);
+}
+
+void
+xencons_tx(void)
+{
+	__xencons_tx_flush();	/* exported wrapper around the static flush routine */
+}
+
+static void
+xencons_priv_interrupt(void *arg)
+{
+	static char inbuf[16];	/* staging buffer for hypervisor reads */
+	int nread;
+
+	/* Drain pending input, then push out whatever output is queued. */
+	nread = HYPERVISOR_console_io(CONSOLEIO_read, 16, inbuf);
+	for (; nread > 0; nread = HYPERVISOR_console_io(CONSOLEIO_read, 16, inbuf))
+		xencons_rx(inbuf, nread);
+	xencons_tx();
+}
+
+int
+xcopen(struct cdev *dev, int flag, int mode, struct thread *td)
+{
+	device_t xcdev;
+	int unit = XCUNIT(dev);
+	struct tty *tp;
+	int s, error;
+
+	/* The lookup may yield NULL, which must not reach device_get_softc(). */
+	xcdev = devclass_get_device(xc_devclass, unit);
+	if (xcdev == NULL || device_get_softc(xcdev) == NULL)
+		return (ENXIO);
+    
+	tp = dev->si_tty;
+	s = spltty();
+	if (!ISTTYOPEN(tp)) {
+		tp->t_state |= TS_CARR_ON;
+		ttychars(tp);
+		tp->t_iflag = TTYDEF_IFLAG;
+		tp->t_oflag = TTYDEF_OFLAG;
+		tp->t_cflag = TTYDEF_CFLAG|CLOCAL;
+		tp->t_lflag = TTYDEF_LFLAG;
+		tp->t_ispeed = tp->t_ospeed = TTYDEF_SPEED;
+		xcparam(tp, &tp->t_termios);
+		ttsetwater(tp);
+	} else if (tp->t_state & TS_XCLUDE && suser(td)) {
+		splx(s);
+		return (EBUSY);
+	}
+	splx(s);
+
+	xen_console_up = 1;
+
+	error =  (*linesw[tp->t_line]->l_open)(dev, tp);
+	return error;
+}
+
+int
+xcclose(struct cdev *dev, int flag, int mode, struct thread *td)
+{
+	struct tty *tp;
+
+	tp = dev->si_tty;
+	if (tp != NULL) {
+		xen_console_up = 0;	/* xencons_rx() buffers input from now on */
+		spltty();
+		linesw[tp->t_line]->l_close(tp, flag);
+		tty_close(tp);
+		spl0();
+	}
+	return (0);
+}
+
+
+/*
+ * Offer the ioctl to the line discipline first and then to the generic
+ * tty layer; ENOTTY is returned when neither of them recognises it.
+ */
+int
+xcioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
+{
+	struct tty *tp = dev->si_tty;
+	int rv;
+
+	rv = linesw[tp->t_line]->l_ioctl(tp, cmd, data, flag, td);
+	if (rv == ENOIOCTL)
+		rv = ttioctl(tp, cmd, data, flag);
+	if (rv == ENOIOCTL)
+		rv = ENOTTY;
+	return (rv);
+}
+
+static inline int 
+__xencons_put_char(int ch)
+{
+	if ((wp - wc) == WBUF_SIZE)	/* write buffer completely full */
+		return 0;
+	wbuf[WBUF_MASK(wp)] = (char)ch;
+	wp++;
+	return 1;
+}
+
+
+/*
+ * tty output start routine: drain tp's output queue into the console
+ * write buffer until the queue empties or the buffer is nearly full.
+ */
+static void
+xcstart(struct tty *tp)
+{
+	boolean_t nearly_full;
+
+	CN_LOCK(cn_mtx);
+	if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) {
+		CN_UNLOCK(cn_mtx);
+		ttwwakeup(tp);
+		return;
+	}
+	tp->t_state |= TS_BUSY;
+	CN_UNLOCK(cn_mtx);
+
+	for (nearly_full = FALSE; tp->t_outq.c_cc != 0 && !nearly_full;)
+		nearly_full = xcons_putc(getc(&tp->t_outq));
+
+	if (nearly_full) {
+		/* Stay busy and let the timeout kick us in a bit. */
+		xc_start_needed = TRUE;
+		return;
+	}
+	CN_LOCK(cn_mtx);
+	tp->t_state &= ~TS_BUSY;
+	CN_UNLOCK(cn_mtx);
+	ttwwakeup(tp);
+}
+
+/*
+ * tty stop routine; 'flag' is not consulted.
+ */
+static void
+xcstop(struct tty *tp, int flag)
+{
+	/* Request a flush only while output is busy and not stopped. */
+	if ((tp->t_state & (TS_BUSY | TS_TTSTOP)) == TS_BUSY)
+		tp->t_state |= TS_FLUSH;
+}
+
+/* Periodic poll: deliver buffered input, restart deferred output, re-arm. */
+static void
+xc_timeout(void *v)
+{
+	struct tty *tp = v;
+	int c;
+
+	/*
+	 * Once muted, xccncheckc() reports 0 rather than -1 when idle, so
+	 * polling has to stop or this loop would never terminate.
+	 */
+	while (!xc_mute && (c = xccncheckc(NULL)) != -1) {
+		if (tp->t_state & TS_ISOPEN)
+			(*linesw[tp->t_line]->l_rint)(c, tp);
+	}
+	if (xc_start_needed) {
+		xc_start_needed = FALSE;
+		xcstart(tp);
+	}
+	callout_reset(&xc_callout, XC_POLLTIME, xc_timeout, tp);
+}
+
+/*
+ * Set line parameters: speeds and control flags are copied verbatim.
+ */
+int
+xcparam(struct tty *tp, struct termios *t)
+{
+	tp->t_cflag = t->c_cflag;	/* control flags */
+	tp->t_ospeed = t->c_ospeed;	/* output speed */
+	tp->t_ispeed = t->c_ispeed;	/* input speed */
+	return (0);
+}
+
+
+static device_method_t xc_methods[] = {
+	DEVMETHOD(device_identify, xc_identify),	/* adds the "xc" child */
+	DEVMETHOD(device_probe, xc_probe),
+	DEVMETHOD(device_attach, xc_attach),
+	{0, 0}	/* end of table */
+};
+
+static driver_t xc_driver = {
+	driver_name,
+	xc_methods,
+	sizeof(struct xc_softc),	/* softc size */
+};
+
+/*** Forcibly flush console data before dying. ***/
+void 
+xcons_force_flush(void)
+{
+	int        sz, sent;
+
+	if (xen_start_info->flags & SIF_INITDOMAIN)
+		return;
+
+	/* Spin until console data is flushed through to the domain controller. */
+	while (wc != wp) {
+		/* Never read past the end of wbuf; the wrapped part goes next. */
+		sz = wp - wc;
+		if (sz > (WBUF_SIZE - WBUF_MASK(wc)))
+			sz = WBUF_SIZE - WBUF_MASK(wc);
+		sent = xencons_ring_send(&wbuf[WBUF_MASK(wc)], sz);
+		if (sent > 0)
+			wc += sent;		
+	}
+}
+
+DRIVER_MODULE(xc, nexus, xc_driver, xc_devclass, 0, 0);

Property changes on: dev/xen/console/console.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: dev/xen/console/xencons_ring.c
===================================================================
--- dev/xen/console/xencons_ring.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/console/xencons_ring.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,165 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/systm.h>
+#include <sys/consio.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/tty.h>
+#include <sys/systm.h>
+#include <sys/taskqueue.h>
+#include <sys/conf.h>
+#include <sys/kernel.h>
+#include <sys/bus.h>
+#include <sys/cons.h>
+
+#include <machine/stdarg.h>
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/xen_intr.h>
+#include <sys/cons.h>
+
+#include <xen/xen_intr.h>
+#include <xen/evtchn.h>
+#include <xen/interface/io/console.h>
+
+#include <dev/xen/console/xencons_ring.h>
+#include <xen/evtchn.h>
+#include <xen/interface/io/console.h>
+
+#define console_evtchn	console.domU.evtchn	/* so xen_start_info->console_evtchn works */
+static unsigned int console_irq;	/* filled in by the bind in xencons_ring_init() */
+extern char *console_page;	/* cast to struct xencons_interface * below */
+extern struct mtx              cn_mtx;	/* defined in console.c */
+
+static inline struct xencons_interface *
+xencons_interface(void)
+{
+	return (struct xencons_interface *)console_page;	/* typed view of the console page */
+}
+
+
+/* Non-zero when the input ring holds bytes that were not consumed yet. */
+int
+xencons_has_input(void)
+{
+	struct xencons_interface *ring = xencons_interface();
+
+	/* Unequal producer and consumer indices mean pending bytes. */
+	return (ring->in_prod != ring->in_cons);
+}
+
+
+/*
+ * Copy up to len bytes of data into the console output ring.  Returns the
+ * number of bytes actually queued, which is less than len when the ring
+ * fills up.  The event channel is notified on every call.
+ */
+int 
+xencons_ring_send(const char *data, unsigned len)
+{
+	struct xencons_interface *intf = xencons_interface();
+	XENCONS_RING_IDX cons = intf->out_cons;
+	XENCONS_RING_IDX prod = intf->out_prod;
+	int sent;
+
+	mb();
+	KASSERT((prod - cons) <= sizeof(intf->out),
+		("console send ring inconsistent"));
+
+	for (sent = 0; sent < len && (prod - cons) < sizeof(intf->out);
+	    sent++, prod++)
+		intf->out[MASK_XENCONS_IDX(prod, intf->out)] = data[sent];
+
+	/* Barrier between the data stores and the producer index update. */
+	wmb();
+	intf->out_prod = prod;
+	notify_remote_via_evtchn(xen_start_info->console_evtchn);
+	return sent;
+}
+
+
+static xencons_receiver_func *xencons_receiver;	/* set by xencons_ring_register_receiver() */
+/* Event-channel handler: drain the input ring, then flush output. */
+void 
+xencons_handle_input(void *unused)
+{
+	struct xencons_interface *intf;
+	XENCONS_RING_IDX cons, prod;
+
+	mtx_lock(&cn_mtx);
+	intf = xencons_interface();
+	/* Snapshot both indices; bytes in [cons, prod) are unread. */
+	cons = intf->in_cons;
+	prod = intf->in_prod;
+
+	/* XXX needs locking */
+	while (cons != prod) {
+		xencons_rx(intf->in + MASK_XENCONS_IDX(cons, intf->in), 1);
+		cons++;
+	}
+	/* Publish the new consumer index only after the bytes were read. */
+	mb();
+	intf->in_cons = cons;
+	/* Notify the other end of the console event channel. */
+	notify_remote_via_evtchn(xen_start_info->console_evtchn);
+
+	xencons_tx();
+	mtx_unlock(&cn_mtx);
+}
+
+void 
+xencons_ring_register_receiver(xencons_receiver_func *f)
+{
+	xencons_receiver = f;	/* only stored; no call through it is visible in this file */
+}
+
+/*
+ * Hook xencons_handle_input() up to the console event channel.  Returns 0
+ * on success or when no console event channel exists, otherwise the
+ * error from the bind call.
+ */
+int
+xencons_ring_init(void)
+{
+
+	if (xen_start_info->console_evtchn == 0)
+		return 0;
+
+	/* A zero result from the bind is success, so pass it straight on. */
+	return (bind_caller_port_to_irqhandler(xen_start_info->console_evtchn,
+	    "xencons", xencons_handle_input, NULL,
+	    INTR_TYPE_MISC | INTR_MPSAFE, &console_irq));
+}
+
+extern void xencons_suspend(void);
+extern void xencons_resume(void);
+
+/* Drop the console interrupt binding when a console event channel exists. */
+void 
+xencons_suspend(void)
+{
+
+	/* Without an event channel xencons_ring_init() bound nothing. */
+	if (xen_start_info->console_evtchn != 0)
+		unbind_from_irqhandler(console_irq);
+}
+
+void 
+xencons_resume(void)
+{
+	/* Re-bind the console event channel; a bind failure is ignored here. */
+	(void)xencons_ring_init();
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 8
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */

Property changes on: dev/xen/console/xencons_ring.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: dev/xen/pcifront/pcifront.c
===================================================================
--- dev/xen/pcifront/pcifront.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/pcifront/pcifront.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,688 @@
+/*
+ * Copyright (c) 2006, Cisco Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions 
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in the 
+ *    documentation and/or other materials provided with the distribution. 
+ * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors 
+ *    may be used to endorse or promote products derived from this software 
+ *    without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/module.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+
+#include <machine/vmparam.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <machine/frame.h>
+
+#include <sys/bus.h>
+#include <sys/rman.h>
+
+#include <machine/intr_machdep.h>
+
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen_intr.h>
+#include <machine/evtchn.h>
+#include <machine/xenbus.h>
+#include <machine/gnttab.h>
+#include <machine/xen-public/memory.h>
+#include <machine/xen-public/io/pciif.h>
+
+#include <sys/pciio.h>
+#include <dev/pci/pcivar.h>
+#include "pcib_if.h"
+
+#ifdef XEN_PCIDEV_FE_DEBUG
+#define DPRINTF(fmt, args...) \
+    printf("pcifront (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTF(fmt, args...) ((void)0)
+#endif
+#define WPRINTF(fmt, args...) \
+    printf("pcifront (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+
+#define INVALID_GRANT_REF (0)
+#define INVALID_EVTCHN    (-1)
+#define virt_to_mfn(x) (vtomach(x) >> PAGE_SHIFT)
+
+struct pcifront_device {
+	STAILQ_ENTRY(pcifront_device) next;
+
+	struct xenbus_device *xdev;
+
+	int unit;
+	int evtchn;
+	int gnt_ref;
+
+	/* Lock this when doing any operations in sh_info */
+	struct mtx sh_info_lock;
+	struct xen_pci_sharedinfo *sh_info;
+
+	device_t ndev;
+
+	int ref_cnt;
+};
+
+static STAILQ_HEAD(pcifront_dlist, pcifront_device) pdev_list = STAILQ_HEAD_INITIALIZER(pdev_list);
+
+struct xpcib_softc {
+	int domain;
+	int bus;
+	struct pcifront_device *pdev;
+};
+
+/* Allocate and queue a pcifront_device for xdev; returns NULL on failure */
+static struct pcifront_device *
+alloc_pdev(struct xenbus_device *xdev)
+{
+	struct pcifront_device *pdev = NULL;
+	int err, unit;
+
+	err = sscanf(xdev->nodename, "device/pci/%d", &unit);
+	if (err != 1) {
+		if (err == 0)
+			err = -EINVAL;
+		/* pdev is still NULL here, so the error must be reported via xdev */
+		xenbus_dev_fatal(xdev, err, "Error scanning pci device instance number");
+		goto out;
+	}
+
+	pdev = (struct pcifront_device *)malloc(sizeof(struct pcifront_device), M_DEVBUF, M_NOWAIT);
+	if (pdev == NULL) {
+		err = -ENOMEM;
+		xenbus_dev_fatal(xdev, err, "Error allocating pcifront_device struct");
+		goto out;
+	}
+	pdev->unit = unit;
+	pdev->xdev = xdev;
+	pdev->ref_cnt = 1;
+
+	pdev->sh_info = (struct xen_pci_sharedinfo *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
+	if (pdev->sh_info == NULL) {
+		free(pdev, M_DEVBUF);
+		pdev = NULL;
+		err = -ENOMEM;
+		xenbus_dev_fatal(xdev, err, "Error allocating sh_info struct");
+		goto out;
+	}
+	pdev->sh_info->flags = 0;
+	xdev->data = pdev;
+
+	mtx_init(&pdev->sh_info_lock, "info_lock", "pci shared dev info lock", MTX_DEF);
+
+	pdev->evtchn = INVALID_EVTCHN;
+	pdev->gnt_ref = INVALID_GRANT_REF;
+
+	STAILQ_INSERT_TAIL(&pdev_list, pdev, next);
+
+	DPRINTF("Allocated pdev @ 0x%p (unit=%d)\n", pdev, unit);
+
+ out:
+	return pdev;
+}
+
+/* Take one more reference on pdev (plain increment; no lock is taken here) */
+static void
+get_pdev(struct pcifront_device *pdev)
+{
+	pdev->ref_cnt++;
+}
+
+/* Drop a reference; the final put unlinks the device and releases its resources */
+static void
+put_pdev(struct pcifront_device *pdev)
+{
+	if (--pdev->ref_cnt > 0)
+		return;
+
+	DPRINTF("freeing pdev @ 0x%p (ref_cnt=%d)\n", pdev, pdev->ref_cnt);
+	/* alloc_pdev() queued pdev on pdev_list; unlink it before it is freed */
+	STAILQ_REMOVE(&pdev_list, pdev, pcifront_device, next);
+	if (pdev->evtchn != INVALID_EVTCHN)
+		xenbus_free_evtchn(pdev->xdev, pdev->evtchn);
+	if (pdev->gnt_ref != INVALID_GRANT_REF)
+		gnttab_end_foreign_access(pdev->gnt_ref, 0, (void *)pdev->sh_info);
+	mtx_destroy(&pdev->sh_info_lock);
+	pdev->xdev->data = NULL;
+
+	free(pdev, M_DEVBUF);
+}
+
+
+/* Grant the shared page, allocate an event channel and publish both in xenbus */
+static int
+pcifront_publish_info(struct pcifront_device *pdev)
+{
+	int err = 0;
+	struct xenbus_transaction *trans;
+
+	err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
+	if (err < 0) {
+		WPRINTF("error granting access to ring page\n");
+		goto out;
+	}
+
+	pdev->gnt_ref = err;
+
+	err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
+	if (err)
+		goto out;
+
+ do_publish:
+	trans = xenbus_transaction_start();
+	if (IS_ERR(trans)) {
+		err = PTR_ERR(trans);	/* err was 0 here; return the real failure */
+		xenbus_dev_fatal(pdev->xdev, err,
+						 "Error writing configuration for backend (start transaction)");
+		goto out;
+	}
+
+	err = xenbus_printf(trans, pdev->xdev->nodename,
+						"pci-op-ref", "%u", pdev->gnt_ref);
+	if (!err)
+		err = xenbus_printf(trans, pdev->xdev->nodename,
+							"event-channel", "%u", pdev->evtchn);
+	if (!err)
+		err = xenbus_printf(trans, pdev->xdev->nodename,
+							"magic", XEN_PCI_MAGIC);
+	if (!err)
+		err = xenbus_switch_state(pdev->xdev, trans,
+								  XenbusStateInitialised);
+
+	if (err) {
+		xenbus_transaction_end(trans, 1);
+		xenbus_dev_fatal(pdev->xdev, err,
+						 "Error writing configuration for backend");
+		goto out;
+	} else {
+		err = xenbus_transaction_end(trans, 0);
+		if (err == -EAGAIN)
+			goto do_publish;	/* transaction raced; redo it from the start */
+		else if (err) {
+			xenbus_dev_fatal(pdev->xdev, err,
+							 "Error completing transaction for backend");
+			goto out;
+		}
+	}
+
+ out:
+	return err;
+}
+
+/* Backend is connected: create an "xpcife" newbus child under nexus0 and attach it */
+static int
+pcifront_connect(struct pcifront_device *pdev)
+{
+	device_t nexus;
+	devclass_t nexus_devclass;
+
+	/* We will add our device as a child of the nexus0 device */
+	if (!(nexus_devclass = devclass_find("nexus")) ||
+		!(nexus = devclass_get_device(nexus_devclass, 0))) {
+		WPRINTF("could not find nexus0!\n");
+		return -1;	/* NOTE(review): not an errno value, unlike -EFAULT below */
+	}
+
+	/* Create a newbus device representing this frontend instance */
+	pdev->ndev = BUS_ADD_CHILD(nexus, 0, "xpcife", pdev->unit);
+	if (!pdev->ndev) {
+		WPRINTF("could not create xpcife%d!\n", pdev->unit);
+		return -EFAULT;
+	}
+	get_pdev(pdev);	/* the child's ivars point at pdev from here on */
+	device_set_ivars(pdev->ndev, pdev);
+
+	/* Switch our xenbus state to Connected before probing the new child */
+	xenbus_switch_state(pdev->xdev, NULL, XenbusStateConnected);
+
+	printf("pcifront: connected to %s\n", pdev->xdev->nodename);
+
+	mtx_lock(&Giant);
+	device_probe_and_attach(pdev->ndev);	/* result is not checked */
+	mtx_unlock(&Giant);
+
+	return 0;
+}
+
+/* Backend is closing/closed: move our state to Closing unless already at or past it */
+static int
+pcifront_disconnect(struct pcifront_device *pdev)
+{
+	int err = 0;
+	XenbusState prev_state;
+
+	prev_state = xenbus_read_driver_state(pdev->xdev->nodename);
+
+	if (prev_state < XenbusStateClosing) {
+		err = xenbus_switch_state(pdev->xdev, NULL, XenbusStateClosing);
+		if (!err && prev_state == XenbusStateConnected) {
+			/* TODO - need to detach the newbus devices */
+		}
+	}
+
+	return err;	/* 0, or the error from xenbus_switch_state() */
+}
+
+/* xenbus probe: allocate our per-device state and publish it to the backend */
+static int
+pcifront_probe(struct xenbus_device *xdev,
+			   const struct xenbus_device_id *id)
+{
+	int err;
+	struct pcifront_device *pdev;
+
+	DPRINTF("xenbus probing\n");
+
+	/* alloc_pdev() already reported the cause; do not claim success */
+	if ((pdev = alloc_pdev(xdev)) == NULL)
+		return -ENOMEM;
+
+	/* On failure drop the initial reference so the device is torn down */
+	err = pcifront_publish_info(pdev);
+	if (err)
+		put_pdev(pdev);
+	return err;
+}
+
+/* xenbus remove: drop one reference on the device stored in xdev->data, if any */
+static int
+pcifront_remove(struct xenbus_device *xdev)
+{
+	DPRINTF("removing xenbus device node (%s)\n", xdev->nodename);
+	if (xdev->data)
+		put_pdev(xdev->data);
+	return 0;
+}
+
+/* xenbus callback for backend state changes; helper return values are ignored */
+static void
+pcifront_backend_changed(struct xenbus_device *xdev,
+						 XenbusState be_state)
+{
+	struct pcifront_device *pdev = xdev->data;
+
+	switch (be_state) {
+	case XenbusStateClosing:
+		DPRINTF("backend closing (%s)\n", xdev->nodename);
+		pcifront_disconnect(pdev);
+		break;
+
+	case XenbusStateClosed:
+		DPRINTF("backend closed (%s)\n", xdev->nodename);
+		pcifront_disconnect(pdev);
+		break;
+
+	case XenbusStateConnected:
+		DPRINTF("backend connected (%s)\n", xdev->nodename);
+		pcifront_connect(pdev);
+		break;
+		
+	default:	/* every other backend state is ignored */
+		break;
+	}
+}
+
+/* Hand op to the backend through the shared page and wait for its reply */
+static int
+do_pci_op(struct pcifront_device *pdev, struct xen_pci_op *op)
+{
+	int err = 0;
+	struct xen_pci_op *active_op = &pdev->sh_info->op;
+	evtchn_port_t port = pdev->evtchn;
+	time_t timeout;
+
+	mtx_lock(&pdev->sh_info_lock);
+
+	memcpy(active_op, op, sizeof(struct xen_pci_op));
+
+	/* Publish the request before raising the active flag */
+	wmb();
+	set_bit(_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags);
+	notify_remote_via_evtchn(port);
+
+	timeout = time_uptime + 2;
+
+	clear_evtchn(port);
+
+	/* Spin while waiting for the answer */
+	while (test_bit
+	       (_XEN_PCIF_active, (unsigned long *)&pdev->sh_info->flags)) {
+		int poll_err = HYPERVISOR_poll(&port, 1, 3 * hz);
+		if (poll_err)
+			panic("Failed HYPERVISOR_poll: err=%d", poll_err);
+		clear_evtchn(port);
+		if (time_uptime > timeout) {
+			WPRINTF("pciback not responding!!!\n");
+			clear_bit(_XEN_PCIF_active,
+				  (unsigned long *)&pdev->sh_info->flags);
+			/* Must set the function-level err, not a shadowing local */
+			err = XEN_PCI_ERR_dev_not_found;
+			goto out;
+		}
+	}
+	memcpy(op, active_op, sizeof(struct xen_pci_op));
+
+	err = op->err;
+ out:
+	mtx_unlock(&pdev->sh_info_lock);
+	return err;
+}
+
+/* ** XenBus Driver registration ** */
+
+static struct xenbus_device_id pcifront_ids[] = {
+	{ "pci" },
+	{ "" }
+};
+
+static struct xenbus_driver pcifront = {
+	.name = "pcifront",
+	.ids = pcifront_ids,
+	.probe = pcifront_probe,
+	.remove = pcifront_remove,
+	.otherend_changed = pcifront_backend_changed,
+};
+
+/* SYSINIT hook: register the frontend with xenbus unless SIF_INITDOMAIN is set */
+static void
+pcifront_init(void *unused)
+{
+	if ((xen_start_info->flags & SIF_INITDOMAIN))
+		return;
+
+	DPRINTF("xenbus registering\n");
+
+	xenbus_register_frontend(&pcifront);
+}
+
+SYSINIT(pciif, SI_SUB_PSEUDO, SI_ORDER_ANY, pcifront_init, NULL)
+
+
+/* Newbus xpcife probe: always returns 0; only logs when debugging is enabled */
+static int
+xpcife_probe(device_t dev)
+{
+#ifdef XEN_PCIDEV_FE_DEBUG
+	struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(dev);
+	DPRINTF("xpcife probe (unit=%d)\n", pdev->unit);
+#endif
+	return 0;
+}
+
+/* Newbus xpcife attach: add one "pcib" child per PCI root listed by the backend */
+static int
+xpcife_attach(device_t dev) 
+{
+	struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(dev);
+	int i, num_roots, len, err;
+	char str[64];
+	unsigned int domain, bus;
+
+	DPRINTF("xpcife attach (unit=%d)\n", pdev->unit);
+
+	err = xenbus_scanf(NULL, pdev->xdev->otherend,
+					   "root_num", "%d", &num_roots);
+	if (err != 1) {
+		if (err == 0)
+			err = -EINVAL;
+		xenbus_dev_fatal(pdev->xdev, err,
+						 "Error reading number of PCI roots");
+		goto out;
+	}
+
+	/* Add a pcib device for each root */
+	for (i = 0; i < num_roots; i++) {
+		device_t child;
+
+		len = snprintf(str, sizeof(str), "root-%d", i);
+		if (unlikely(len >= (sizeof(str) - 1))) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		err = xenbus_scanf(NULL, pdev->xdev->otherend, str,
+						   "%x:%x", &domain, &bus);
+		if (err != 2) {
+			if (err >= 0)
+				err = -EINVAL;
+			xenbus_dev_fatal(pdev->xdev, err,
+							 "Error reading PCI root %d", i);
+			goto out;
+		}
+		err = 0;
+		if (domain != pdev->xdev->otherend_id) {
+			err = -EINVAL;
+			xenbus_dev_fatal(pdev->xdev, err,
+							 "Domain mismatch %d != %d", domain, pdev->xdev->otherend_id);
+			goto out;
+		}
+		
+		child = device_add_child(dev, "pcib", bus);
+		if (!child) {
+			err = -ENOMEM;
+			xenbus_dev_fatal(pdev->xdev, err,
+							 "Unable to create pcib%d", bus);
+			goto out;
+		}
+	}
+
+ out:
+	return bus_generic_attach(dev);	/* NOTE(review): err from the paths above is dropped */
+}
+
+static devclass_t xpcife_devclass;
+
+static device_method_t xpcife_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe, xpcife_probe),
+	DEVMETHOD(device_attach, xpcife_attach),
+	DEVMETHOD(device_detach,	bus_generic_detach),
+	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
+	DEVMETHOD(device_suspend,	bus_generic_suspend),
+	DEVMETHOD(device_resume,	bus_generic_resume),
+    /* Bus interface */
+    DEVMETHOD(bus_print_child,		bus_generic_print_child),
+    DEVMETHOD(bus_alloc_resource,	bus_generic_alloc_resource),
+    DEVMETHOD(bus_release_resource,	bus_generic_release_resource),
+    DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
+    DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
+    DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
+    DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
+	{0, 0}
+};
+
+static driver_t xpcife_driver = {
+	"xpcife",
+	xpcife_methods,
+	0,
+};
+
+DRIVER_MODULE(xpcife, nexus, xpcife_driver, xpcife_devclass, 0, 0);
+
+
+/* Newbus xen pcib probe: fill the softc from the parent's pcifront_device; always 0 */
+static int
+xpcib_probe(device_t dev)
+{
+	struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev);
+	struct pcifront_device *pdev = (struct pcifront_device *)device_get_ivars(device_get_parent(dev));
+
+	DPRINTF("xpcib probe (bus=%d)\n", device_get_unit(dev));
+
+	sc->domain = pdev->xdev->otherend_id;
+	sc->bus = device_get_unit(dev);	/* xpcife_attach() used the bus number as the unit */
+	sc->pdev = pdev;
+	
+	return 0;
+}
+
+/* Newbus xen pcib attach: add a "pci" child for our bus and attach children */
+static int
+xpcib_attach(device_t dev) 
+{
+	struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev);
+
+	DPRINTF("xpcib attach (bus=%d)\n", sc->bus);
+
+	device_add_child(dev, "pci", sc->bus);	/* result is not checked */
+	return bus_generic_attach(dev);
+}
+
+/* bus_read_ivar method: only PCIB_IVAR_BUS is provided; anything else is ENOENT */
+static int
+xpcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
+{
+	struct xpcib_softc *softc = device_get_softc(dev);
+
+	if (which != PCIB_IVAR_BUS)
+		return ENOENT;
+	*result = softc->bus;
+	return 0;
+}
+
+/* pcib_maxslots method: constant 31 (the largest value PCI_DEVFN's 0x1f mask keeps) */
+static int
+xpcib_maxslots(device_t dev)
+{
+	return 31;
+}
+
+#define PCI_DEVFN(slot,func)	((((slot) & 0x1f) << 3) | ((func) & 0x07))
+
+/* Read configuration space register */
+static u_int32_t
+xpcib_read_config(device_t dev, int bus, int slot, int func,
+				  int reg, int bytes)
+{
+	struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev);
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_conf_read,
+		.domain = sc->domain,
+		.bus    = sc->bus,
+		.devfn  = PCI_DEVFN(slot, func),
+		.offset = reg,
+		.size   = bytes,
+	};
+	int err;
+
+	err = do_pci_op(sc->pdev, &op);
+	
+	DPRINTF("read config (b=%d, s=%d, f=%d, reg=%d, len=%d, val=%x, err=%d)\n",
+			bus, slot, func, reg, bytes, op.value, err);
+
+	if (err)
+		op.value = ~0;
+
+	return op.value;
+}
+
+/* pcib_write_config method: write a config register through the backend */
+static void
+xpcib_write_config(device_t dev, int bus, int slot, int func,
+				   int reg, u_int32_t data, int bytes)
+{
+	struct xpcib_softc *sc = (struct xpcib_softc *)device_get_softc(dev);
+	struct xen_pci_op op = {
+		.cmd    = XEN_PCI_OP_conf_write,
+		.domain = sc->domain,
+		.bus    = sc->bus,
+		.devfn  = PCI_DEVFN(slot, func),
+		.offset = reg,
+		.size   = bytes,
+		.value  = data,
+	};
+	int err;
+
+	err = do_pci_op(sc->pdev, &op);	/* failure is only visible in the debug log */
+
+	DPRINTF("write config (b=%d, s=%d, f=%d, reg=%d, len=%d, val=%x, err=%d)\n",
+			bus, slot, func, reg, bytes, data, err);
+}
+
+static int
+xpcib_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+	struct pci_devinfo *dinfo = device_get_ivars(dev);
+	pcicfgregs *cfg = &dinfo->cfg;
+	/* No routing is done: the device's recorded intline is returned as-is. */
+	DPRINTF("route intr (pin=%d, line=%d)\n", pin, cfg->intline);
+
+	return cfg->intline;
+}
+
+static device_method_t xpcib_methods[] = {
+    /* Device interface */
+    DEVMETHOD(device_probe,			xpcib_probe),
+    DEVMETHOD(device_attach,		xpcib_attach),
+    DEVMETHOD(device_detach,		bus_generic_detach),
+    DEVMETHOD(device_shutdown,		bus_generic_shutdown),
+    DEVMETHOD(device_suspend,		bus_generic_suspend),
+    DEVMETHOD(device_resume,		bus_generic_resume),
+
+    /* Bus interface */
+    DEVMETHOD(bus_print_child,		bus_generic_print_child),
+    DEVMETHOD(bus_read_ivar,		xpcib_read_ivar),
+    DEVMETHOD(bus_alloc_resource,	bus_generic_alloc_resource),
+    DEVMETHOD(bus_activate_resource,	bus_generic_activate_resource),
+    DEVMETHOD(bus_release_resource,	bus_generic_release_resource),
+    DEVMETHOD(bus_deactivate_resource,	bus_generic_deactivate_resource),
+    DEVMETHOD(bus_setup_intr,		bus_generic_setup_intr),
+    DEVMETHOD(bus_teardown_intr,	bus_generic_teardown_intr),
+
+    /* pcib interface */
+    DEVMETHOD(pcib_maxslots,		xpcib_maxslots),
+    DEVMETHOD(pcib_read_config,		xpcib_read_config),
+    DEVMETHOD(pcib_write_config,	xpcib_write_config),
+    DEVMETHOD(pcib_route_interrupt,	xpcib_route_interrupt),
+    { 0, 0 }
+};
+
+static devclass_t xpcib_devclass;
+
+DEFINE_CLASS_0(pcib, xpcib_driver, xpcib_methods, sizeof(struct xpcib_softc));
+DRIVER_MODULE(pcib, xpcife, xpcib_driver, xpcib_devclass, 0, 0);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */

Property changes on: dev/xen/pcifront/pcifront.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: dev/xen/balloon/balloon.c
===================================================================
--- dev/xen/balloon/balloon.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/balloon/balloon.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,565 @@
+/******************************************************************************
+ * balloon.c
+ *
+ * Xen balloon driver - enables returning/claiming memory to/from Xen.
+ *
+ * Copyright (c) 2003, B Dragovic
+ * Copyright (c) 2003-2004, M Williamson, K Fraser
+ * Copyright (c) 2005 Dan M. Smith, IBM Corporation
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/sysctl.h>
+
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenfunc.h>
+#include <machine/xen/xenvar.h>
+#include <xen/hypervisor.h>
+#include <xen/xenbus/xenbusvar.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+
+MALLOC_DEFINE(M_BALLOON, "Balloon", "Xen Balloon Driver");
+
+struct mtx balloon_mutex;
+
+/*
+ * Protects atomic reservation decrease/increase against concurrent increases.
+ * Also protects non-atomic updates of current_pages and driver_pages, and
+ * balloon lists.
+ */
+struct mtx balloon_lock;
+
+/* We increase/decrease in batches which fit in a page */
+static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
+#define ARRAY_SIZE(A)	(sizeof(A) / sizeof(A[0]))
+
+struct balloon_stats {
+	/* We aim for 'current allocation' == 'target allocation'. */
+	unsigned long current_pages;
+	unsigned long target_pages;
+	/* We may hit the hard limit in Xen. If we do then we remember it. */
+	unsigned long hard_limit;
+	/*
+	 * Drivers may alter the memory reservation independently, but they
+	 * must inform the balloon driver so we avoid hitting the hard limit.
+	 */
+	unsigned long driver_pages;
+	/* Number of pages in high- and low-memory balloons. */
+	unsigned long balloon_low;
+	unsigned long balloon_high;
+};
+
+static struct balloon_stats balloon_stats;
+#define bs balloon_stats
+
+SYSCTL_DECL(_dev_xen);
+SYSCTL_NODE(_dev_xen, OID_AUTO, balloon, CTLFLAG_RD, NULL, "Balloon");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, current, CTLFLAG_RD,
+    &bs.current_pages, 0, "Current allocation");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, target, CTLFLAG_RD,
+    &bs.target_pages, 0, "Target allocation");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, driver_pages, CTLFLAG_RD,
+    &bs.driver_pages, 0, "Driver pages");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, hard_limit, CTLFLAG_RD,
+    &bs.hard_limit, 0, "Xen hard limit");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD,
+    &bs.balloon_low, 0, "Low-mem balloon");
+SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD,
+    &bs.balloon_high, 0, "High-mem balloon");
+
+struct balloon_entry {
+	vm_page_t page;
+	STAILQ_ENTRY(balloon_entry) list;
+};
+
+/* List of ballooned pages, threaded through the mem_map array. */
+static STAILQ_HEAD(,balloon_entry) ballooned_pages;
+
+/* Main work function, always executed in process context. */
+static void balloon_process(void *unused);
+
+#define IPRINTK(fmt, args...) \
+	printk(KERN_INFO "xen_mem: " fmt, ##args)
+#define WPRINTK(fmt, args...) \
+	printk(KERN_WARNING "xen_mem: " fmt, ##args)
+
+/* balloon_append: push page onto ballooned_pages and count it in balloon_low. */
+static void 
+balloon_append(vm_page_t page)
+{
+	struct balloon_entry *entry;
+
+	/* NOTE(review): M_WAITOK, yet decrease_reservation() calls this under balloon_lock */
+	entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_WAITOK);
+	entry->page = page;
+	STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
+	bs.balloon_low++;
+}
+
+/* balloon_retrieve: pop a page from the balloon; NULL if the balloon is empty. */
+static vm_page_t
+balloon_retrieve(void)
+{
+	vm_page_t page;
+	struct balloon_entry *entry;
+
+	if (STAILQ_EMPTY(&ballooned_pages))
+		return NULL;
+
+	entry = STAILQ_FIRST(&ballooned_pages);
+	STAILQ_REMOVE_HEAD(&ballooned_pages, list);
+
+	page = entry->page;
+	free(entry, M_BALLOON);	/* must match the type used in balloon_append() */
+	
+	bs.balloon_low--;
+
+	return page;
+}
+
+static void 
+balloon_alarm(void *unused)
+{
+	wakeup(balloon_process);	/* same address balloon_process() msleep()s on */
+}
+
+/* Target clamped to the hard limit and to current plus ballooned pages. */
+static unsigned long 
+current_target(void)
+{
+	unsigned long goal = min(bs.target_pages, bs.hard_limit);
+	unsigned long ceiling = bs.current_pages + bs.balloon_low + bs.balloon_high;
+	return (goal > ceiling) ? ceiling : goal;
+}
+
+/* Lower bound for the target: a function of max_pfn, capped at current_target(). */
+static unsigned long
+minimum_target(void)
+{
+#ifdef XENHVM
+#define max_pfn physmem
+#endif
+	unsigned long min_pages, curr_pages = current_target();
+
+#define MB2PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
+	/* Simple continuous piecewise linear function:
+	 *  max MiB -> min MiB	gradient
+	 *       0	   0
+	 *      16	  16
+	 *      32	  24
+	 *     128	  72	(1/2)
+	 *     512 	 168	(1/4)
+	 *    2048	 360	(1/8)
+	 *    8192	 552	(1/32)
+	 *   32768	1320
+	 *  131072	4392
+	 */
+	if (max_pfn < MB2PAGES(128))
+		min_pages = MB2PAGES(8) + (max_pfn >> 1);
+	else if (max_pfn < MB2PAGES(512))
+		min_pages = MB2PAGES(40) + (max_pfn >> 2);
+	else if (max_pfn < MB2PAGES(2048))
+		min_pages = MB2PAGES(104) + (max_pfn >> 3);
+	else
+		min_pages = MB2PAGES(296) + (max_pfn >> 5);
+#undef MB2PAGES
+	/* Don't enforce growth */
+	return min(min_pages, curr_pages);
+	/* Undo the alias only where it was created (same guard as the #define). */
+#ifdef XENHVM
+#undef max_pfn
+#endif
+}
+
+/* Reclaim up to nr_pages ballooned pages from Xen; non-zero if the hypercall failed. */
+static int 
+increase_reservation(unsigned long nr_pages)
+{
+	unsigned long  pfn, i;
+	struct balloon_entry *entry;
+	vm_page_t      page;
+	long           rc;
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+
+	if (nr_pages > ARRAY_SIZE(frame_list))
+		nr_pages = ARRAY_SIZE(frame_list);
+
+	mtx_lock(&balloon_lock);
+
+	for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
+	     i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
+		KASSERT(entry, ("ballooned_pages list corrupt"));
+		page = entry->page;
+		frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
+	}
+
+	set_xen_guest_handle(reservation.extent_start, frame_list);
+	reservation.nr_extents   = nr_pages;
+	rc = HYPERVISOR_memory_op(
+		XENMEM_populate_physmap, &reservation);
+	/* Test the sign first: rc would otherwise be converted to unsigned. */
+	if (rc < 0 || (unsigned long)rc < nr_pages) {
+		if (rc > 0) {
+			int ret;
+
+			/* We hit the Xen hard limit: reprobe. */
+			reservation.nr_extents = rc;
+			ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+					&reservation);
+			KASSERT(ret == rc, ("HYPERVISOR_memory_op failed"));
+		}
+		if (rc >= 0)
+			bs.hard_limit = (bs.current_pages + rc -
+					 bs.driver_pages);
+		goto out;
+	}
+
+	for (i = 0; i < nr_pages; i++) {
+		page = balloon_retrieve();
+		KASSERT(page, ("balloon_retrieve failed"));
+
+		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
+		KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
+			!phys_to_machine_mapping_valid(pfn)),
+		    ("auto translated physmap but mapping is valid"));
+
+		set_phys_to_machine(pfn, frame_list[i]);
+
+#ifndef XENHVM
+		/* Link back into the page tables if not highmem. */
+		if (pfn < max_low_pfn) {
+			int ret;
+			ret = HYPERVISOR_update_va_mapping(
+				(unsigned long)__va(pfn << PAGE_SHIFT),
+				pfn_pte_ma(frame_list[i], PAGE_KERNEL),
+				0);
+			KASSERT(ret == 0,
+			    ("HYPERVISOR_update_va_mapping failed"));
+		}
+#endif
+
+		/* Relinquish the page back to the allocator. */
+		vm_page_unwire(page, 0);
+		vm_page_free(page);
+	}
+
+	bs.current_pages += nr_pages;
+
+ out:
+	mtx_unlock(&balloon_lock);
+	/* A failed hypercall makes balloon_process() back off instead of spinning. */
+	return (rc < 0);
+}
+
+static int
+decrease_reservation(unsigned long nr_pages)
+{
+	unsigned long  pfn, i;
+	vm_page_t      page;
+	int            need_sleep = 0;
+	int ret;
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+	/* Hand at most one frame_list worth of pages to Xen per call. */
+	if (nr_pages > ARRAY_SIZE(frame_list))
+		nr_pages = ARRAY_SIZE(frame_list);
+
+	for (i = 0; i < nr_pages; i++) {
+		int color = 0;	/* re-initialised every pass, so always 0 below */
+		if ((page = vm_page_alloc(NULL, color++, 
+			    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 
+			    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
+			nr_pages = i;	/* only release what was actually allocated */
+			need_sleep = 1;
+			break;
+		}
+
+		pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
+		frame_list[i] = PFNTOMFN(pfn);
+
+#if 0
+		if (!PageHighMem(page)) {
+			v = phys_to_virt(pfn << PAGE_SHIFT);
+			scrub_pages(v, 1);
+#ifdef CONFIG_XEN
+			ret = HYPERVISOR_update_va_mapping(
+				(unsigned long)v, __pte_ma(0), 0);
+			BUG_ON(ret);
+#endif
+		}
+#endif
+#ifdef CONFIG_XEN_SCRUB_PAGES
+		else {
+			v = kmap(page);
+			scrub_pages(v, 1);
+			kunmap(page);
+		}
+#endif
+	}
+
+#ifdef CONFIG_XEN
+	/* Ensure that ballooned highmem pages don't have kmaps. */
+	kmap_flush_unused();
+	flush_tlb_all();
+#endif
+
+	mtx_lock(&balloon_lock);
+
+	/* No more mappings: invalidate P2M and add to balloon. */
+	for (i = 0; i < nr_pages; i++) {
+		pfn = MFNTOPFN(frame_list[i]);
+		set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
+		balloon_append(PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT));
+	}
+
+	set_xen_guest_handle(reservation.extent_start, frame_list);
+	reservation.nr_extents   = nr_pages;
+	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+	KASSERT(ret == nr_pages, ("HYPERVISOR_memory_op failed"));
+
+	bs.current_pages -= nr_pages;
+	//totalram_pages = bs.current_pages;
+
+	mtx_unlock(&balloon_lock);
+	/* Non-zero when vm_page_alloc() ran dry before nr_pages were collected. */
+	return (need_sleep);
+}
+
+/*
+ * We avoid multiple worker processes conflicting via the balloon mutex.
+ * We may of course race updates of the target counts (which are protected
+ * by the balloon lock), or with changes to the Xen hard limit, but we will
+ * recover from these in time.
+ */
+static void 
+balloon_process(void *unused)
+{
+	int need_sleep = 0;
+	long credit;
+	
+	mtx_lock(&balloon_mutex);
+	for (;;) {
+		do {
+			credit = current_target() - bs.current_pages;
+			if (credit > 0)
+				need_sleep = (increase_reservation(credit) != 0);
+			if (credit < 0)
+				need_sleep = (decrease_reservation(-credit) != 0);
+			
+		} while ((credit != 0) && !need_sleep);
+		
+		/* Retry in about a second if work remains; timeout(9) ticks are relative. */
+		if (current_target() != bs.current_pages)
+			timeout(balloon_alarm, NULL, hz);
+
+		/* NOTE(review): timo is -1 here; confirm this is intended (0 means no timeout). */
+		msleep(balloon_process, &balloon_mutex, 0, "balloon", -1);
+	}
+	mtx_unlock(&balloon_mutex);
+}
+
+/* Resets the Xen limit, sets new target, and kicks off processing. */
+static void 
+set_new_target(unsigned long target)
+{
+	/* No need for lock. Not read-modify-write updates. */
+	bs.hard_limit   = ~0UL;
+	bs.target_pages = max(target, minimum_target());	/* never below the floor */
+	wakeup(balloon_process);	/* balloon_process() sleeps on this address */
+}
+
+static struct xenbus_watch target_watch =
+{
+	.node = "memory/target"
+};
+
+/* xenbus watch callback: re-read memory/target and apply it as the new target */
+static void 
+watch_target(struct xenbus_watch *watch,
+	     const char **vec, unsigned int len)
+{
+	unsigned long long new_target;
+	int err;
+
+	err = xenbus_scanf(XBT_NIL, "memory", "target", NULL,
+	    "%llu", &new_target);
+	if (err) {
+		/* This is ok (for domain0 at least) - so just return */
+		return;
+	} 
+        
+	/* The given memory/target value is in KiB, so it needs converting to
+	   pages.  PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
+	*/
+	set_new_target(new_target >> (PAGE_SHIFT - 10));
+    
+}
+
+/* SYSINIT hook: register the "memory/target" xenbus watch. */
+static void 
+balloon_init_watcher(void *arg)
+{
+
+	/* A failure is only logged; nothing is retried here. */
+	if (register_xenbus_watch(&target_watch) != 0)
+		printf("Failed to set balloon watcher\n");
+
+}
+SYSINIT(balloon_init_watcher, SI_SUB_PSEUDO, SI_ORDER_ANY,
+    balloon_init_watcher, NULL);
+
+/* SYSINIT hook: set up locks and statistics and start the balloon kthread. */
+static void 
+balloon_init(void *arg)
+{
+#ifndef XENHVM
+	vm_page_t page;
+	unsigned long pfn;	/* index for the excess-memory scan below */
+#endif
+
+	if (!is_running_on_xen())
+		return;
+
+	mtx_init(&balloon_lock, "balloon_lock", NULL, MTX_DEF);
+	mtx_init(&balloon_mutex, "balloon_mutex", NULL, MTX_DEF);
+
+#ifndef XENHVM
+	bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
+#else
+	bs.current_pages = physmem;
+#endif
+	bs.target_pages  = bs.current_pages;
+	bs.balloon_low   = 0;
+	bs.balloon_high  = 0;
+	bs.driver_pages  = 0UL;
+	bs.hard_limit    = ~0UL;
+
+	kthread_create(balloon_process, NULL, NULL, 0, 0, "balloon");
+	/* No timer object to initialise: balloon_process() arms balloon_alarm(). */
+    
+#ifndef XENHVM
+	/* Initialise the balloon with excess memory space. */
+	for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
+		page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
+		balloon_append(page);
+	}
+#endif
+
+	target_watch.callback = watch_target;
+    
+	return;
+}
+SYSINIT(balloon_init, SI_SUB_CONFIGURE, SI_ORDER_ANY, balloon_init, NULL);
+
+void balloon_update_driver_allowance(long delta);
+
+void 
+balloon_update_driver_allowance(long delta)
+{
+	mtx_lock(&balloon_lock);
+	bs.driver_pages += delta;	/* delta is signed, so this can also shrink the count */
+	mtx_unlock(&balloon_lock);
+}
+
+#if 0
+static int dealloc_pte_fn(
+	pte_t *pte, struct page *pte_page, unsigned long addr, void *data)
+{
+	unsigned long mfn = pte_mfn(*pte);
+	int ret;
+	struct xen_memory_reservation reservation = {
+		.extent_start = &mfn,
+		.nr_extents   = 1,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+	set_pte_at(&init_mm, addr, pte, __pte_ma(0));
+	set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
+	ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+	KASSERT(ret == 1, ("HYPERVISOR_memory_op failed"));
+	return 0;
+}
+
+#endif
+
+#if 0
+vm_page_t
+balloon_alloc_empty_page_range(unsigned long nr_pages)
+{
+	vm_page_t pages;
+	int i, rc;
+	unsigned long *mfn_list;
+	struct xen_memory_reservation reservation = {
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+
+	pages = vm_page_alloc_contig(nr_pages, 0, -1, 4, 4)
+	if (pages == NULL)
+		return NULL;
+	
+	mfn_list = malloc(nr_pages*sizeof(unsigned long), M_DEVBUF, M_WAITOK);
+	
+	for (i = 0; i < nr_pages; i++) {
+		mfn_list[i] = PFNTOMFN(VM_PAGE_TO_PHYS(pages[i]) >> PAGE_SHIFT);
+		PFNTOMFN(i) = INVALID_P2M_ENTRY;
+		reservation.extent_start = mfn_list;
+		reservation.nr_extents = nr_pages;
+		rc = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
+		    &reservation);
+		KASSERT(rc == nr_pages, ("HYPERVISOR_memory_op failed"));
+	}
+
+	current_pages -= nr_pages;
+
+	wakeup(balloon_process);
+
+	return pages;
+}
+
+void 
+balloon_dealloc_empty_page_range(vm_page_t page, unsigned long nr_pages)
+{
+	unsigned long i;
+
+	for (i = 0; i < nr_pages; i++)
+		balloon_append(page + i);
+
+	wakeup(balloon_process);
+}
+#endif

Property changes on: dev/xen/balloon/balloon.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: dev/xen/xenpci/machine_reboot.c
===================================================================
--- dev/xen/xenpci/machine_reboot.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/xenpci/machine_reboot.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,79 @@
+/*-
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/interrupt.h>
+
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/xen_intr.h>
+
+#include <dev/xen/xenpci/xenpcivar.h>
+
+/*
+ * Suspend the domain: quiesce devices, stop event delivery, ask the
+ * hypervisor to suspend us, and re-initialise Xen state on the way back.
+ */
+void
+xen_suspend(void)
+{
+	int suspend_cancelled;
+
+	if (DEVICE_SUSPEND(root_bus)) {
+		printf("xen_suspend: device_suspend failed\n");
+		return;
+	}
+
+	/*
+	 * Make sure we don't change cpus or switch to some other
+	 * thread for the duration.
+	 */
+	critical_enter();
+
+	/* Prevent any races with evtchn_interrupt() handler. */
+	irq_suspend();
+	disable_intr();
+
+	suspend_cancelled = HYPERVISOR_suspend(0);
+	if (!suspend_cancelled)
+		xenpci_resume();
+
+	/* Re-enable interrupts and put the scheduler back to normal. */
+	enable_intr();
+	critical_exit();
+
+	/*
+	 * FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
+	 * similar.
+	 */
+	if (!suspend_cancelled)
+		DEVICE_RESUME(root_bus);
+}

Property changes on: dev/xen/xenpci/machine_reboot.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: dev/xen/xenpci/xenpcivar.h
===================================================================
--- dev/xen/xenpci/xenpcivar.h	(.../stable/6/sys)	(revision 0)
+++ dev/xen/xenpci/xenpcivar.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Per-device state for the Xen platform PCI device (one per attached unit).
+ */
+struct xenpci_softc {
+	int rid_ioport;			/* not referenced by the xenpci code shown with this header */
+	int rid_memory;			/* rid of the memory resource (set to PCIR_BAR(1)) */
+	int rid_irq;			/* rid of the irq resource */
+	struct resource* res_memory;	/* Resource for mem range. */
+	struct resource* res_irq;	/* Resource for irq range. */
+	void	*intr_cookie;		/* from BUS_SETUP_INTR; NULL while no handler is installed */
+
+	vm_paddr_t phys_next;		/* next unallocated address in the mem range */
+};
+
+extern int xenpci_irq_init(device_t device, struct xenpci_softc *scp);
+extern int xenpci_alloc_space(size_t sz, vm_paddr_t *pa);
+extern void xenpci_resume(void);
+extern void xen_suspend(void);

Property changes on: dev/xen/xenpci/xenpcivar.h
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: dev/xen/xenpci/xenpci.c
===================================================================
--- dev/xen/xenpci/xenpci.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/xenpci/xenpci.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,399 @@
+/*
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+
+#include <machine/bus.h>
+#include <machine/resource.h>
+#include <sys/rman.h>
+
+#include <machine/stdarg.h>
+#include <machine/xen/xen-os.h>
+#include <xen/features.h>
+#include <xen/hypervisor.h>
+#include <xen/gnttab.h>
+#include <xen/xen_intr.h>
+#include <xen/interface/memory.h>
+#include <xen/interface/hvm/params.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+#include <vm/pmap.h>
+
+#include <dev/xen/xenpci/xenpcivar.h>
+
+/*
+ * These variables are used by the rest of the kernel to access the
+ * hypervisor.
+ */
+char *hypercall_stubs;
+shared_info_t *HYPERVISOR_shared_info;
+static vm_paddr_t shared_info_pa;
+
+/*
+ * This is used to find our platform device instance.
+ */
+static devclass_t xenpci_devclass;
+
+/* Find the CPUID leaf carrying the Xen signature; returns 0 if absent. */
+static uint32_t
+xenpci_cpuid_base(void)
+{
+	uint32_t leaf, out[4];
+
+	for (leaf = 0x40000000; leaf < 0x40001000; leaf += 0x100) {
+		do_cpuid(leaf, out);
+		if (memcmp("XenVMMXenVMM", &out[1], 12) != 0)
+			continue;
+		/* The first output word must reach at least leaf + 2. */
+		if ((out[0] - leaf) >= 2)
+			return (leaf);
+	}
+	return (0);
+}
+
+/*
+ * Allocate and fill in the hypercall page(s); EINVAL if CPUID shows no Xen.
+ */
+static int
+xenpci_init_hypercall_stubs(device_t dev, struct xenpci_softc * scp)
+{
+	uint32_t base, regs[4];
+	int i;
+
+	base = xenpci_cpuid_base();
+	if (!base) {
+		device_printf(dev, "Xen platform device but not Xen VMM\n");
+		return (EINVAL);
+	}
+
+	if (bootverbose) {
+		do_cpuid(base + 1, regs);
+		device_printf(dev, "Xen version %d.%d.\n",
+		    regs[0] >> 16, regs[0] & 0xffff);
+	}
+
+	/*
+	 * Leaf base + 2: regs[0] is used as the page count, regs[1] as the MSR.
+	 */
+	do_cpuid(base + 2, regs);
+	
+	hypercall_stubs = malloc(regs[0] * PAGE_SIZE, M_TEMP, M_WAITOK);
+
+	for (i = 0; i < regs[0]; i++) {
+		/* Value written: physical address of page i, plus the index i. */
+		wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i);
+	}
+
+	return (0);
+}
+
+/*
+ * After a resume, re-register the existing hypercall page(s) via the MSR.
+ */
+static void
+xenpci_resume_hypercall_stubs(device_t dev, struct xenpci_softc * scp)
+{
+	uint32_t base, regs[4];
+	int i;
+
+	base = xenpci_cpuid_base();	/* NOTE(review): not checked for 0 here */
+
+	do_cpuid(base + 2, regs);
+	for (i = 0; i < regs[0]; i++) {
+		wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i);
+	}
+}
+
+/*
+ * Tell the hypervisor how to contact us for event channel callbacks.
+ */
+static void
+xenpci_set_callback(device_t dev)
+{
+	int irq;
+	uint64_t callback;
+	struct xen_hvm_param xhp;
+
+	irq = pci_get_irq(dev);
+	if (irq < 16) {
+		callback = irq;		/* irqs below 16 are passed as the bare number */
+	} else {
+		callback = (pci_get_intpin(dev) - 1) & 3;	/* pin, made 0-based */
+		callback |= pci_get_slot(dev) << 11;
+		callback |= 1ull << 56;	/* presumably selects the PCI INTx form - confirm in hvm/params.h */
+	}
+
+	xhp.domid = DOMID_SELF;
+	xhp.index = HVM_PARAM_CALLBACK_IRQ;
+	xhp.value = callback;
+	if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp))
+		panic("Can't set evtchn callback");
+}
+
+
+/*
+ * Release whichever of the irq and memory resources are currently held.
+ */
+static int
+xenpci_deallocate_resources(device_t dev)
+{
+	struct xenpci_softc *sc = device_get_softc(dev);
+
+	if (sc->res_irq != NULL) {
+		bus_deactivate_resource(dev, SYS_RES_IRQ, sc->rid_irq,
+		    sc->res_irq);
+		bus_release_resource(dev, SYS_RES_IRQ, sc->rid_irq,
+		    sc->res_irq);
+		sc->res_irq = NULL;
+	}
+	if (sc->res_memory != NULL) {
+		bus_deactivate_resource(dev, SYS_RES_MEMORY, sc->rid_memory,
+		    sc->res_memory);
+		bus_release_resource(dev, SYS_RES_MEMORY, sc->rid_memory,
+		    sc->res_memory);
+		sc->res_memory = NULL;
+	}
+
+	return (0);
+}
+
+/*
+ * Allocate the shareable irq and the memory BAR 1 resource; ENXIO on failure.
+ */
+static int
+xenpci_allocate_resources(device_t dev)
+{
+	struct xenpci_softc *scp = device_get_softc(dev);
+
+	scp->res_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ,
+			&scp->rid_irq, RF_SHAREABLE|RF_ACTIVE);
+	if (scp->res_irq == NULL)
+		goto errexit;
+
+	scp->rid_memory = PCIR_BAR(1);
+	scp->res_memory = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
+			&scp->rid_memory, RF_ACTIVE);
+	if (scp->res_memory == NULL)
+		goto errexit;
+	return (0);
+
+errexit:
+	/* Release whichever resource was obtained before the failure. */
+	xenpci_deallocate_resources(dev);
+	return (ENXIO); /* For want of a better idea. */
+}
+
+/*
+ * Carve sz bytes off our mmio region; *pa gets the start, ENOMEM if full.
+ */
+static int
+xenpci_alloc_space_int(struct xenpci_softc *scp, size_t sz,
+    vm_paddr_t *pa)
+{
+
+	/* rman_get_end() is the last valid address, hence the -1. */
+	if (scp->phys_next + sz - 1 > rman_get_end(scp->res_memory))
+		return (ENOMEM);
+
+	*pa = scp->phys_next;
+	scp->phys_next += sz;
+
+	return (0);
+}
+
+/*
+ * Allocate from the mmio region of xenpci unit 0 (ENOMEM if there is none).
+ */
+int
+xenpci_alloc_space(size_t sz, vm_paddr_t *pa)
+{
+	device_t xendev;
+
+	xendev = devclass_get_device(xenpci_devclass, 0);
+	if (xendev == NULL)
+		return (ENOMEM);
+
+	/* The softc of that unit tracks the next free address. */
+	return (xenpci_alloc_space_int(device_get_softc(xendev), sz, pa));
+}
+
+/*
+ * Called very early in the resume sequence - reinitialise the various
+ * bits of Xen machinery including the hypercall page and the shared
+ * info page.
+ */
+void
+xenpci_resume(void)
+{
+	device_t dev = devclass_get_device(xenpci_devclass, 0);
+	struct xenpci_softc *scp = device_get_softc(dev);
+	struct xen_add_to_physmap xatp;
+
+	xenpci_resume_hypercall_stubs(dev, scp);
+
+	xatp.domid = DOMID_SELF;
+	xatp.idx = 0;
+	xatp.space = XENMAPSPACE_shared_info;
+	xatp.gpfn = shared_info_pa >> PAGE_SHIFT;	/* same page as at attach */
+	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+		panic("HYPERVISOR_memory_op failed");
+
+	pmap_kenter((vm_offset_t) HYPERVISOR_shared_info, shared_info_pa);
+
+	xenpci_set_callback(dev);
+
+	gnttab_resume();
+	irq_resume();
+}
+
+/*
+ * Probe - match only devid 0x00015853; ENXIO for every other device.
+ */
+static int
+xenpci_probe(device_t dev)
+{
+
+	if (pci_get_devid(dev) != 0x00015853)	/* presumably device 0x0001, vendor 0x5853 - confirm */
+		return (ENXIO);
+
+	device_set_desc(dev, "Xen Platform Device");
+	return (bus_generic_probe(dev));
+}
+
+/*
+ * Attach - find resources and talk to Xen; returns an errno on failure.
+ */
+static int
+xenpci_attach(device_t dev)
+{
+	int error;
+	struct xenpci_softc *scp = device_get_softc(dev);
+	struct xen_add_to_physmap xatp;
+	vm_offset_t shared_va;
+
+	error = xenpci_allocate_resources(dev);
+	if (error)
+		goto errexit;
+
+	scp->phys_next = rman_get_start(scp->res_memory);
+
+	error = xenpci_init_hypercall_stubs(dev, scp);
+	if (error)
+		goto errexit;
+
+	setup_xen_features();
+
+	error = xenpci_alloc_space_int(scp, PAGE_SIZE, &shared_info_pa);
+	if (error)
+		goto errexit;
+
+	xatp.domid = DOMID_SELF;
+	xatp.idx = 0;
+	xatp.space = XENMAPSPACE_shared_info;
+	xatp.gpfn = shared_info_pa >> PAGE_SHIFT;
+	if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+		panic("HYPERVISOR_memory_op failed");
+
+	shared_va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+	pmap_kenter(shared_va, shared_info_pa);
+	HYPERVISOR_shared_info = (void *) shared_va;
+
+	/* Hook the irq up to evtchn; without a handler no event arrives. */
+	error = xenpci_irq_init(dev, scp);
+	if (error)
+		goto errexit;
+	xenpci_set_callback(dev);
+
+	return (bus_generic_attach(dev));
+
+errexit:
+	/* Release the bus resources; other state is left as it is. */
+	xenpci_deallocate_resources(dev);
+	return (error);
+}
+
+/*
+ * Detach - remove the interrupt handler and release the bus resources.
+ */
+static int
+xenpci_detach(device_t dev)
+{
+	struct xenpci_softc *scp = device_get_softc(dev);
+	device_t parent = device_get_parent(dev);
+
+	/*
+	 * Take our interrupt handler out of the list of handlers
+	 * that can handle this irq.  A teardown failure is only logged.
+	 */
+	if (scp->intr_cookie != NULL) {
+		if (BUS_TEARDOWN_INTR(parent, dev,
+			scp->res_irq, scp->intr_cookie) != 0)
+				printf("intr teardown failed.. continuing\n");
+		scp->intr_cookie = NULL;
+	}
+
+	/*
+	 * Deallocate the irq and memory resources.  NOTE(review): the
+	 * hypercall pages and shared-info mapping are not released here.
+	 */
+	return (xenpci_deallocate_resources(dev));
+}
+
+static device_method_t xenpci_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		xenpci_probe),
+	DEVMETHOD(device_attach,	xenpci_attach),
+	DEVMETHOD(device_detach,	xenpci_detach),
+	DEVMETHOD(device_suspend,	bus_generic_suspend),
+	DEVMETHOD(device_resume,	bus_generic_resume),
+
+	/* Bus interface */
+	DEVMETHOD(bus_add_child,	bus_generic_add_child),
+
+	{ 0, 0 }
+};
+
+static driver_t xenpci_driver = {
+	"xenpci",
+	xenpci_methods,
+	sizeof(struct xenpci_softc),
+};
+
+DRIVER_MODULE(xenpci, pci, xenpci_driver, xenpci_devclass, 0, 0);

Property changes on: dev/xen/xenpci/xenpci.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: dev/xen/xenpci/evtchn.c
===================================================================
--- dev/xen/xenpci/evtchn.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/xenpci/evtchn.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,418 @@
+/******************************************************************************
+ * evtchn.c
+ *
+ * A simplified event channel for para-drivers in unmodified linux
+ *
+ * Copyright (c) 2002-2005, K A Fraser
+ * Copyright (c) 2005, Intel Corporation <xiaofeng.ling@intel.com>
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/interrupt.h>
+#include <sys/pcpu.h>
+
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenvar.h>
+#include <xen/hypervisor.h>
+#include <xen/xen_intr.h>
+#include <xen/evtchn.h>
+#include <sys/smp.h>
+
+#include <dev/xen/xenpci/xenpcivar.h>
+
+static inline unsigned long __ffs(unsigned long word)	/* index of the lowest set bit */
+{
+        __asm__("bsfq %1,%0"	/* NOTE(review): 64-bit form; result is undefined for word == 0 */
+                :"=r" (word)
+                :"rm" (word));
+        return word;
+}
+
+#define is_valid_evtchn(x)	((x) != 0)	/* port 0 is stored when nothing is bound */
+#define evtchn_from_irq(x)	(irq_evtchn[(x)].evtchn)
+
+static struct {
+	struct mtx lock;	/* held while handler, arg and evtchn change */
+	driver_intr_t *handler;
+	void *arg;
+	int evtchn;
+	unsigned int close:1; /* close on unbind_from_irqhandler()? */
+	unsigned int inuse:1;	/* slot handed out by alloc_xen_irq() */
+	unsigned int in_handler:1;	/* handler is being called right now */
+	unsigned int mpsafe:1;	/* handler is called without Giant */
+} irq_evtchn[256];
+static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
+	[0 ...  NR_EVENT_CHANNELS-1] = -1 };
+
+static struct mtx irq_alloc_lock;
+static device_t xenpci_device;
+
+#define ARRAY_SIZE(a)	(sizeof(a) / sizeof(a[0]))
+
+/* Claim a free irq_evtchn[] slot; returns its index (>= 1) or -ENOSPC. */
+static int
+alloc_xen_irq(void)
+{
+	static int warned;
+	unsigned int irq;
+
+	mtx_lock(&irq_alloc_lock);
+
+	for (irq = 1; irq < ARRAY_SIZE(irq_evtchn); irq++) {
+		if (irq_evtchn[irq].inuse)
+			continue;
+		irq_evtchn[irq].inuse = 1;
+		mtx_unlock(&irq_alloc_lock);
+		return irq;
+	}
+
+	if (!warned) {
+		warned = 1;
+		printf("alloc_xen_irq: No available IRQ to bind to: "
+		       "increase irq_evtchn[] size in evtchn.c.\n");
+	}
+
+	mtx_unlock(&irq_alloc_lock);
+	return -ENOSPC;
+}
+
+static void
+free_xen_irq(int irq)
+{
+
+	mtx_lock(&irq_alloc_lock);
+	irq_evtchn[irq].inuse = 0;	/* alloc_xen_irq() may hand the slot out again */
+	mtx_unlock(&irq_alloc_lock);
+}
+
+int
+irq_to_evtchn_port(int irq)
+{
+
+	return irq_evtchn[irq].evtchn;	/* read without the per-irq lock; irq is not range-checked */
+}
+
+void
+mask_evtchn(int port)
+{
+
+	/* Set the port's bit in the shared-info mask bitmap. */
+	synch_set_bit(port, &HYPERVISOR_shared_info->evtchn_mask[0]);
+}
+
+void
+unmask_evtchn(int port)
+{
+	evtchn_unmask_t op = { .port = port };
+
+	HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op);	/* result is not checked */
+}
+
+/* Allocate an unbound port for remote_domain and route it to handler. */
+int
+bind_listening_port_to_irqhandler(unsigned int remote_domain,
+    const char *devname, driver_intr_t handler, void *arg,
+    unsigned long irqflags, unsigned int *irqp)
+{
+	struct evtchn_alloc_unbound alloc_unbound;
+	int irq, error;		/* irq is signed so that -ENOSPC is detectable */
+
+	irq = alloc_xen_irq();
+	if (irq < 0)
+		return irq;
+
+	mtx_lock(&irq_evtchn[irq].lock);
+
+	alloc_unbound.dom        = DOMID_SELF;
+	alloc_unbound.remote_dom = remote_domain;
+	error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+					  &alloc_unbound);
+	if (error) {
+		mtx_unlock(&irq_evtchn[irq].lock);
+		free_xen_irq(irq);
+		return (-error);
+	}
+
+	irq_evtchn[irq].handler = handler;
+	irq_evtchn[irq].arg     = arg;
+	irq_evtchn[irq].evtchn  = alloc_unbound.port;
+	irq_evtchn[irq].close   = 1;
+	irq_evtchn[irq].mpsafe  = (irqflags & INTR_MPSAFE) != 0;
+
+	evtchn_to_irq[alloc_unbound.port] = irq;
+
+	unmask_evtchn(alloc_unbound.port);
+
+	mtx_unlock(&irq_evtchn[irq].lock);
+
+	if (irqp)
+		*irqp = irq;
+	return (0);
+}
+
+/* Route an already-open port (caller_port) to handler; never closes it. */
+int
+bind_caller_port_to_irqhandler(unsigned int caller_port,
+    const char *devname, driver_intr_t handler, void *arg,
+    unsigned long irqflags, unsigned int *irqp)
+{
+	int irq;		/* signed so that -ENOSPC is detectable */
+
+	irq = alloc_xen_irq();
+	if (irq < 0)
+		return irq;
+
+	mtx_lock(&irq_evtchn[irq].lock);
+
+	irq_evtchn[irq].handler = handler;
+	irq_evtchn[irq].arg     = arg;
+	irq_evtchn[irq].evtchn  = caller_port;
+	irq_evtchn[irq].close   = 0;
+	irq_evtchn[irq].mpsafe  = (irqflags & INTR_MPSAFE) != 0;
+
+	evtchn_to_irq[caller_port] = irq;
+	unmask_evtchn(caller_port);
+
+	mtx_unlock(&irq_evtchn[irq].lock);
+
+	if (irqp)
+		*irqp = irq;
+	return (0);
+}
+
+void
+unbind_from_irqhandler(unsigned int irq)
+{
+	int evtchn;
+
+	mtx_lock(&irq_evtchn[irq].lock);
+
+	evtchn = evtchn_from_irq(irq);
+
+	if (is_valid_evtchn(evtchn)) {
+		evtchn_to_irq[evtchn] = -1;	/* evtchn_interrupt() skips ports mapped to -1 */
+		mask_evtchn(evtchn);
+		if (irq_evtchn[irq].close) {	/* set only by bind_listening_port_to_irqhandler() */
+			struct evtchn_close close = { .port = evtchn };
+			if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
+				panic("EVTCHNOP_close failed");
+		}
+	}
+
+	irq_evtchn[irq].handler = NULL;
+	irq_evtchn[irq].evtchn  = 0;
+
+	mtx_unlock(&irq_evtchn[irq].lock);
+
+	while (irq_evtchn[irq].in_handler)	/* spin until a running handler call has returned */
+		cpu_relax();
+
+	free_xen_irq(irq);
+}
+
+void notify_remote_via_irq(int irq)
+{
+	int port = evtchn_from_irq(irq);
+
+	if (!is_valid_evtchn(port))
+		return;	/* nothing is bound to this irq */
+	notify_remote_via_evtchn(port);
+}
+
+static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
+						unsigned int idx)
+{
+	return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]);	/* pending and not masked; cpu is unused */
+}
+
+static void
+evtchn_interrupt(void *arg)	/* arg is not used */
+{
+	unsigned int l1i, l2i, port;
+	unsigned long masked_l1, masked_l2;
+	/* XXX: All events are bound to vcpu0 but irq may be redirected. */
+	int cpu = 0; /*smp_processor_id();*/
+	driver_intr_t *handler;
+	void *handler_arg;
+	int irq, handler_mpsafe;
+	shared_info_t *s = HYPERVISOR_shared_info;
+	vcpu_info_t *v = &s->vcpu_info[cpu];
+	struct pcpu *pc = pcpu_find(cpu);
+	unsigned long l1, l2;
+
+	v->evtchn_upcall_pending = 0;
+
+#if 0
+#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
+	/* Clear master flag /before/ clearing selector flag. */
+	wmb();
+#endif
+#endif
+
+	l1 = atomic_readandclear_long(&v->evtchn_pending_sel);	/* snapshot of the selector word */
+
+	l1i = pc->pc_last_processed_l1i;	/* resume after the last port handled */
+	l2i = pc->pc_last_processed_l2i;
+
+	while (l1 != 0) {
+
+		l1i = (l1i + 1) % LONG_BIT;
+		masked_l1 = l1 & ((~0UL) << l1i);
+
+		if (masked_l1 == 0) { /* nothing left at or above l1i: wrap around to the beginning */
+			l1i = LONG_BIT - 1;
+			l2i = LONG_BIT - 1;
+			continue;
+		}
+		l1i = __ffs(masked_l1);
+
+		do {
+			l2 = active_evtchns(cpu, s, l1i);
+
+			l2i = (l2i + 1) % LONG_BIT;
+			masked_l2 = l2 & ((~0UL) << l2i);
+
+			if (masked_l2 == 0) { /* nothing left at or above l2i in this word: move on */
+				l2i = LONG_BIT - 1;
+				break;
+			}
+			l2i = __ffs(masked_l2);
+
+			/* process port: clear its pending bit, then dispatch */
+			port = (l1i * LONG_BIT) + l2i;
+			synch_clear_bit(port, &s->evtchn_pending[0]);
+
+			irq = evtchn_to_irq[port];
+			if (irq < 0)	/* port has no irq bound */
+				continue;
+
+			mtx_lock(&irq_evtchn[irq].lock);
+			handler = irq_evtchn[irq].handler;
+			handler_arg = irq_evtchn[irq].arg;
+			handler_mpsafe = irq_evtchn[irq].mpsafe;
+			if (unlikely(handler == NULL)) {
+				printf("Xen IRQ%d (port %d) has no handler!\n",
+				       irq, port);
+				mtx_unlock(&irq_evtchn[irq].lock);
+				continue;
+			}
+			irq_evtchn[irq].in_handler = 1;	/* unbind_from_irqhandler() waits on this */
+			mtx_unlock(&irq_evtchn[irq].lock);
+
+			//local_irq_enable();
+			if (!handler_mpsafe)
+				mtx_lock(&Giant);
+			handler(handler_arg);	/* called with the per-irq lock dropped */
+			if (!handler_mpsafe)
+				mtx_unlock(&Giant);
+			//local_irq_disable();
+
+			mtx_lock(&irq_evtchn[irq].lock);
+			irq_evtchn[irq].in_handler = 0;
+			mtx_unlock(&irq_evtchn[irq].lock);
+
+			/* if this is the final port processed, we'll pick up here+1 next time */
+			pc->pc_last_processed_l1i = l1i;
+			pc->pc_last_processed_l2i = l2i;
+
+		} while (l2i != LONG_BIT - 1);
+
+		l2 = active_evtchns(cpu, s, l1i);
+		if (l2 == 0) /* we handled all ports, so we can clear the selector bit */
+			l1 &= ~(1UL << l1i);
+	}
+}
+
+void
+irq_suspend(void)
+{
+	struct xenpci_softc *scp = device_get_softc(xenpci_device);
+
+	/*
+	 * Take our interrupt handler out of the list of handlers
+	 * that can handle this irq; irq_resume() installs it again.
+	 */
+	if (scp->intr_cookie != NULL) {
+		if (BUS_TEARDOWN_INTR(device_get_parent(xenpci_device),
+			xenpci_device, scp->res_irq, scp->intr_cookie) != 0)
+			printf("intr teardown failed.. continuing\n");
+		scp->intr_cookie = NULL;	/* cleared even when teardown failed */
+	}
+}
+
+void
+irq_resume(void)
+{
+	struct xenpci_softc *scp = device_get_softc(xenpci_device);
+	int evtchn, irq;
+
+	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) {
+		mask_evtchn(evtchn);
+		evtchn_to_irq[evtchn] = -1;
+	}
+	/* No irq keeps a port across resume; the bindings start out empty. */
+	for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++)
+		irq_evtchn[irq].evtchn = 0;
+	/* Same flags as xenpci_irq_init(), so the handler stays MPSAFE. */
+	BUS_SETUP_INTR(device_get_parent(xenpci_device),
+	    xenpci_device, scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC,
+	    evtchn_interrupt, NULL, &scp->intr_cookie);
+}
+
+int
+xenpci_irq_init(device_t device, struct xenpci_softc *scp)
+{
+	int irq, cpu;
+	int error;
+
+	mtx_init(&irq_alloc_lock, "xen-irq-lock", NULL, MTX_DEF);
+
+	for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++)
+		mtx_init(&irq_evtchn[irq].lock, "irq-evtchn", NULL, MTX_DEF);
+
+	for (cpu = 0; cpu < mp_ncpus; cpu++) {
+		/* LONG_BIT - 1 makes evtchn_interrupt()'s first scan start at bit 0. */
+		pcpu_find(cpu)->pc_last_processed_l1i = LONG_BIT - 1;
+		pcpu_find(cpu)->pc_last_processed_l2i = LONG_BIT - 1;
+	}
+
+	error = BUS_SETUP_INTR(device_get_parent(device), device,
+	    scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC, evtchn_interrupt, NULL,
+	    &scp->intr_cookie);
+	if (error)
+		return (error);
+
+	xenpci_device = device;	/* used later by irq_suspend() and irq_resume() */
+
+	return (0);
+}

Property changes on: dev/xen/xenpci/evtchn.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: dev/xen/evtchn/evtchn_dev.c
===================================================================
--- dev/xen/evtchn/evtchn_dev.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/evtchn/evtchn_dev.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,394 @@
+/******************************************************************************
+ * evtchn.c
+ * 
+ * Xenolinux driver for receiving and demuxing event-channel signals.
+ * 
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/selinfo.h>
+#include <sys/poll.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+
+#include <machine/cpufunc.h>
+#include <machine/intr_machdep.h>
+#include <machine/xen-os.h>
+#include <xen/xen_intr.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <machine/resource.h>
+#include <machine/synch_bitops.h>
+
+#include <xen/hypervisor.h>
+
+
+typedef struct evtchn_softc {
+	/* select/poll record, filled in by evtchn_poll() via selrecord() */
+	struct selinfo  ev_rsel;
+} evtchn_softc_t;
+
+
+#ifdef linuxcrap
+/* NB. This must be shared amongst drivers if more things go in /dev/xen */
+static devfs_handle_t xen_dev_dir;
+#endif
+
+/* Only one process may open /dev/xen/evtchn at any time. */
+static unsigned long evtchn_dev_inuse;
+
+/* Notification ring, accessed via /dev/xen/evtchn. */
+
+#define EVTCHN_RING_SIZE     2048  /* 2048 16-bit entries */
+
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+static uint16_t *ring;
+static unsigned int ring_cons, ring_prod, ring_overflow;
+
+/* Which ports is user-space bound to?  One bit per event channel. */
+static uint32_t bound_ports[NR_EVENT_CHANNELS / 32];
+
+/* Unique address for processes to sleep on */
+static void *evtchn_waddr = &ring;
+
+static struct mtx lock, upcall_lock;
+
+static d_read_t      evtchn_read;
+static d_write_t     evtchn_write;
+static d_ioctl_t     evtchn_ioctl;
+static d_poll_t      evtchn_poll;
+static d_open_t      evtchn_open;
+static d_close_t     evtchn_close;
+
+
+void 
+evtchn_device_upcall(int port)
+{
+	mtx_lock(&upcall_lock);
+
+	mask_evtchn(port);	/* stays masked until something unmasks it, e.g. evtchn_write() */
+	clear_evtchn(port);
+
+	if ( ring != NULL ) {	/* ring is non-NULL only between open and close */
+		if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) {
+			ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port;
+			if ( ring_cons == ring_prod++ ) {	/* ring was empty: wake a sleeping reader */
+				wakeup(evtchn_waddr);
+			}
+		}
+		else {
+			ring_overflow = 1;	/* reported by evtchn_read() as EFBIG */
+		}
+	}
+
+	mtx_unlock(&upcall_lock);
+}
+
+static void
+__evtchn_reset_buffer_ring(void)
+{
+	ring_overflow = 0;		/* forget any recorded overflow */
+	ring_cons = ring_prod = 0;	/* equal indices mean an empty ring */
+}
+
+/*
+ * read(2) handler: copy pending port numbers (16 bits each) from the
+ * notification ring to the caller.  Returns 0 or an errno.
+ */
+static int
+evtchn_read(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	int rc;
+	unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0;
+	count = uio->uio_resid;
+	count &= ~1; /* even number of bytes */
+
+	if ( count == 0 ) {
+		rc = 0;
+		goto out;
+	}
+
+	if ( count > PAGE_SIZE )
+		count = PAGE_SIZE;
+
+	for ( ; ; ) {
+		if ( (c = ring_cons) != (p = ring_prod) )
+			break;
+
+		if ( ring_overflow ) {
+			rc = EFBIG;
+			goto out;
+		}
+
+		if (sst != 0) {
+			rc = EINTR;
+			goto out;
+		}
+
+		/* PCATCH == check for signals before and after sleeping 
+		 * PWAIT == priority of waiting on resource 
+		 */
+		sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10);
+	}
+
+	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+	if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) {
+		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t);
+		bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t);
+	}
+	else {
+		bytes1 = (p - c) * sizeof(uint16_t);
+		bytes2 = 0;
+	}
+
+	/* Truncate chunks according to caller's maximum byte count. */
+	if ( bytes1 > count ) {
+		bytes1 = count;
+		bytes2 = 0;
+	}
+	else if ( (bytes1 + bytes2) > count ) {
+		bytes2 = count - bytes1;
+	}
+    
+	if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) ||
+	     ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio)))
+		/* keeping this around as its replacement is not equivalent 
+		 * copyout(&ring[0], &buf[bytes1], bytes2) 
+		 */
+	{
+		rc = EFAULT;
+		goto out;
+	}
+
+	ring_cons += (bytes1 + bytes2) / sizeof(uint16_t);
+	/* d_read returns an errno; uiomove() already updated uio's counts. */
+	rc = 0;
+ out:
+	return rc;
+}
+
+/*
+ * write(2) handler: each 16-bit value written is a port number; ports that
+ * user space has bound are unmasked.  Returns 0 or an errno.
+ */
+static int
+evtchn_write(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	int  rc, i, count;
+	uint16_t *kbuf;
+
+	count = uio->uio_resid;
+	kbuf = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
+	if ( kbuf == NULL )
+		return ENOMEM;
+	count &= ~1; /* even number of bytes */
+
+	if ( count == 0 ) {
+		rc = 0;
+		goto out;
+	}
+
+	if ( count > PAGE_SIZE )
+		count = PAGE_SIZE;
+
+	if ( uiomove(kbuf, count, uio) != 0 ) {
+		rc = EFAULT;
+		goto out;
+	}
+
+	mtx_lock_spin(&lock);
+	for ( i = 0; i < (count/2); i++ )
+		if ( kbuf[i] < sizeof(bound_ports) * 8 &&	/* untrusted index */
+		     test_bit(kbuf[i], &bound_ports[0]) )
+			unmask_evtchn(kbuf[i]);
+	mtx_unlock_spin(&lock);
+	rc = 0;		/* d_write returns an errno; uio holds the byte counts */
+ out:
+	free(kbuf, M_DEVBUF);
+	return rc;
+}
+
+static int 
+evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, 
+	     int mode, struct thread *td __unused)
+{
+	int rc = 0;
+    
+	mtx_lock_spin(&lock);
+    
+	switch ( cmd )
+	{
+	case EVTCHN_RESET:	/* empty the ring and clear the overflow flag */
+		__evtchn_reset_buffer_ring();
+		break;
+	case EVTCHN_BIND:	/* NOTE(review): (int)arg is used as a bit index with no range check */
+		if ( !synch_test_and_set_bit((int)arg, &bound_ports[0]) )
+			unmask_evtchn((int)arg);
+		else
+			rc = EINVAL;	/* port was already bound */
+		break;
+	case EVTCHN_UNBIND:
+		if ( synch_test_and_clear_bit((int)arg, &bound_ports[0]) )
+			mask_evtchn((int)arg);
+		else
+			rc = EINVAL;	/* port was not bound */
+		break;
+	default:
+		rc = ENOSYS;
+		break;
+	}
+
+	mtx_unlock_spin(&lock);   
+
+	return rc;
+}
+
+static int
+evtchn_poll(struct cdev *dev, int poll_events, struct thread *td)
+{
+
+	evtchn_softc_t *sc;
+	unsigned int mask = POLLOUT | POLLWRNORM;	/* always reported writable; poll_events is not consulted */
+    
+	sc = dev->si_drv1;
+    
+	if ( ring_cons != ring_prod )	/* ring holds unread entries */
+		mask |= POLLIN | POLLRDNORM;
+	else if ( ring_overflow )
+		mask = POLLERR;
+	else
+		selrecord(td, &sc->ev_rsel);	/* NOTE(review): no selwakeup() on ev_rsel is visible in this file */
+
+
+	return mask;
+}
+
+
+static int 
+evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td)
+{
+	uint16_t *_ring;
+    
+	if (flag & O_NONBLOCK)	/* non-blocking opens are refused */
+		return EBUSY;
+
+	if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) )	/* only one open at a time */
+		return EBUSY;
+
+	if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL )
+		return ENOMEM;	/* NOTE(review): this path would leave evtchn_dev_inuse set */
+
+	mtx_lock_spin(&lock);
+	ring = _ring;	/* from here on evtchn_device_upcall() queues ports */
+	__evtchn_reset_buffer_ring();
+	mtx_unlock_spin(&lock);
+
+
+	return 0;
+}
+
+/* close(2) handler: drop the ring, unbind every port, mark the device free. */
+static int
+evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused)
+{
+	uint16_t *_ring;
+	int i;
+
+	mtx_lock_spin(&lock);
+	_ring = ring;
+	ring = NULL;
+	for ( i = 0; i < (int)(sizeof(bound_ports) * 8); i++ )
+		if ( synch_test_and_clear_bit(i, &bound_ports[0]) )
+			mask_evtchn(i);
+	mtx_unlock_spin(&lock);
+	if ( _ring != NULL )
+		free(_ring, M_DEVBUF);	/* must not run under the spin lock */
+	evtchn_dev_inuse = 0;
+	return 0;
+}
+
+static struct cdevsw evtchn_devsw = {	/* entry points of the evtchn device */
+	.d_version = D_VERSION,
+	.d_open    = evtchn_open,
+	.d_close   = evtchn_close,
+	.d_read    = evtchn_read,
+	.d_write   = evtchn_write,
+	.d_ioctl   = evtchn_ioctl,
+	.d_poll    = evtchn_poll,
+	.d_name    = "evtchn",
+	.d_flags   = 0,
+};
+
+
+/* XXX  - if this device is ever supposed to support use by more than one process
+ * this global static will have to go away
+ */
+static struct cdev *evtchn_dev;
+
+
+
+static int
+evtchn_init(void *dummy __unused)
+{
+	/* XXX I believe we don't need these leaving them here for now until we 
+	 * have some semblance of it working 
+	 */
+	/* Locks first: make_dev() makes the entry points reachable. */
+	mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF);
+	mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS);
+
+	/* (DEVFS) create '/dev/xen/evtchn'. */
+	evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn");
+
+	evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF,
+	    M_WAITOK | M_ZERO);
+
+	/* XXX I don't think we need any of this rubbish */
+#if 0
+	if ( err != 0 )
+	{
+		printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+		return err;
+	}
+
+	/* (DEVFS) create directory '/dev/xen'. */
+	xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL);
+
+	/* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */
+	pos = devfs_generate_path(evtchn_miscdev.devfs_handle, 
+				  &link_dest[3], 
+				  sizeof(link_dest) - 3);
+	if ( pos >= 0 )
+		strncpy(&link_dest[pos], "../", 3);
+	/* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */
+	(void)devfs_mk_symlink(xen_dev_dir, 
+			       "evtchn", 
+			       DEVFS_FL_DEFAULT, 
+			       &link_dest[pos],
+			       &symlink_handle, 
+			       NULL);
+
+	/* (DEVFS) automatically destroy the symlink with its destination. */
+	devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle);
+#endif
+	printk("Event-channel device installed.\n");
+
+	return 0;
+}
+
+
+SYSINIT(evtchn_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_init, NULL);
+
+

Property changes on: dev/xen/evtchn/evtchn_dev.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: dev/xen/netback/netback.c
===================================================================
--- dev/xen/netback/netback.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/netback/netback.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,1585 @@
+/*
+ * Copyright (c) 2006, Cisco Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions 
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in the 
+ *    documentation and/or other materials provided with the distribution. 
+ * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors 
+ *    may be used to endorse or promote products derived from this software 
+ *    without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/sysctl.h>
+
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <net/if_types.h>
+#include <net/ethernet.h>
+#include <net/if_bridgevar.h>
+
+#include <netinet/in_systm.h>
+#include <netinet/in.h>
+#include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+
+#include <machine/in_cksum.h>
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen_intr.h>
+#include <machine/evtchn.h>
+#include <machine/xenbus.h>
+#include <machine/gnttab.h>
+#include <machine/xen-public/memory.h>
+#include <dev/xen/xenbus/xenbus_comms.h>
+
+
+#ifdef XEN_NETBACK_DEBUG
+#define DPRINTF(fmt, args...) \
+    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTF(fmt, args...) ((void)0)
+#endif
+
+#ifdef XEN_NETBACK_DEBUG_LOTS
+#define DDPRINTF(fmt, args...) \
+    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#define DPRINTF_MBUF(_m) print_mbuf(_m, 0)
+#define DPRINTF_MBUF_LEN(_m, _len) print_mbuf(_m, _len)
+#else
+#define DDPRINTF(fmt, args...) ((void)0)
+#define DPRINTF_MBUF(_m) ((void)0)
+#define DPRINTF_MBUF_LEN(_m, _len) ((void)0)
+#endif
+
+#define WPRINTF(fmt, args...) \
+    printf("netback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+#define BUG_ON PANIC_IF
+
+#define IFNAME(_np) (_np)->ifp->if_xname
+
+#define NET_TX_RING_SIZE __RING_SIZE((netif_tx_sring_t *)0, PAGE_SIZE)
+#define NET_RX_RING_SIZE __RING_SIZE((netif_rx_sring_t *)0, PAGE_SIZE)
+
+struct ring_ref {
+	vm_offset_t va;		/* kernel VA the granted ring page is mapped at (0 when unmapped) */
+	grant_handle_t handle;	/* handle returned by GNTTABOP_map_grant_ref; needed to unmap */
+	uint64_t bus_addr;	/* dev_bus_addr returned by the map operation */
+};
+
+typedef struct netback_info {
+	/* Per-frontend ("vif") backend state; lifetime is governed by ref_cnt. */
+	/* Schedule lists */
+	STAILQ_ENTRY(netback_info) next_tx;	/* linkage on tx_sched_list */
+	STAILQ_ENTRY(netback_info) next_rx;	/* linkage on rx_sched_list */
+	int on_tx_sched_list;			/* nonzero while queued on tx_sched_list */
+	int on_rx_sched_list;			/* nonzero while queued on rx_sched_list */
+
+	struct xenbus_device *xdev;		/* owning xenbus device; cleared in netback_remove() */
+	XenbusState frontend_state;		/* must be XenbusStateConnected before connect() proceeds */
+
+	domid_t domid;				/* frontend domain (xdev->otherend_id) */
+	int handle;				/* "handle" value read from xenstore in netback_probe() */
+	char *bridge;				/* optional bridge name; freed with M_DEVBUF in netif_put() */
+
+	int rings_connected;			/* set by connect_rings(), cleared by disconnect_rings() */
+	struct ring_ref tx_ring_ref;		/* mapping of the frontend's TX shared ring */
+	struct ring_ref rx_ring_ref;		/* mapping of the frontend's RX shared ring */
+	netif_tx_back_ring_t tx;		/* backend view of the TX ring */
+	netif_rx_back_ring_t rx;		/* backend view of the RX ring */
+	evtchn_port_t evtchn;			/* local port bound to the frontend's event channel */
+	int irq;				/* value returned by bind_evtchn_to_irqhandler() */
+	void *irq_cookie;			/* cookie handed back to unbind_from_irqhandler() */
+
+	struct ifnet *ifp;			/* the "vif" network interface */
+	int ref_cnt;				/* see netif_get()/netif_put(); freed when it drops to 0 */
+
+	device_t ndev;				/* detached in netback_remove() if set */
+	int attached;				/* must be nonzero before connect() proceeds */
+} netif_t;
+
+
+#define MAX_PENDING_REQS 256	/* must stay a power of two: MASK_PEND_IDX() masks with (MAX_PENDING_REQS-1) */
+#define PKT_PROT_LEN 64		/* bytes copied into the header mbuf; the rest is referenced in place */
+
+static struct {
+	netif_tx_request_t req;	/* copy of the frontend's TX request */
+	netif_t *netif;		/* interface the request came from */
+} pending_tx_info[MAX_PENDING_REQS];
+static uint16_t pending_ring[MAX_PENDING_REQS];	/* ring of pending_tx_info indices available for use */
+typedef unsigned int PEND_RING_IDX;
+#define MASK_PEND_IDX(_i) ((_i)&(MAX_PENDING_REQS-1))
+static PEND_RING_IDX pending_prod, pending_cons;
+#define NR_PENDING_REQS (MAX_PENDING_REQS - pending_prod + pending_cons)	/* slots taken and not yet returned */
+
+static unsigned long mmap_vstart;
+#define MMAP_VADDR(_req) (mmap_vstart + ((_req) * PAGE_SIZE))	/* VA at which pending slot _req is mapped */
+
+/* Freed TX mbufs get batched on this ring before return to pending_ring. */
+static uint16_t dealloc_ring[MAX_PENDING_REQS];
+static PEND_RING_IDX dealloc_prod, dealloc_cons;
+
+static multicall_entry_t rx_mcl[NET_RX_RING_SIZE+1];	/* +1: netif_rx() appends one mmu_update entry */
+static mmu_update_t rx_mmu[NET_RX_RING_SIZE];
+static gnttab_transfer_t grant_rx_op[NET_RX_RING_SIZE];
+
+static grant_handle_t grant_tx_handle[MAX_PENDING_REQS];	/* indexed by pending slot */
+static gnttab_unmap_grant_ref_t tx_unmap_ops[MAX_PENDING_REQS];
+static gnttab_map_grant_ref_t tx_map_ops[MAX_PENDING_REQS];
+
+static struct task net_tx_task, net_rx_task;
+static struct callout rx_task_callout;	/* re-enqueues net_rx_task one tick later (see net_rx_action) */
+
+static STAILQ_HEAD(netback_tx_sched_list, netback_info) tx_sched_list =
+	STAILQ_HEAD_INITIALIZER(tx_sched_list);
+static STAILQ_HEAD(netback_rx_sched_list, netback_info) rx_sched_list =
+	STAILQ_HEAD_INITIALIZER(rx_sched_list);
+static struct mtx tx_sched_list_lock;	/* held while tx_sched_list is modified */
+static struct mtx rx_sched_list_lock;	/* held while rx_sched_list is modified */
+
+static int vif_unit_maker = 0;	/* next unit number handed to if_initname() for "vif" */
+
+/* Protos */
+static void netback_start(struct ifnet *ifp);
+static int netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
+static int vif_add_dev(struct xenbus_device *xdev);
+static void disconnect_rings(netif_t *netif);
+
+#ifdef XEN_NETBACK_DEBUG_LOTS
+/* Debug code: hex-dump an mbuf chain, at most 'max' bytes (0 means no limit) */
+static void
+print_mbuf(struct mbuf *m, int max)
+{
+	int i, j = 0;	/* j counts bytes dumped across the whole chain */
+	printf("mbuf %08x len = %d", (unsigned int)m, m->m_pkthdr.len);
+	for (; m; m = m->m_next) {
+		unsigned char *d = m->m_data;
+		for (i = 0; i < m->m_len; i++, j++) {
+			if (max && j == max)
+				break;
+			if ((j % 16) == 0)	/* row label is the offset of the row's first byte */
+				printf("\n%04x:", j);
+			printf(" %02x", d[i]);
+		}
+	}
+	printf("\n");
+}
+#endif
+
+
+#define MAX_MFN_ALLOC 64
+static unsigned long mfn_list[MAX_MFN_ALLOC];
+static unsigned int alloc_index = 0;
+
+static unsigned long
+alloc_mfn(void)
+{
+	/* Pop one spare machine frame, refilling the cache from Xen when it is empty; returns 0 if none. */
+	struct xen_memory_reservation reservation = {
+		.extent_start = mfn_list,
+		.nr_extents   = MAX_MFN_ALLOC,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+	int got;	/* signed: the hypercall may return a negative error code */
+	if ( unlikely(alloc_index == 0) ) {
+		got = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
+		alloc_index = (got > 0) ? got : 0;	/* never let an error value index mfn_list */
+	}
+	return (alloc_index != 0) ? mfn_list[--alloc_index] : 0;
+}
+
+static unsigned long
+alloc_empty_page_range(unsigned long nr_pages)
+{
+	void *pages;
+	int i = 0, j = 0;	/* i: pages handled so far; j: entries in the current batch */
+	multicall_entry_t mcl[17];	/* up to 16 update_va_mapping calls plus 1 memory_op */
+	unsigned long mfn_list[16];	/* NB: shadows the file-scope mfn_list used by alloc_mfn() */
+	struct xen_memory_reservation reservation = {
+		.extent_start = mfn_list,
+		.nr_extents   = 0,
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+	/* Allocate a VA range, then give its backing frames back to Xen in batches of 16; returns the VA, or 0. */
+	pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
+	if (pages == NULL)
+		return 0;
+
+	memset(mcl, 0, sizeof(mcl));	/* args[1..] are never set below, so they stay zero */
+
+	while (i < nr_pages) {
+		unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
+
+		mcl[j].op = __HYPERVISOR_update_va_mapping;
+		mcl[j].args[0] = va;
+
+		mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;	/* remember the frame being released */
+
+		xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
+
+		if (j == 16 || i == nr_pages) {	/* batch full, or last page reached */
+			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
+
+			reservation.nr_extents = j;
+
+			mcl[j].op = __HYPERVISOR_memory_op;
+			mcl[j].args[0] = XENMEM_decrease_reservation;
+			mcl[j].args[1] =  (unsigned long)&reservation;
+			/* NOTE(review): the multicall result is deliberately discarded here */
+			(void)HYPERVISOR_multicall(mcl, j+1);
+
+			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;	/* clear the flush flag before the slot is reused */
+			j = 0;
+		}
+	}
+
+	return (unsigned long)pages;
+}
+
+#ifdef XEN_NETBACK_FIXUP_CSUM	/* software completion of TCP/UDP checksums; called from netif_rx() */
+static void
+fixup_checksum(struct mbuf *m)
+{
+	struct ether_header *eh = mtod(m, struct ether_header *);
+	struct ip *ip = (struct ip *)(eh + 1);	/* treats the bytes right after the Ethernet header as an IPv4 header */
+	int iphlen = ip->ip_hl << 2;
+	int iplen = ntohs(ip->ip_len);
+
+	if ((m->m_pkthdr.csum_flags & CSUM_TCP)) {
+		struct tcphdr *th = (struct tcphdr *)((caddr_t)ip + iphlen);
+		th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+			htons(IPPROTO_TCP + (iplen - iphlen)));	/* seed with the pseudo-header sum */
+		th->th_sum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen);
+		m->m_pkthdr.csum_flags &= ~CSUM_TCP;
+	} else {	/* anything not flagged CSUM_TCP is handled as UDP */
+		u_short csum;
+		struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
+		uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
+			htons(IPPROTO_UDP + (iplen - iphlen)));
+		if ((csum = in_cksum_skip(m, iplen + sizeof(*eh), sizeof(*eh) + iphlen)) == 0)
+			csum = 0xffff;	/* a computed sum of 0 is stored as 0xffff */
+		uh->uh_sum = csum;
+		m->m_pkthdr.csum_flags &= ~CSUM_UDP;
+	}
+}
+#endif
+
+/* Add the interface to the named bridge; returns ENOENT if no such bridge exists, else the ioctl's result */
+static int
+add_to_bridge(struct ifnet *ifp, char *bridge)
+{
+	struct ifdrv ifd;
+	struct ifbreq ifb;
+	struct ifnet *ifp_bridge = ifunit(bridge);
+
+	if (!ifp_bridge)
+		return ENOENT;
+
+	bzero(&ifd, sizeof(ifd));
+	bzero(&ifb, sizeof(ifb));
+
+	/* Bounded copies: never write past the fixed-size name fields */
+	strlcpy(ifb.ifbr_ifsname, ifp->if_xname, sizeof(ifb.ifbr_ifsname));
+	strlcpy(ifd.ifd_name, ifp->if_xname, sizeof(ifd.ifd_name));
+	ifd.ifd_cmd = BRDGADD;
+	ifd.ifd_len = sizeof(ifb);
+	ifd.ifd_data = &ifb;
+
+	return bridge_ioctl_kern(ifp_bridge, SIOCSDRVSPEC, &ifd);
+}
+
+static int
+netif_create(int handle, struct xenbus_device *xdev, char *bridge)
+{
+	netif_t *netif;
+	struct ifnet *ifp;
+
+	/* Takes ownership of 'bridge' (may be NULL): kept in the netif on success, freed on any failure. */
+	netif = (netif_t *)malloc(sizeof(*netif), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (!netif)
+		goto fail;
+	ifp = netif->ifp = if_alloc(IFT_ETHER);
+	if (!ifp)
+		goto fail;
+
+	netif->ref_cnt = 1;
+	netif->handle = handle;
+	netif->domid = xdev->otherend_id;
+	netif->xdev = xdev;
+	netif->bridge = bridge;
+	xdev->data = netif;	/* published only once nothing can fail, so it never dangles */
+
+	/* Set up ifnet structure */
+	ifp->if_softc = netif;
+	if_initname(ifp, "vif", atomic_fetchadd_int(&vif_unit_maker, 1) /* ifno */ );
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
+	ifp->if_output = ether_output;
+	ifp->if_start = netback_start;
+	ifp->if_ioctl = netback_ioctl;
+	ifp->if_mtu = ETHERMTU;
+	ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1;
+	DPRINTF("Created %s for domid=%d handle=%d\n", IFNAME(netif), netif->domid, netif->handle);
+	return 0;
+
+fail:
+	if (bridge)
+		free(bridge, M_DEVBUF);
+	if (netif)
+		free(netif, M_DEVBUF);
+	return ENOMEM;
+}
+
+static void
+netif_get(netif_t *netif)
+{
+	atomic_add_int(&netif->ref_cnt, 1);	/* take one reference; dropped again by netif_put() */
+}
+
+static void
+netif_put(netif_t *netif)
+{
+	if (atomic_fetchadd_int(&netif->ref_cnt, -1) != 1)	/* old count != 1: references remain */
+		return;
+	DPRINTF("%s\n", IFNAME(netif));
+	disconnect_rings(netif);
+	if (netif->ifp != NULL) {
+		if_free(netif->ifp);
+		netif->ifp = NULL;
+	}
+	if (netif->bridge != NULL)
+		free(netif->bridge, M_DEVBUF);
+	free(netif, M_DEVBUF);
+}
+
+static int
+netback_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	/* SIOCSIFFLAGS is accepted and ignored; everything else goes to ether_ioctl(). */
+	if (cmd == SIOCSIFFLAGS) {
+		DDPRINTF("%s cmd=SIOCSIFFLAGS flags=%x\n",
+			IFNAME((struct netback_info *)ifp->if_softc),
+			((struct ifreq *)data)->ifr_flags);
+		return 0;
+	}
+
+	DDPRINTF("%s cmd=%lx\n", IFNAME((struct netback_info *)ifp->if_softc), cmd);
+	return ether_ioctl(ifp, cmd, data);
+}
+
+static inline void
+maybe_schedule_tx_action(void)
+{
+	smp_mb();	/* barrier before sampling the pending counters and the schedule list */
+	if ((NR_PENDING_REQS < (MAX_PENDING_REQS/2)) && !STAILQ_EMPTY(&tx_sched_list))
+		taskqueue_enqueue(taskqueue_swi, &net_tx_task); /* run net_tx_action only while under half the slots are taken */
+}
+
+/* Pops the head of the TX schedule list (NULL if empty); the caller must netif_put() the result */
+static netif_t *
+remove_from_tx_schedule_list(void)
+{
+	netif_t *head;
+
+	mtx_lock(&tx_sched_list_lock);
+
+	head = STAILQ_FIRST(&tx_sched_list);
+	if (head != NULL) {
+		STAILQ_REMOVE_HEAD(&tx_sched_list, next_tx);
+		STAILQ_NEXT(head, next_tx) = NULL;
+		head->on_tx_sched_list = 0;
+	}
+	mtx_unlock(&tx_sched_list_lock);
+
+	return head;
+}
+
+/* Adds netif to end of list and calls netif_get(); no-op if already queued or the interface is not running */
+static void
+add_to_tx_schedule_list_tail(netif_t *netif)
+{
+	if (netif->on_tx_sched_list)
+		return;
+	/* The test above is an unlocked fast path; it is repeated under the lock below. */
+	mtx_lock(&tx_sched_list_lock);
+	if (!netif->on_tx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+		netif_get(netif);	/* the list holds its own reference */
+		STAILQ_INSERT_TAIL(&tx_sched_list, netif, next_tx);
+		netif->on_tx_sched_list = 1;
+	}
+	mtx_unlock(&tx_sched_list_lock);
+}
+
+/*
+ * Note on CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER:
+ * If this driver is pipelining transmit requests then we can be very
+ * aggressive in avoiding new-packet notifications -- the frontend only needs
+ * to send a notification if there are no outstanding unreceived responses.
+ * If we may be buffering transmit requests for any reason then we must be
+ * more conservative and treat this as the final check for pending work.
+ */
+static void
+netif_schedule_tx_work(netif_t *netif)
+{
+	int more_to_do;
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+	more_to_do = RING_HAS_UNCONSUMED_REQUESTS(&netif->tx);
+#else
+	RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+#endif
+
+	if (more_to_do) {	/* requests remain: queue the netif and maybe kick the TX task */
+		DDPRINTF("Adding %s to tx sched list\n", IFNAME(netif));
+		add_to_tx_schedule_list_tail(netif);
+		maybe_schedule_tx_action();
+	}
+}
+
+static struct mtx dealloc_lock;	/* spin mutex held while dealloc_ring/dealloc_prod are updated */
+MTX_SYSINIT(netback_dealloc, &dealloc_lock, "DEALLOC LOCK", MTX_SPIN | MTX_NOWITNESS);
+
+static void
+netif_idx_release(uint16_t pending_idx)
+{
+	mtx_lock_spin(&dealloc_lock);
+	dealloc_ring[MASK_PEND_IDX(dealloc_prod++)] = pending_idx;	/* queue the slot for net_tx_action_dealloc() */
+	mtx_unlock_spin(&dealloc_lock);
+	/* The TX task performs the actual unmap and sends the response. */
+	taskqueue_enqueue(taskqueue_swi, &net_tx_task); 
+}
+
+static void
+make_tx_response(netif_t *netif, 
+				 uint16_t    id,
+				 int8_t      st)
+{
+	RING_IDX i = netif->tx.rsp_prod_pvt;
+	netif_tx_response_t *resp;
+	int notify;
+	/* Queue one TX response (request id + status) and notify the frontend if the ring macro asks for it. */
+	resp = RING_GET_RESPONSE(&netif->tx, i);
+	resp->id     = id;
+	resp->status = st;
+
+	netif->tx.rsp_prod_pvt = ++i;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->tx, notify);
+	if (notify)
+		notify_remote_via_irq(netif->irq);
+
+#ifdef CONFIG_XEN_NETDEV_PIPELINED_TRANSMITTER
+	if (i == netif->tx.req_cons) {	/* every consumed request has been answered */
+		int more_to_do;
+		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, more_to_do);
+		if (more_to_do)
+			add_to_tx_schedule_list_tail(netif);
+	}
+#endif
+}
+
+inline static void
+net_tx_action_dealloc(void)
+{
+	gnttab_unmap_grant_ref_t *gop;
+	uint16_t pending_idx;
+	PEND_RING_IDX dc, dp;
+	netif_t *netif;
+	int ret;
+	/* Snapshot the ring indices: only entries in [dc, dp) are handled in this pass. */
+	dc = dealloc_cons;
+	dp = dealloc_prod;
+
+	/*
+	 * Free up any grants we have finished using
+	 */
+	gop = tx_unmap_ops;
+	while (dc != dp) {
+		pending_idx = dealloc_ring[MASK_PEND_IDX(dc++)];
+		gop->host_addr    = MMAP_VADDR(pending_idx);
+		gop->dev_bus_addr = 0;
+		gop->handle       = grant_tx_handle[pending_idx];
+		gop++;
+	}
+	ret = HYPERVISOR_grant_table_op(
+		GNTTABOP_unmap_grant_ref, tx_unmap_ops, gop - tx_unmap_ops);
+	BUG_ON(ret);
+	/* Second pass over the same entries: answer the frontend and recycle each slot. */
+	while (dealloc_cons != dp) {
+		pending_idx = dealloc_ring[MASK_PEND_IDX(dealloc_cons++)];
+
+		netif = pending_tx_info[pending_idx].netif;
+
+		make_tx_response(netif, pending_tx_info[pending_idx].req.id, 
+				 NETIF_RSP_OKAY);
+        
+		pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;	/* slot is free again */
+
+		netif_put(netif);	/* drop the reference the request carried (see net_tx_action) */
+	}
+}
+
+static void
+netif_page_release(void *buf, void *args)
+{
+	uint16_t pending_idx = (unsigned int)args;	/* the slot index was packed into the pointer by MEXTADD() */
+	
+	DDPRINTF("pending_idx=%u\n", pending_idx);
+
+	KASSERT(pending_idx < MAX_PENDING_REQS, ("%s: bad index %u", __func__, pending_idx));
+	/* External-storage free routine registered via MEXTADD() in net_tx_action(). */
+	netif_idx_release(pending_idx);
+}
+
+static void
+net_tx_action(void *context, int pending)
+{
+	struct mbuf *m;
+	netif_t *netif;
+	netif_tx_request_t txreq;
+	uint16_t pending_idx;
+	RING_IDX i;
+	gnttab_map_grant_ref_t *mop;
+	int ret, work_to_do;
+	struct mbuf *txq = NULL, *txq_last = NULL;
+	/* Task handler: collect frontend TX requests, map their grants in one batch, then pass the packets up. */
+	if (dealloc_cons != dealloc_prod)
+		net_tx_action_dealloc();
+
+	mop = tx_map_ops;
+	while ((NR_PENDING_REQS < MAX_PENDING_REQS) && !STAILQ_EMPTY(&tx_sched_list)) {
+
+		/* Get a netif from the list with work to do. */
+		netif = remove_from_tx_schedule_list();
+
+		DDPRINTF("Processing %s (prod=%u, cons=%u)\n",
+				IFNAME(netif), netif->tx.sring->req_prod, netif->tx.req_cons);
+
+		RING_FINAL_CHECK_FOR_REQUESTS(&netif->tx, work_to_do);
+		if (!work_to_do) {
+			netif_put(netif);
+			continue;
+		}
+
+		i = netif->tx.req_cons;
+		rmb(); /* Ensure that we see the request before we copy it. */
+		memcpy(&txreq, RING_GET_REQUEST(&netif->tx, i), sizeof(txreq));
+
+		/* If we want credit-based scheduling, could add it here - WORK */
+
+		netif->tx.req_cons++;
+
+		netif_schedule_tx_work(netif);
+
+		if (unlikely(txreq.size < ETHER_HDR_LEN) || 
+		    unlikely(txreq.size > (ETHER_MAX_LEN-ETHER_CRC_LEN))) {
+			WPRINTF("Bad packet size: %d\n", txreq.size);
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			continue; 
+		}
+
+		/* The payload must not cross a page; ending exactly on the page boundary is still valid. */
+		if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) {
+			WPRINTF("txreq.offset: %x, size: %u, end: %u\n", 
+				txreq.offset, txreq.size, 
+				(txreq.offset & PAGE_MASK) + txreq.size);
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			continue;
+		}
+
+		pending_idx = pending_ring[MASK_PEND_IDX(pending_cons)];
+
+		MGETHDR(m, M_DONTWAIT, MT_DATA);
+		if (!m) {
+			WPRINTF("Failed to allocate mbuf\n");
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			break;
+		}
+		m->m_pkthdr.rcvif = netif->ifp;
+
+		if ((m->m_pkthdr.len = txreq.size) > PKT_PROT_LEN) {
+			struct mbuf *n;
+			MGET(n, M_DONTWAIT, MT_DATA);
+			if (!(m->m_next = n)) {
+				m_freem(m);
+				WPRINTF("Failed to allocate second mbuf\n");
+				make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+				netif_put(netif);
+				break;
+			}
+			n->m_len = txreq.size - PKT_PROT_LEN;
+			m->m_len = PKT_PROT_LEN;
+		} else
+			m->m_len = txreq.size;
+
+		mop->host_addr = MMAP_VADDR(pending_idx);
+		mop->dom       = netif->domid;
+		mop->ref       = txreq.gref;
+		mop->flags     = GNTMAP_host_map | GNTMAP_readonly;
+		mop++;
+
+		memcpy(&pending_tx_info[pending_idx].req,
+		       &txreq, sizeof(txreq));
+		pending_tx_info[pending_idx].netif = netif;
+		*((uint16_t *)m->m_data) = pending_idx;	/* stash the slot in the still-empty mbuf for the second pass */
+
+		if (txq_last)
+			txq_last->m_nextpkt = m;
+		else
+			txq = m;
+		txq_last = m;
+
+		pending_cons++;
+
+		if ((mop - tx_map_ops) >= ARRAY_SIZE(tx_map_ops))
+			break;
+	}
+
+	if (!txq)
+		return;
+
+	ret = HYPERVISOR_grant_table_op(
+		GNTTABOP_map_grant_ref, tx_map_ops, mop - tx_map_ops);
+	BUG_ON(ret);
+
+	mop = tx_map_ops;	/* walk the map results in the same order as the queued mbufs */
+	while ((m = txq) != NULL) {
+		caddr_t data;
+
+		txq = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+
+		pending_idx = *((uint16_t *)m->m_data);
+		netif       = pending_tx_info[pending_idx].netif;
+		memcpy(&txreq, &pending_tx_info[pending_idx].req, sizeof(txreq));
+
+		/* Check the remap error code. */
+		if (unlikely(mop->status)) {
+			WPRINTF("#### netback grant fails\n");
+			make_tx_response(netif, txreq.id, NETIF_RSP_ERROR);
+			netif_put(netif);
+			m_freem(m);
+			mop++;
+			pending_ring[MASK_PEND_IDX(pending_prod++)] = pending_idx;
+			continue;
+		}
+
+#if 0
+		/* Can't do this in FreeBSD since vtophys() returns the pfn */
+		/* of the remote domain who loaned us the machine page - DPT */
+		xen_phys_machine[(vtophys(MMAP_VADDR(pending_idx)) >> PAGE_SHIFT)] =
+			mop->dev_bus_addr >> PAGE_SHIFT;
+#endif
+		grant_tx_handle[pending_idx] = mop->handle;
+
+		/* Setup data in mbuf (lengths are already set) */
+		data = (caddr_t)(MMAP_VADDR(pending_idx)|txreq.offset);
+		bcopy(data, m->m_data, m->m_len);
+		if (m->m_next) {
+			struct mbuf *n = m->m_next;
+			MEXTADD(n, MMAP_VADDR(pending_idx), PAGE_SIZE, netif_page_release,
+				(void *)(unsigned int)pending_idx, M_RDONLY, EXT_NET_DRV);
+			n->m_data = &data[PKT_PROT_LEN];
+		} else {
+			/* Schedule a response immediately. */
+			netif_idx_release(pending_idx);
+		}
+
+		if ((txreq.flags & NETTXF_data_validated)) {
+			/* Tell the stack the checksums are okay */
+			m->m_pkthdr.csum_flags |=
+				(CSUM_IP_CHECKED | CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+			m->m_pkthdr.csum_data = 0xffff;
+		}
+
+		/* If necessary, inform stack to compute the checksums if it forwards the packet */
+		if ((txreq.flags & NETTXF_csum_blank)) {
+			struct ether_header *eh = mtod(m, struct ether_header *);
+			if (ntohs(eh->ether_type) == ETHERTYPE_IP) {
+				struct ip *ip = (struct ip *)&m->m_data[14];
+				if (ip->ip_p == IPPROTO_TCP)
+					m->m_pkthdr.csum_flags |= CSUM_TCP;
+				else if (ip->ip_p == IPPROTO_UDP)
+					m->m_pkthdr.csum_flags |= CSUM_UDP;
+			}
+		}
+
+		netif->ifp->if_ibytes += m->m_pkthdr.len;
+		netif->ifp->if_ipackets++;
+
+		DDPRINTF("RECV %d bytes from %s (cflags=%x)\n",
+			m->m_pkthdr.len, IFNAME(netif), m->m_pkthdr.csum_flags);
+		DPRINTF_MBUF_LEN(m, 128);
+
+		(*netif->ifp->if_input)(netif->ifp, m);
+
+		mop++;
+	}
+}
+
+/* Handle interrupt from a frontend: queue the netif for TX processing and maybe kick the TX task */
+static void
+netback_intr(void *arg)
+{
+	netif_t *netif = arg;	/* the handler argument registered in connect_rings() */
+	DDPRINTF("%s\n", IFNAME(netif));
+	add_to_tx_schedule_list_tail(netif);
+	maybe_schedule_tx_action();
+}
+
+/* Pops the head of the RX schedule list (NULL if empty); the caller must netif_put() the result */
+static netif_t *
+remove_from_rx_schedule_list(void)
+{
+	netif_t *head;
+
+	mtx_lock(&rx_sched_list_lock);
+
+	head = STAILQ_FIRST(&rx_sched_list);
+	if (head != NULL) {
+		STAILQ_REMOVE_HEAD(&rx_sched_list, next_rx);
+		STAILQ_NEXT(head, next_rx) = NULL;
+		head->on_rx_sched_list = 0;
+	}
+	mtx_unlock(&rx_sched_list_lock);
+
+	return head;
+}
+
+/* Adds netif to end of list and calls netif_get(); no-op if already queued or the interface is not running */
+static void
+add_to_rx_schedule_list_tail(netif_t *netif)
+{
+	if (netif->on_rx_sched_list)
+		return;
+	/* The test above is an unlocked fast path; it is repeated under the lock below. */
+	mtx_lock(&rx_sched_list_lock);
+	if (!netif->on_rx_sched_list && (netif->ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+		netif_get(netif);	/* the list holds its own reference */
+		STAILQ_INSERT_TAIL(&rx_sched_list, netif, next_rx);
+		netif->on_rx_sched_list = 1;
+	}
+	mtx_unlock(&rx_sched_list_lock);
+}
+
+static int
+make_rx_response(netif_t *netif, uint16_t id, int8_t st,
+				 uint16_t offset, uint16_t size, uint16_t flags)
+{
+	RING_IDX i = netif->rx.rsp_prod_pvt;
+	netif_rx_response_t *resp;
+	int notify;
+	/* Queue one RX response; returns nonzero if the frontend should be notified (the caller does that). */
+	resp = RING_GET_RESPONSE(&netif->rx, i);
+	resp->offset     = offset;
+	resp->flags      = flags;
+	resp->id         = id;
+	resp->status     = (int16_t)size;	/* status carries the size unless st is negative */
+	if (st < 0)
+		resp->status = (int16_t)st;
+
+	DDPRINTF("rx resp(%d): off=%x fl=%x id=%x stat=%d\n",
+		i, resp->offset, resp->flags, resp->id, resp->status);
+
+	netif->rx.rsp_prod_pvt = ++i;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&netif->rx, notify);
+
+	return notify;
+}
+
+static int
+netif_rx(netif_t *netif)
+{
+	struct ifnet *ifp = netif->ifp;
+	struct mbuf *m;
+	multicall_entry_t *mcl;
+	mmu_update_t *mmu;
+	gnttab_transfer_t *gop;
+	unsigned long vdata, old_mfn, new_mfn;
+	struct mbuf *rxq = NULL, *rxq_last = NULL;
+	int ret, notify = 0, pkts_dequeued = 0;
+	/* Drain ifp->if_snd towards the frontend by page transfer; returns the number of packets dequeued. */
+	DDPRINTF("%s\n", IFNAME(netif));
+
+	mcl = rx_mcl;
+	mmu = rx_mmu;
+	gop = grant_rx_op;
+
+	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
+		
+		/* Quit if the target domain has no receive buffers */
+		if (netif->rx.req_cons == netif->rx.sring->req_prod)
+			break;
+
+		IFQ_DRV_DEQUEUE(&ifp->if_snd, m);
+		if (m == NULL)
+			break;
+
+		pkts_dequeued++;
+
+		/* Check if we need to copy the data */
+		if (((m->m_flags & (M_RDONLY|M_EXT)) != M_EXT) ||
+			(*m->m_ext.ref_cnt > 1) || m->m_next != NULL) {
+			struct mbuf *n;
+				
+			DDPRINTF("copying mbuf (fl=%x ext=%x rc=%d n=%x)\n",
+				m->m_flags,
+				(m->m_flags & M_EXT) ? m->m_ext.ext_type : 0,
+				(m->m_flags & M_EXT) ? *m->m_ext.ref_cnt : 0,
+				(unsigned int)m->m_next);
+
+			/* Make copy */
+			MGETHDR(n, M_DONTWAIT, MT_DATA);
+			if (!n)
+				goto drop;
+
+			MCLGET(n, M_DONTWAIT);
+			if (!(n->m_flags & M_EXT)) {
+				m_freem(n);
+				goto drop;
+			}
+
+			/* Leave space at front and keep current alignment */
+			n->m_data += 16 + ((unsigned int)m->m_data & 0x3);
+
+			if (m->m_pkthdr.len > M_TRAILINGSPACE(n)) {
+				WPRINTF("pkt to big %d\n", m->m_pkthdr.len);
+				m_freem(n);
+				goto drop;
+			}
+			m_copydata(m, 0, m->m_pkthdr.len, n->m_data);
+			n->m_pkthdr.len = n->m_len = m->m_pkthdr.len;
+			n->m_pkthdr.csum_flags = (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA);
+			m_freem(m);
+			m = n;
+		}
+
+		vdata = (unsigned long)m->m_data;
+		old_mfn = vtomach(vdata) >> PAGE_SHIFT;	/* frame that will be transferred to the frontend */
+
+		if ((new_mfn = alloc_mfn()) == 0)	/* replacement frame to back this VA afterwards */
+			goto drop;
+
+#ifdef XEN_NETBACK_FIXUP_CSUM
+		/* Check if we need to compute a checksum.  This happens */
+		/* when bridging from one domain to another. */
+		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
+			fixup_checksum(m);
+#endif
+
+		xen_phys_machine[(vtophys(vdata) >> PAGE_SHIFT)] = new_mfn;
+
+		mcl->op = __HYPERVISOR_update_va_mapping;
+		mcl->args[0] = vdata;
+		mcl->args[1] = (new_mfn << PAGE_SHIFT) | PG_V | PG_RW | PG_M | PG_A;
+		mcl->args[2] = 0;
+		mcl->args[3] = 0;
+		mcl++;
+
+		gop->mfn = old_mfn;
+		gop->domid = netif->domid;
+		gop->ref = RING_GET_REQUEST(&netif->rx, netif->rx.req_cons)->gref;
+		netif->rx.req_cons++;
+		gop++;
+
+		mmu->ptr = (new_mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+		mmu->val = vtophys(vdata) >> PAGE_SHIFT;  
+		mmu++;
+
+		if (rxq_last)
+			rxq_last->m_nextpkt = m;
+		else
+			rxq = m;
+		rxq_last = m;
+
+		DDPRINTF("XMIT %d bytes to %s\n", m->m_pkthdr.len, IFNAME(netif));
+		DPRINTF_MBUF_LEN(m, 128);
+
+		/* Filled the batch queue? */
+		if ((gop - grant_rx_op) == ARRAY_SIZE(grant_rx_op))
+			break;		
+
+		continue;
+	drop:
+		DDPRINTF("dropping pkt\n");
+		ifp->if_oerrors++;
+		m_freem(m);
+	}
+
+	if (mcl == rx_mcl)	/* nothing was queued for transfer */
+		return pkts_dequeued;
+
+	mcl->op = __HYPERVISOR_mmu_update;
+	mcl->args[0] = (unsigned long)rx_mmu;
+	mcl->args[1] = mmu - rx_mmu;
+	mcl->args[2] = 0;
+	mcl->args[3] = DOMID_SELF;
+	mcl++;
+	/* mcl[-2] is the last update_va_mapping entry: request the TLB flush once, on that call. */
+	mcl[-2].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_ALL;
+	ret = HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
+	BUG_ON(ret != 0);
+
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_transfer, grant_rx_op, gop - grant_rx_op);
+	BUG_ON(ret != 0);
+
+	mcl = rx_mcl;	/* second pass: walk the results in the same order as rxq */
+	gop = grant_rx_op;
+
+	while ((m = rxq) != NULL) {
+		int8_t status;
+		uint16_t id, flags = 0;
+
+		rxq = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+
+		/* Rederive the machine addresses. */
+		new_mfn = mcl->args[1] >> PAGE_SHIFT;
+		old_mfn = gop->mfn;
+
+		ifp->if_obytes += m->m_pkthdr.len;
+		ifp->if_opackets++;
+
+		/* The update_va_mapping() must not fail. */
+		BUG_ON(mcl->result != 0);
+
+		/* Setup flags */
+		if ((m->m_pkthdr.csum_flags & CSUM_DELAY_DATA))
+			flags |= NETRXF_csum_blank | NETRXF_data_validated;
+		else if ((m->m_pkthdr.csum_flags & CSUM_DATA_VALID))
+			flags |= NETRXF_data_validated;
+
+		/* Check the reassignment error code. */
+		status = NETIF_RSP_OKAY;
+		if (gop->status != 0) { 
+			DPRINTF("Bad status %d from grant transfer to DOM%u\n",
+				gop->status, netif->domid);
+			/*
+			 * Page no longer belongs to us unless GNTST_bad_page,
+			 * but that should be a fatal error anyway.
+			 */
+			BUG_ON(gop->status == GNTST_bad_page);
+			status = NETIF_RSP_ERROR; 
+		}
+		id = RING_GET_REQUEST(&netif->rx, netif->rx.rsp_prod_pvt)->id;
+		notify |= make_rx_response(netif, id, status,
+					(unsigned long)m->m_data & PAGE_MASK,
+					m->m_pkthdr.len, flags);
+
+		m_freem(m);
+		mcl++;
+		gop++;
+	}
+
+	if (notify)
+		notify_remote_via_irq(netif->irq);
+
+	return pkts_dequeued;
+}
+
+static void
+rx_task_timer(void *arg)
+{
+	DDPRINTF("\n");
+	taskqueue_enqueue(taskqueue_swi, &net_rx_task); /* callout handler armed by net_rx_action(): re-run the RX task */
+}
+
+static void
+net_rx_action(void *context, int pending)
+{
+	netif_t *netif, *last_zero_work = NULL;	/* last_zero_work: first netif in the current run of netifs that dequeued nothing */
+
+	DDPRINTF("\n");
+
+	while ((netif = remove_from_rx_schedule_list())) {
+		struct ifnet *ifp = netif->ifp;
+
+		if (netif == last_zero_work) {	/* seen again with no progress since: stop and retry via the callout */
+			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+				add_to_rx_schedule_list_tail(netif);
+			netif_put(netif);
+			if (!STAILQ_EMPTY(&rx_sched_list))
+				callout_reset(&rx_task_callout, 1, rx_task_timer, NULL);
+			break;
+		}
+
+		if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
+			if (netif_rx(netif))
+				last_zero_work = NULL;
+			else if (!last_zero_work)
+				last_zero_work = netif;
+			if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
+				add_to_rx_schedule_list_tail(netif);	/* packets remain: queue it again */
+		}
+
+		netif_put(netif);	/* drop the reference the schedule list held */
+	}
+}
+
+static void
+netback_start(struct ifnet *ifp)
+{
+	netif_t *netif = (netif_t *)ifp->if_softc;
+
+	DDPRINTF("%s\n", IFNAME(netif));
+	/* if_start handler (installed in netif_create): defer the work to net_rx_action(). */
+	add_to_rx_schedule_list_tail(netif);
+	taskqueue_enqueue(taskqueue_swi, &net_rx_task); 
+}
+
+/* Map a grant ref to a ring; returns 0, ENOMEM (no VA available) or EACCES (grant map failed) */
+static int
+map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
+{
+	struct gnttab_map_grant_ref op;
+
+	ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+	if (ring->va == 0)
+		return ENOMEM;
+
+	op.host_addr = ring->va;
+	op.flags = GNTMAP_host_map;
+	op.ref = ref;
+	op.dom = dom;
+	op.status = GNTST_general_error;	/* stays set if the hypercall fails before writing a status */
+	HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
+	if (op.status) {
+		WPRINTF("grant table op err=%d\n", op.status);
+		kmem_free(kernel_map, ring->va, PAGE_SIZE);
+		ring->va = 0;
+		return EACCES;
+	}
+
+	ring->handle = op.handle;
+	ring->bus_addr = op.dev_bus_addr;
+	return 0;
+}
+
+/* Unmap grant ref for a ring and release its VA; a failed unmap is only logged */
+static void
+unmap_ring(struct ring_ref *ring)
+{
+	struct gnttab_unmap_grant_ref op;
+
+	op.host_addr = ring->va;
+	op.dev_bus_addr = ring->bus_addr;
+	op.handle = ring->handle;
+	op.status = GNTST_general_error;	/* stays set if the hypercall fails before writing a status */
+	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
+	if (op.status)
+		WPRINTF("grant table op err=%d\n", op.status);
+	kmem_free(kernel_map, ring->va, PAGE_SIZE);
+	ring->va = 0;
+}
+
+static int
+connect_rings(netif_t *netif)
+{
+	struct xenbus_device *xdev = netif->xdev;
+	netif_tx_sring_t *txs;
+	netif_rx_sring_t *rxs;
+	unsigned long tx_ring_ref, rx_ring_ref;
+	evtchn_port_t evtchn;
+	evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+	int err;
+
+	/* Read the frontend's ring references and event channel, then map its rings */
+	err = xenbus_gather(NULL, xdev->otherend,
+			"tx-ring-ref", "%lu", &tx_ring_ref,
+		    "rx-ring-ref", "%lu", &rx_ring_ref,
+		    "event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_fatal(xdev, err,
+			"reading %s/ring-ref and event-channel",
+			xdev->otherend);
+		return err;
+	}
+
+	err = map_ring(tx_ring_ref, netif->domid, &netif->tx_ring_ref);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "mapping tx ring");
+		return err;
+	}
+	txs = (netif_tx_sring_t *)netif->tx_ring_ref.va;
+	BACK_RING_INIT(&netif->tx, txs, PAGE_SIZE);
+
+	err = map_ring(rx_ring_ref, netif->domid, &netif->rx_ring_ref);
+	if (err) {
+		unmap_ring(&netif->tx_ring_ref);	/* undo the TX mapping made above */
+		xenbus_dev_fatal(xdev, err, "mapping rx ring");
+		return err;
+	}
+	rxs = (netif_rx_sring_t *)netif->rx_ring_ref.va;
+	BACK_RING_INIT(&netif->rx, rxs, PAGE_SIZE);
+
+	op.u.bind_interdomain.remote_dom = netif->domid;
+	op.u.bind_interdomain.remote_port = evtchn;
+	err = HYPERVISOR_event_channel_op(&op);
+	if (err) {
+		unmap_ring(&netif->tx_ring_ref);
+		unmap_ring(&netif->rx_ring_ref);
+		xenbus_dev_fatal(xdev, err, "binding event channel");
+		return err;
+	}
+	netif->evtchn = op.u.bind_interdomain.local_port;
+
+	/* bind evtchn to irq handler; NOTE(review): the return value is stored unchecked */
+	netif->irq =
+		bind_evtchn_to_irqhandler(netif->evtchn, "netback",
+			netback_intr, netif, INTR_TYPE_NET|INTR_MPSAFE, &netif->irq_cookie);
+
+	netif->rings_connected = 1;	/* tells disconnect_rings() there is something to tear down */
+
+	DPRINTF("%s connected! evtchn=%d irq=%d\n",
+		IFNAME(netif), netif->evtchn, netif->irq);
+
+	return 0;
+}
+
+static void
+disconnect_rings(netif_t *netif)
+{
+	DPRINTF("\n");
+	/* Undo connect_rings(); does nothing if the rings were never connected. */
+	if (netif->rings_connected) {
+		unbind_from_irqhandler(netif->irq, netif->irq_cookie);
+		netif->irq = 0;
+		unmap_ring(&netif->tx_ring_ref);
+		unmap_ring(&netif->rx_ring_ref);
+		netif->rings_connected = 0;
+	}
+}
+
+static void
+connect(netif_t *netif)
+{
+	if (!netif->xdev ||
+		!netif->attached ||
+		netif->frontend_state != XenbusStateConnected) {
+		return;	/* not ready: needs a xenbus device, the attached flag and a Connected frontend */
+	}
+
+	if (!connect_rings(netif)) {	/* connect_rings() returns 0 on success */
+		xenbus_switch_state(netif->xdev, NULL, XenbusStateConnected);
+
+		/* Turn on interface */
+		netif->ifp->if_drv_flags |= IFF_DRV_RUNNING;
+		netif->ifp->if_flags |= IFF_UP;
+	}
+}
+
+/*
+ * Xenbus remove callback.  Detaches the newbus vif device (if one was
+ * created), severs the links between the xenbus device and the netif, and
+ * drops a reference on the netif.  Always returns 0.
+ */
+static int
+netback_remove(struct xenbus_device *xdev)
+{
+	netif_t *netif = xdev->data;
+	device_t child;
+
+	DPRINTF("remove %s\n", xdev->nodename);
+
+	child = netif->ndev;
+	if (child != NULL) {
+		/* Clear the back pointer first, then detach under Giant. */
+		netif->ndev = NULL;
+		mtx_lock(&Giant);
+		device_detach(child);
+		mtx_unlock(&Giant);
+	}
+
+	netif->xdev = NULL;
+	xdev->data = NULL;
+	netif_put(netif);
+
+	return (0);
+}
+
+/**
+ * Entry point to this code when a new device is created.  Reads the vif
+ * handle and the optional bridge name from the backend's xenstore node,
+ * switches the backend to the InitWait state, creates the netif and adds the
+ * newbus vif device for it.
+ *
+ * Returns 0 on success, otherwise the error reported by the failing step.
+ */
+static int
+netback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
+{
+	int err;
+	long handle;
+	char *bridge;
+	
+	DPRINTF("node=%s\n", xdev->nodename);
+
+	/* Grab the handle */
+	err = xenbus_scanf(NULL, xdev->nodename, "handle", "%li", &handle);
+	if (err != 1) {
+		xenbus_dev_fatal(xdev, err, "reading handle");
+		return err;
+	}
+
+	/* Check for bridge; any read error is treated as "no bridge". */
+	bridge = xenbus_read(NULL, xdev->nodename, "bridge", NULL);
+	if (IS_ERR(bridge))
+		bridge = NULL;
+
+	err = xenbus_switch_state(xdev, NULL, XenbusStateInitWait);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "writing switch state");
+		/* Nothing has taken ownership of the bridge name yet. */
+		if (bridge != NULL)
+			free(bridge, M_DEVBUF);
+		return err;
+	}
+
+	/*
+	 * NOTE(review): netif_create() is not visible here; presumably it
+	 * takes ownership of 'bridge' -- confirm whether it also releases
+	 * the string when it fails.
+	 */
+	err = netif_create(handle, xdev, bridge);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "creating netif");
+		return err;
+	}
+
+	err = vif_add_dev(xdev);
+	if (err) {
+		/* Drop the reference on the netif stored in xdev->data. */
+		netif_put((netif_t *)xdev->data);
+		xenbus_dev_fatal(xdev, err, "adding vif device");
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * Xenbus resume callback, invoked after a suspend/resume cycle or a backend
+ * driver restart.  The current implementation only logs the node name and
+ * reports success; no state is torn down or rebuilt here.
+ */
+static int
+netback_resume(struct xenbus_device *xdev)
+{
+
+	DPRINTF("node=%s\n", xdev->nodename);
+	return (0);
+}
+
+
+/**
+ * Xenbus callback invoked whenever the frontend publishes a new state.
+ * Records the state on the netif and reacts to it.
+ */
+static void
+frontend_changed(struct xenbus_device *xdev, XenbusState frontend_state)
+{
+	netif_t *netif = xdev->data;
+
+	DPRINTF("state=%d\n", frontend_state);
+
+	netif->frontend_state = frontend_state;
+
+	switch (frontend_state) {
+	case XenbusStateUnknown:
+	case XenbusStateInitWait:
+		/* Reported as an error on the device. */
+		xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
+		    frontend_state);
+		break;
+
+	case XenbusStateInitialising:
+	case XenbusStateInitialised:
+		/* Nothing to do for these states. */
+		break;
+
+	case XenbusStateConnected:
+		/* connect() itself checks the remaining preconditions. */
+		connect(netif);
+		break;
+
+	case XenbusStateClosing:
+		/* Follow the frontend into Closing. */
+		xenbus_switch_state(xdev, NULL, XenbusStateClosing);
+		break;
+
+	case XenbusStateClosed:
+		xenbus_remove_device(xdev);
+		break;
+	}
+}
+
+/* ** Driver registration ** */
+
+/*
+ * Xenbus device types served by this backend.  The trailing empty entry
+ * presumably terminates the list -- confirm against the xenbus core.
+ */
+static struct xenbus_device_id netback_ids[] = {
+	{ "vif" },
+	{ "" }
+};
+
+/* Callback table handed to xenbus_register_backend() by netback_init(). */
+static struct xenbus_driver netback = {
+	.name = "netback",
+	.ids = netback_ids,
+	.probe = netback_probe,
+	.remove = netback_remove,
+	.resume= netback_resume,
+	.otherend_changed = frontend_changed,
+};
+
+/*
+ * One-time driver initialisation, run through the SYSINIT below.  Sets up
+ * the rx callout, reserves the page range used for pending requests, seeds
+ * the pending ring, initialises the tx/rx tasks and their schedule-list
+ * locks, and finally registers the backend with xenbus.
+ */
+static void
+netback_init(void *unused)
+{
+	callout_init(&rx_task_callout, CALLOUT_MPSAFE);
+
+	/* One page per possible pending request; failure is fatal (BUG_ON). */
+	mmap_vstart = alloc_empty_page_range(MAX_PENDING_REQS);
+	BUG_ON(!mmap_vstart);
+
+	/*
+	 * Fill the pending ring with the indices 0..MAX_PENDING_REQS-1.  The
+	 * loop leaves pending_prod == MAX_PENDING_REQS and pending_cons == 0.
+	 */
+	pending_cons = 0;
+	for (pending_prod = 0; pending_prod < MAX_PENDING_REQS; pending_prod++)
+		pending_ring[pending_prod] = pending_prod;
+
+	TASK_INIT(&net_tx_task, 0, net_tx_action, NULL);
+	TASK_INIT(&net_rx_task, 0, net_rx_action, NULL);
+	mtx_init(&tx_sched_list_lock, "nb_tx_sched_lock", "netback tx sched lock", MTX_DEF);
+	mtx_init(&rx_sched_list_lock, "nb_rx_sched_lock", "netback rx sched lock", MTX_DEF);
+
+	DPRINTF("registering %s\n", netback.name);
+
+	/* Register last, after the state used by the callbacks is set up. */
+	xenbus_register_backend(&netback);
+}
+
+/* Run netback_init() at the SI_SUB_PSEUDO stage of boot. */
+SYSINIT(xnbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, netback_init, NULL)
+
+/*
+ * Create the newbus "vif" device that represents the netif stored in
+ * xdev->data and attach it as a child of nexus0.  The whole operation runs
+ * with Giant held.
+ *
+ * Returns 0 on success, ENOENT when nexus0 cannot be found, or EFAULT when
+ * the child device cannot be created.
+ *
+ * NOTE(review): the result of device_probe_and_attach() is not checked, so
+ * an attach failure is still reported as success -- confirm this is
+ * intended.
+ */
+static int
+vif_add_dev(struct xenbus_device *xdev)
+{
+	netif_t *netif = xdev->data;
+	device_t nexus, ndev;
+	devclass_t dc;
+	int err = 0;
+
+	mtx_lock(&Giant);
+
+	/* We will add a vif device as a child of nexus0 (for now) */
+	if (!(dc = devclass_find("nexus")) ||
+		!(nexus = devclass_get_device(dc, 0))) {
+		WPRINTF("could not find nexus0!\n");
+		err = ENOENT;
+		goto done;
+	}
+
+
+	/* Create a newbus device representing the vif */
+	ndev = BUS_ADD_CHILD(nexus, 0, "vif", netif->ifp->if_dunit);
+	if (!ndev) {
+		WPRINTF("could not create newbus device %s!\n", IFNAME(netif));
+		err = EFAULT;
+		goto done;
+	}
+	
+	/* The device keeps its own reference; vif_detach() calls netif_put(). */
+	netif_get(netif);
+	device_set_ivars(ndev, netif);
+	netif->ndev = ndev;
+
+	device_probe_and_attach(ndev);
+
+ done:
+
+	mtx_unlock(&Giant);
+
+	return err;
+}
+
+/* Selector passed as arg2 to vif_sysctl_handler() for each sysctl node. */
+enum {
+	VIF_SYSCTL_DOMID,	/* "domid" node */
+	VIF_SYSCTL_HANDLE,	/* "handle" node */
+	VIF_SYSCTL_TXRING,	/* "txring" node (XEN_NETBACK_DEBUG only) */
+	VIF_SYSCTL_RXRING,	/* "rxring" node (XEN_NETBACK_DEBUG only) */
+};
+
+/*
+ * Format a one-line, human-readable snapshot of the tx or rx ring indices.
+ *
+ * 'cmd' selects the ring: VIF_SYSCTL_TXRING reports the tx ring, any other
+ * value reports the rx ring.  The returned buffer is allocated from
+ * M_DEVBUF and must be released by the caller with free(buf, M_DEVBUF).
+ * All formatting is bounded by the size of the allocation.
+ */
+static char *
+vif_sysctl_ring_info(netif_t *netif, int cmd)
+{
+	enum { RING_INFO_BUFLEN = 256 };
+	char *buf = malloc(RING_INFO_BUFLEN, M_DEVBUF, M_WAITOK);
+
+	if (buf) {
+		if (!netif->rings_connected)
+			snprintf(buf, RING_INFO_BUFLEN, "rings not connected\n");
+		else if (cmd == VIF_SYSCTL_TXRING) {
+			netif_tx_back_ring_t *tx = &netif->tx;
+			snprintf(buf, RING_INFO_BUFLEN, "nr_ents=%x req_cons=%x"
+					" req_prod=%x req_event=%x"
+					" rsp_prod=%x rsp_event=%x",
+					tx->nr_ents, tx->req_cons,
+					tx->sring->req_prod, tx->sring->req_event,
+					tx->sring->rsp_prod, tx->sring->rsp_event);
+		} else {
+			netif_rx_back_ring_t *rx = &netif->rx;
+			snprintf(buf, RING_INFO_BUFLEN, "nr_ents=%x req_cons=%x"
+					" req_prod=%x req_event=%x"
+					" rsp_prod=%x rsp_event=%x",
+					rx->nr_ents, rx->req_cons,
+					rx->sring->req_prod, rx->sring->req_event,
+					rx->sring->rsp_prod, rx->sring->rsp_event);
+		}
+	}
+	return buf;
+}
+
+/*
+ * Shared handler for every vif sysctl node.  arg1 carries the device and
+ * arg2 one of the VIF_SYSCTL_* selectors.  The integer nodes are answered
+ * directly; the ring nodes are rendered into a temporary string that is
+ * copied out and then released.
+ */
+static int
+vif_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+	device_t dev = (device_t)arg1;
+	netif_t *netif = (netif_t *)device_get_ivars(dev);
+	char *info;
+	int err;
+
+	switch (arg2) {
+	case VIF_SYSCTL_DOMID:
+		return sysctl_handle_int(oidp, NULL, netif->domid, req);
+	case VIF_SYSCTL_HANDLE:
+		return sysctl_handle_int(oidp, NULL, netif->handle, req);
+	case VIF_SYSCTL_TXRING:
+	case VIF_SYSCTL_RXRING:
+		/* Handled below. */
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	info = vif_sysctl_ring_info(netif, arg2);
+
+	/* The terminating NUL is not part of what is copied out. */
+	err = SYSCTL_OUT(req, info, strlen(info));
+	if (info != NULL)
+		free(info, M_DEVBUF);
+
+	return err;
+}
+
+/*
+ * Newbus probe method for the vif driver.  Accepts every device offered to
+ * it by returning 0 unconditionally.
+ */
+static int
+vif_probe(device_t dev)
+{
+
+	DDPRINTF("vif%d\n", device_get_unit(dev));
+	return (0);
+}
+
+/*
+ * Newbus vif device driver attach.
+ *
+ * Publishes the per-device sysctl nodes, attaches the ifnet to the ethernet
+ * layer with a fixed MAC address, marks the netif as attached, tries to
+ * connect to the frontend, and finally joins the configured bridge (if any).
+ * Returns the result of bus_generic_attach().
+ */
+static int
+vif_attach(device_t dev) 
+{
+	netif_t *netif = (netif_t *)device_get_ivars(dev);
+	uint8_t mac[ETHER_ADDR_LEN];
+
+	DDPRINTF("%s\n", IFNAME(netif));
+
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
+	    dev, VIF_SYSCTL_DOMID, vif_sysctl_handler, "I",
+	    "domid of frontend");
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "handle", CTLTYPE_INT|CTLFLAG_RD,
+	    dev, VIF_SYSCTL_HANDLE, vif_sysctl_handler, "I",
+	    "handle of frontend");
+#ifdef XEN_NETBACK_DEBUG
+	/* Ring-state dumps are only exposed in debug builds. */
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "txring", CTLFLAG_RD,
+	    dev, VIF_SYSCTL_TXRING, vif_sysctl_handler, "A",
+	    "tx ring info");
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "rxring", CTLFLAG_RD,
+	    dev, VIF_SYSCTL_RXRING, vif_sysctl_handler, "A",
+	    "rx ring info");
+#endif
+
+	/*
+	 * Use fe:ff:ff:ff:ff:ff as the interface address: all ones with the
+	 * low bit of the first octet (the multicast/group bit) cleared.
+	 */
+	memset(mac, 0xff, sizeof(mac));
+	mac[0] &= ~0x01;
+	
+	ether_ifattach(netif->ifp, mac);
+	netif->attached = 1;
+
+	/* Completes the connection if the frontend is already Connected. */
+	connect(netif);
+
+	if (netif->bridge) {
+		DPRINTF("Adding %s to bridge %s\n", IFNAME(netif), netif->bridge);
+		int err = add_to_bridge(netif->ifp, netif->bridge);
+		/* A bridge failure is only logged; attach still proceeds. */
+		if (err) {
+			WPRINTF("Error adding %s to %s; err=%d\n",
+				IFNAME(netif), netif->bridge, err);
+		}
+	}
+
+	return bus_generic_attach(dev);
+}
+
+/*
+ * Newbus vif device driver detach.
+ *
+ * Marks the interface as no longer running, detaches it from the ethernet
+ * layer, clears the 'attached' flag and drops a reference on the netif
+ * (presumably the one taken by vif_add_dev() -- confirm).  Always returns 0.
+ */
+static int
+vif_detach(device_t dev)
+{
+	netif_t *netif = (netif_t *)device_get_ivars(dev);
+	struct ifnet *ifp = netif->ifp;
+
+	DDPRINTF("%s\n", IFNAME(netif));
+
+	/* Tell the stack that the interface is no longer active */
+	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	ether_ifdetach(ifp);
+
+	bus_generic_detach(dev);
+
+	netif->attached = 0;
+
+	netif_put(netif);
+
+	return 0;
+}
+
+/* Newbus method table for the vif driver. */
+static device_method_t vif_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		vif_probe),
+	DEVMETHOD(device_attach, 	vif_attach),
+	DEVMETHOD(device_detach,	vif_detach),
+	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
+	DEVMETHOD(device_suspend,	bus_generic_suspend),
+	DEVMETHOD(device_resume,	bus_generic_resume),
+	{0, 0}
+};
+
+static devclass_t vif_devclass;
+
+/*
+ * Driver description: name, method table, softc size.  The softc size is 0;
+ * per-device state is reached through the device ivars instead.
+ */
+static driver_t vif_driver = {
+	"vif",
+	vif_methods,
+	0,
+};
+
+/* Register the vif driver on the nexus bus. */
+DRIVER_MODULE(vif, nexus, vif_driver, vif_devclass, 0, 0);
+
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */

Property changes on: dev/xen/netback/netback.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: dev/xen/blkback/blkback.c
===================================================================
--- dev/xen/blkback/blkback.c	(.../stable/6/sys)	(revision 0)
+++ dev/xen/blkback/blkback.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,1349 @@
+/*
+ * Copyright (c) 2006, Cisco Systems, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions 
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in the 
+ *    documentation and/or other materials provided with the distribution. 
+ * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors 
+ *    may be used to endorse or promote products derived from this software 
+ *    without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/socket.h>
+#include <sys/queue.h>
+#include <sys/taskqueue.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/fcntl.h>
+#include <sys/disk.h>
+#include <sys/bio.h>
+
+#include <sys/module.h>
+#include <sys/bus.h>
+#include <sys/sysctl.h>
+
+#include <geom/geom.h>
+
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
+
+#include <machine/xen-os.h>
+#include <machine/hypervisor.h>
+#include <machine/hypervisor-ifs.h>
+#include <machine/xen_intr.h>
+#include <machine/evtchn.h>
+#include <machine/xenbus.h>
+#include <machine/gnttab.h>
+#include <machine/xen-public/memory.h>
+#include <dev/xen/xenbus/xenbus_comms.h>
+
+
+/*
+ * Debug trace output, prefixed with the calling function and line number.
+ * Compiles to a no-op unless XEN_BLKBACK_DEBUG evaluates to non-zero.
+ */
+#if XEN_BLKBACK_DEBUG
+#define DPRINTF(fmt, args...) \
+    printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTF(fmt, args...) ((void)0)
+#endif
+
+/* Warning output with the same prefix; always compiled in. */
+#define WPRINTF(fmt, args...) \
+    printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
+
+/* Marks a pending grant-handle slot that currently holds no mapping. */
+#define BLKBACK_INVALID_HANDLE (~0)
+
+/* Bookkeeping for one granted ring page mapped by map_ring(). */
+struct ring_ref {
+	vm_offset_t va;		/* kernel VA the page is mapped at; 0 when unmapped */
+	grant_handle_t handle;	/* handle returned by the map operation */
+	uint64_t bus_addr;	/* dev_bus_addr returned by the map operation */
+};
+
+/*
+ * Per-device backend state: one instance per virtual block device exported
+ * to a frontend.  Created by blkif_create() and reference counted through
+ * blkif_get()/blkif_put().
+ */
+typedef struct blkback_info {
+
+	/* Schedule lists */
+	STAILQ_ENTRY(blkback_info) next_req;	/* linkage on req_sched_list */
+	int on_req_sched_list;			/* non-zero while queued there */
+
+	struct xenbus_device *xdev;	/* owning xenbus device; cleared on remove */
+	XenbusState frontend_state;	/* last state seen in frontend_changed() */
+
+	domid_t domid;			/* frontend domain (xdev->otherend_id) */
+
+	int state;			/* backend state; Connected once connect() succeeds */
+	int ring_connected;		/* set by connect_ring(), cleared by disconnect_ring() */
+	struct ring_ref rr;		/* mapping of the shared ring page */
+	blkif_back_ring_t ring;		/* back-ring view of that page */
+	evtchn_port_t evtchn;		/* local port bound to the frontend's channel */
+	int irq;			/* irq bound to evtchn */
+	void *irq_cookie;		/* cookie needed to unbind the handler */
+
+	int ref_cnt;			/* see blkif_get()/blkif_put() */
+
+	int handle;
+	char *mode;			/* "w" means writable; released in blkif_put() */
+	char *type;			/* released in blkif_put() */
+	char *dev_name;			/* "params" string; released in blkif_put() */
+
+	/*
+	 * Backing-device description.  These fields are filled in outside
+	 * this part of the file -- presumably when the device is opened.
+	 */
+	struct vnode *vn;
+	struct cdev *cdev;
+	struct cdevsw *csw;
+	u_int sector_size;
+	int sector_size_shift;
+	off_t media_size;
+	u_int media_num_sectors;
+	int major;
+	int minor;
+	int read_only;			/* set when mode is anything other than "w" */
+
+	struct mtx blk_ring_lock;	/* held while queueing a response */
+
+	device_t ndev;			/* newbus device; detached in blkback_remove() */
+
+	/* Stats */
+	int st_rd_req;			/* read requests seen */
+	int st_wr_req;			/* write requests seen */
+	int st_oo_req;			/* times we ran out of pending_req slots */
+	int st_err_req;			/* failed bios and unknown operations */
+} blkif_t;
+
+/*
+ * These are rather arbitrary. They are fairly large because adjacent requests
+ * pulled from a communication ring are quite likely to end up being part of
+ * the same scatter/gather request at the disc.
+ * 
+ * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
+ * 
+ * This will increase the chances of being able to write whole tracks.
+ * 64 should be enough to keep us competitive with Linux.
+ */
+/* Defaults to 64; can be overridden through the "xen.vbd.blkif_reqs" tunable. */
+static int blkif_reqs = 64;
+TUNABLE_INT("xen.vbd.blkif_reqs", &blkif_reqs);
+
+/*
+ * NOTE(review): assigned outside this part of the file; presumably the
+ * number of pages reserved for mapping request segments -- confirm.
+ */
+static int mmap_pages;
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each bio that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+typedef struct pending_req {
+	blkif_t       *blkif;		/* interface the request arrived on */
+	uint64_t       id;		/* frontend's request id, echoed in the response */
+	int            nr_pages;	/* number of segments in the request */
+	int            pendcnt;		/* bios still in flight */
+	unsigned short operation;	/* BLKIF_OP_* copied from the request */
+	int            status;		/* BLKIF_RSP_* to report once all bios finish */
+	STAILQ_ENTRY(pending_req) free_list;	/* linkage on pending_free */
+} pending_req_t;
+
+/*
+ * Pool of request descriptors.  'pending_reqs' is the base of the array
+ * (vaddr_pagenr() indexes relative to it); unused entries sit on
+ * 'pending_free', which is protected by 'pending_free_lock'.
+ */
+static pending_req_t *pending_reqs;
+static STAILQ_HEAD(pending_reqs_list, pending_req) pending_free =
+	STAILQ_HEAD_INITIALIZER(pending_free);
+static struct mtx pending_free_lock;
+
+/*
+ * Interfaces with requests waiting to be processed by blk_req_action().
+ * Protected by 'req_sched_list_lock'.
+ */
+static STAILQ_HEAD(blkback_req_sched_list, blkback_info) req_sched_list =
+	STAILQ_HEAD_INITIALIZER(req_sched_list);
+static struct mtx req_sched_list_lock;
+
+/*
+ * Per-segment mapping state, indexed by vaddr_pagenr(): the virtual address
+ * each segment is mapped at and the grant handle of the current mapping.
+ * NOTE(review): mmap_vstart is not used in the visible part of this file.
+ */
+static unsigned long mmap_vstart;
+static unsigned long *pending_vaddrs;
+static grant_handle_t *pending_grant_handles;
+
+/* Task that runs blk_req_action() on taskqueue_swi. */
+static struct task blk_req_task;
+
+/* Protos */
+static void disconnect_ring(blkif_t *blkif);
+static int vbd_add_dev(struct xenbus_device *xdev);
+
+/*
+ * Index of segment 'seg' of request 'req' in the flat per-segment arrays.
+ * Every request owns BLKIF_MAX_SEGMENTS_PER_REQUEST consecutive slots,
+ * ordered by the request's position in the pending_reqs array.
+ */
+static inline int
+vaddr_pagenr(pending_req_t *req, int seg)
+{
+	return ((req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg);
+}
+
+/* Virtual address reserved for segment 'seg' of request 'req'. */
+static inline unsigned long
+vaddr(pending_req_t *req, int seg)
+{
+	int slot = vaddr_pagenr(req, seg);
+
+	return (pending_vaddrs[slot]);
+}
+
+/* Lvalue naming the grant-handle slot of segment '_seg' of request '_req'. */
+#define pending_handle(_req, _seg) \
+	(pending_grant_handles[vaddr_pagenr(_req, _seg)])
+
+/*
+ * Allocate 'nr_pages' pages of kernel virtual address space and hand the
+ * machine frames behind them back to the hypervisor, leaving an address
+ * range without backing memory.
+ *
+ * Returns the start address of the range, or 0 when the initial malloc()
+ * fails.
+ *
+ * NOTE(review): the result of HYPERVISOR_multicall() is discarded, so a
+ * failed batch goes unnoticed -- confirm this is acceptable.
+ */
+static unsigned long
+alloc_empty_page_range(unsigned long nr_pages)
+{
+	void *pages;
+	int i = 0, j = 0;
+	/* Up to 16 va-mapping updates plus one trailing memory_op per batch. */
+	multicall_entry_t mcl[17];
+	unsigned long mfn_list[16];
+	struct xen_memory_reservation reservation = {
+		.extent_start = mfn_list,
+		.nr_extents   = 0,
+		.address_bits = 0,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+
+	pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
+	if (pages == NULL)
+		return 0;
+
+	memset(mcl, 0, sizeof(mcl));
+
+	while (i < nr_pages) {
+		unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);
+
+		/*
+		 * Only args[0] (the address) is filled in; the remaining
+		 * arguments stay zero from the memset above.
+		 */
+		mcl[j].op = __HYPERVISOR_update_va_mapping;
+		mcl[j].args[0] = va;
+
+		/* Remember the machine frame so it can be released below. */
+		mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;
+
+		/* The physical page no longer has a machine frame behind it. */
+		xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;
+
+		/* Flush a batch when it is full or this was the last page. */
+		if (j == 16 || i == nr_pages) {
+			/* Request a local TLB flush on the last update only. */
+			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;
+
+			reservation.nr_extents = j;
+
+			mcl[j].op = __HYPERVISOR_memory_op;
+			mcl[j].args[0] = XENMEM_decrease_reservation;
+			mcl[j].args[1] =  (unsigned long)&reservation;
+			
+			(void)HYPERVISOR_multicall(mcl, j+1);
+
+			/* Reset the flush flag so the slot is clean for reuse. */
+			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
+			j = 0;
+		}
+	}
+
+	return (unsigned long)pages;
+}
+
+/*
+ * Take a request descriptor off the head of the free list.  Returns NULL
+ * when the pool is exhausted.
+ */
+static pending_req_t *
+alloc_req(void)
+{
+	pending_req_t *preq;
+
+	mtx_lock(&pending_free_lock);
+	preq = STAILQ_FIRST(&pending_free);
+	if (preq != NULL) {
+		STAILQ_REMOVE_HEAD(&pending_free, free_list);
+		/* Leave no stale link in the detached descriptor. */
+		STAILQ_NEXT(preq, free_list) = NULL;
+	}
+	mtx_unlock(&pending_free_lock);
+
+	return (preq);
+}
+
+/*
+ * Return a request descriptor to the free list.  If the list was empty
+ * beforehand, kick the request task so that work which stalled for lack of
+ * descriptors gets another chance to run.
+ */
+static void
+free_req(pending_req_t *req)
+{
+	int pool_was_empty;
+
+	mtx_lock(&pending_free_lock);
+	pool_was_empty = STAILQ_EMPTY(&pending_free);
+	STAILQ_INSERT_TAIL(&pending_free, req, free_list);
+	mtx_unlock(&pending_free_lock);
+
+	if (!pool_was_empty)
+		return;
+
+	taskqueue_enqueue(taskqueue_swi, &blk_req_task);
+}
+
+/*
+ * Unmap every grant that is still mapped for 'req'.  Slots holding
+ * BLKBACK_INVALID_HANDLE are skipped; each unmapped slot is reset to that
+ * value.  Panics (PANIC_IF) if the hypercall itself fails.
+ *
+ * NOTE(review): the hypercall is issued even when no slot needed unmapping
+ * (invcount == 0) -- confirm that this is harmless.
+ */
+static void
+fast_flush_area(pending_req_t *req)
+{
+	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	unsigned int i, invcount = 0;
+	grant_handle_t handle;
+	int ret;
+
+	/* Collect one unmap operation per segment that has a live handle. */
+	for (i = 0; i < req->nr_pages; i++) {
+		handle = pending_handle(req, i);
+		if (handle == BLKBACK_INVALID_HANDLE)
+			continue;
+		unmap[invcount].host_addr    = vaddr(req, i);
+		unmap[invcount].dev_bus_addr = 0;
+		unmap[invcount].handle       = handle;
+		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
+		invcount++;
+	}
+
+	/* Submit all collected operations in a single hypercall. */
+	ret = HYPERVISOR_grant_table_op(
+		GNTTABOP_unmap_grant_ref, unmap, invcount);
+	PANIC_IF(ret);
+}
+
+/* Take an additional reference on 'blkif'; paired with blkif_put(). */
+static void
+blkif_get(blkif_t *blkif)
+{
+
+	atomic_add_int(&blkif->ref_cnt, 1);
+}
+
+/*
+ * Drop a reference on 'blkif'.  When the last reference goes away the ring
+ * is disconnected, the strings handed to blkif_create() are released, the
+ * ring lock initialised by blkif_create() is destroyed, and the structure
+ * itself is freed.
+ */
+static void
+blkif_put(blkif_t *blkif)
+{
+	/* atomic_fetchadd_int() returns the previous value: 1 means last ref. */
+	if (atomic_fetchadd_int(&blkif->ref_cnt, -1) == 1) {
+		DPRINTF("Removing %x\n", (unsigned int)blkif);
+		disconnect_ring(blkif);
+		if (blkif->mode)
+			free(blkif->mode, M_DEVBUF);
+		if (blkif->type)
+			free(blkif->type, M_DEVBUF);
+		if (blkif->dev_name)
+			free(blkif->dev_name, M_DEVBUF);
+		/* The mutex must be torn down before its storage is released. */
+		mtx_destroy(&blkif->blk_ring_lock);
+		free(blkif, M_DEVBUF);
+	}
+}
+
+/*
+ * Allocate and initialise the backend state for a new device and link it
+ * to 'xdev' (xdev->data).  The new structure starts with one reference.
+ *
+ * On success the 'mode', 'type' and 'params' strings are stored in the
+ * structure and later released by blkif_put(); on failure (ENOMEM) they
+ * are left untouched and remain the caller's responsibility.
+ *
+ * Returns 0 on success or ENOMEM.
+ */
+static int
+blkif_create(struct xenbus_device *xdev, long handle, char *mode, char *type, char *params)
+{
+	blkif_t *blkif;
+
+	blkif = (blkif_t *)malloc(sizeof(*blkif), M_DEVBUF, M_NOWAIT | M_ZERO);
+	if (!blkif)
+		return ENOMEM;
+	
+	DPRINTF("Created %x\n", (unsigned int)blkif);
+
+	blkif->ref_cnt = 1;
+	blkif->domid = xdev->otherend_id;
+	blkif->handle = handle;
+	blkif->mode = mode;
+	blkif->type = type;
+	blkif->dev_name = params;
+	blkif->xdev = xdev;
+	xdev->data = blkif;
+
+	/* NOTE(review): the lock name reads "blk_ring_ock" -- likely a typo. */
+	mtx_init(&blkif->blk_ring_lock, "blk_ring_ock", "blkback ring lock", MTX_DEF);
+
+	/* Any mode other than exactly "w" makes the device read-only. */
+	if (strcmp(mode, "w"))
+		blkif->read_only = 1;
+
+	return 0;
+}
+
+/*
+ * Queue 'blkif' for request processing and kick the request task.  The
+ * interface is only queued when it is not already on the list and its
+ * state is Connected.  A reference is taken for the time it spends on the
+ * list; blk_req_action() drops it after processing.
+ */
+static void
+add_to_req_schedule_list_tail(blkif_t *blkif)
+{
+	/* Unlocked pre-check; repeated under the lock before acting on it. */
+	if (!blkif->on_req_sched_list) {
+		mtx_lock(&req_sched_list_lock);
+		if (!blkif->on_req_sched_list && (blkif->state == XenbusStateConnected)) {
+			blkif_get(blkif);
+			STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req);
+			blkif->on_req_sched_list = 1;
+			taskqueue_enqueue(taskqueue_swi, &blk_req_task); 
+		}
+		mtx_unlock(&req_sched_list_lock);
+	}
+}
+
+/*
+ * Variant of add_to_req_schedule_list_tail() used to put an interface back
+ * on the list: it takes no reference via blkif_get(), does not schedule
+ * blk_req_task, and assumes the interface is in the connected state.
+ */
+static void
+add_to_req_schedule_list_tail2(blkif_t *blkif)
+{
+	int queued;
+
+	mtx_lock(&req_sched_list_lock);
+	queued = blkif->on_req_sched_list;
+	if (!queued) {
+		STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req);
+		blkif->on_req_sched_list = 1;
+	}
+	mtx_unlock(&req_sched_list_lock);
+}
+
+/*
+ * Detach and return the interface at the head of the schedule list, or
+ * NULL when the list is empty.  The reference held on behalf of the list is
+ * NOT dropped here; the caller must call blkif_put() when done.
+ */
+static blkif_t *
+remove_from_req_schedule_list(void)
+{
+	blkif_t *head;
+
+	mtx_lock(&req_sched_list_lock);
+	head = STAILQ_FIRST(&req_sched_list);
+	if (head != NULL) {
+		STAILQ_REMOVE_HEAD(&req_sched_list, next_req);
+		STAILQ_NEXT(head, next_req) = NULL;
+		head->on_req_sched_list = 0;
+	}
+	mtx_unlock(&req_sched_list_lock);
+
+	return (head);
+}
+
+/*
+ * Queue a response for request 'id' on the interface's ring and notify the
+ * frontend if the ring macros say a notification is needed.
+ *
+ *   op - operation code echoed back to the frontend
+ *   st - BLKIF_RSP_* status for the request
+ *
+ * The ring is updated under blk_ring_lock.  Rescheduling the interface and
+ * sending the notification both happen after the lock has been released.
+ */
+static void
+make_response(blkif_t *blkif, uint64_t id, 
+			  unsigned short op, int st)
+{
+	blkif_response_t *resp;
+	blkif_back_ring_t *blk_ring = &blkif->ring;
+	int more_to_do = 0;
+	int notify;
+
+	mtx_lock(&blkif->blk_ring_lock);
+
+
+	/* Place on the response ring for the relevant domain. */ 
+	resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
+	resp->id        = id;
+	resp->operation = op;
+	resp->status    = st;
+	blk_ring->rsp_prod_pvt++;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);
+
+	if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
+		/*
+		 * Tail check for pending requests. Allows frontend to avoid
+		 * notifications if requests are already in flight (lower
+		 * overheads and promotes batching).
+		 */
+		RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);
+
+	} else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring))
+		more_to_do = 1;
+
+	mtx_unlock(&blkif->blk_ring_lock);
+
+	/* More requests arrived in the meantime: get ourselves rescheduled. */
+	if (more_to_do)
+		add_to_req_schedule_list_tail(blkif);
+
+	if (notify)
+		notify_remote_via_irq(blkif->irq);
+}
+
+/*
+ * bio completion callback (installed as bio_done by dispatch_rw_block_io()).
+ * Records an error on the owning request if the bio failed.  When the last
+ * bio of the request completes, the granted pages are unmapped, the
+ * response is queued, the interface reference taken at dispatch time is
+ * dropped and the request descriptor is returned to the pool.  The bio is
+ * always destroyed.
+ */
+static void
+end_block_io_op(struct bio *bio)
+{
+	/* The owning request was stashed in bio_caller2 at dispatch time. */
+	pending_req_t *pending_req = bio->bio_caller2;
+
+	if (bio->bio_error) {
+		DPRINTF("BIO returned error %d for operation on device %s\n",
+				bio->bio_error, pending_req->blkif->dev_name);
+		pending_req->status = BLKIF_RSP_ERROR;
+		pending_req->blkif->st_err_req++;
+	}
+
+#if 0
+	printf("done: bio=%x error=%x completed=%llu resid=%lu flags=%x\n",
+		   (unsigned int)bio, bio->bio_error, bio->bio_completed, bio->bio_resid, bio->bio_flags);
+#endif
+
+	/* atomic_fetchadd_int() returns the old count: 1 means we were last. */
+	if (atomic_fetchadd_int(&pending_req->pendcnt, -1) == 1) {
+		fast_flush_area(pending_req);
+		make_response(pending_req->blkif, pending_req->id,
+			      pending_req->operation, pending_req->status);
+		blkif_put(pending_req->blkif);
+		free_req(pending_req);
+	}
+
+	g_destroy_bio(bio);
+}
+
+/*
+ * Validate a read/write request taken from the ring, map the frontend's
+ * granted pages and issue one bio per segment to the backing device.
+ *
+ *   blkif       - interface the request arrived on
+ *   req         - the request as found on the ring
+ *   pending_req - descriptor from alloc_req() that tracks the request
+ *
+ * On success the request completes asynchronously through
+ * end_block_io_op().  On any failure an error response is queued, every
+ * grant that was mapped is unmapped again, every bio that was allocated is
+ * destroyed, and the descriptor is returned to the pool.
+ */
+static void
+dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, pending_req_t *pending_req)
+{
+	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct { 
+		unsigned long buf; unsigned int nsec;
+	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	unsigned int nseg = req->nr_segments, nr_sects = 0;
+	struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	int operation, ret, i, nbio = 0;
+	int map_failed = 0;
+
+	/* Check that number of segments is sane. */
+	if (unlikely(nseg == 0) || 
+	    unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
+		DPRINTF("Bad number of segments in request (%d)\n", nseg);
+		goto fail_response;
+	}
+
+	if (req->operation == BLKIF_OP_WRITE) {
+		if (blkif->read_only) {
+			DPRINTF("Attempt to write to read only device %s\n", blkif->dev_name);
+			goto fail_response;
+		}
+		operation = BIO_WRITE;
+	} else
+		operation = BIO_READ;
+
+	pending_req->blkif     = blkif;
+	pending_req->id        = req->id;
+	pending_req->operation = req->operation;
+	pending_req->status    = BLKIF_RSP_OKAY;
+	pending_req->nr_pages  = nseg;
+
+	for (i = 0; i < nseg; i++) {
+		/*
+		 * Validate the sector range before computing its length:
+		 * nsec is unsigned, so an inverted range would otherwise
+		 * wrap to a huge value instead of being rejected.
+		 */
+		if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
+		    (req->seg[i].last_sect < req->seg[i].first_sect))
+			goto fail_response;
+
+		seg[i].nsec = req->seg[i].last_sect - 
+			req->seg[i].first_sect + 1;
+		nr_sects += seg[i].nsec;
+
+		map[i].host_addr = vaddr(pending_req, i);
+		map[i].dom = blkif->domid;
+		map[i].ref = req->seg[i].gref;
+		map[i].flags = GNTMAP_host_map;
+		/* A write to disk only reads from the frontend's page. */
+		if (operation == BIO_WRITE)
+			map[i].flags |= GNTMAP_readonly;
+	}
+
+	/* Convert to the disk's sector size */
+	nr_sects = (nr_sects << 9) >> blkif->sector_size_shift;
+
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
+	PANIC_IF(ret);
+
+	/*
+	 * Record the handle of EVERY segment before bailing out, so that
+	 * fast_flush_area() can unmap the segments that did map even when
+	 * another one failed.
+	 */
+	for (i = 0; i < nseg; i++) {
+		if (unlikely(map[i].status != 0)) {
+			DPRINTF("invalid buffer -- could not remap it\n");
+			map[i].handle = BLKBACK_INVALID_HANDLE;
+			map_failed = 1;
+		}
+
+		pending_handle(pending_req, i) = map[i].handle;
+		if (map_failed)
+			continue;
+
+		/*
+		 * The phys-to-machine table is deliberately not updated
+		 * here: in FreeBSD vtophys() returns the pfn of the remote
+		 * domain who loaned us the machine page - DPT
+		 */
+		seg[i].buf  = map[i].dev_bus_addr | 
+			(req->seg[i].first_sect << 9);
+	}
+	if (map_failed)
+		goto fail_flush;
+
+	if (req->sector_number + nr_sects > blkif->media_num_sectors) {
+		DPRINTF("%s of [%llu,%llu] extends past end of device %s\n",
+			operation == BIO_READ ? "read" : "write",
+			req->sector_number,
+			req->sector_number + nr_sects, blkif->dev_name); 
+		goto fail_flush;
+	}
+
+	for (i = 0; i < nseg; i++) {
+		struct bio *bio;
+
+		if ((int)seg[i].nsec & ((blkif->sector_size >> 9) - 1)) {
+			DPRINTF("Misaligned I/O request from domain %d", blkif->domid);
+			goto fail_put_bio;
+		}
+
+		/*
+		 * Only count a bio once it really exists, so biolist[0..nbio)
+		 * always holds exactly the bios that must be destroyed on
+		 * failure.
+		 */
+		bio = g_new_bio();
+		if (unlikely(bio == NULL))
+			goto fail_put_bio;
+		biolist[nbio++] = bio;
+
+		bio->bio_cmd = operation;
+		bio->bio_offset = req->sector_number << blkif->sector_size_shift;
+		bio->bio_length = seg[i].nsec << 9;
+		bio->bio_bcount = bio->bio_length;
+		/* Mapped page address plus the offset of the first sector. */
+		bio->bio_data = (caddr_t)(vaddr(pending_req, i) | (seg[i].buf & PAGE_MASK));
+		bio->bio_done = end_block_io_op;
+		bio->bio_caller2 = pending_req;
+		bio->bio_dev = blkif->cdev;
+
+		/* Advance to the device sector the next segment starts at. */
+		req->sector_number += (seg[i].nsec << 9) >> blkif->sector_size_shift;
+	}
+
+	/*
+	 * Publish the bio count and take the interface reference before any
+	 * bio is issued: completions may run as soon as d_strategy is called.
+	 */
+	pending_req->pendcnt = nbio;
+	blkif_get(blkif);
+
+	for (i = 0; i < nbio; i++)
+		(*blkif->csw->d_strategy)(biolist[i]);
+
+	return;
+
+ fail_put_bio:
+	for (i = 0; i < nbio; i++)
+		g_destroy_bio(biolist[i]);
+ fail_flush:
+	fast_flush_area(pending_req);
+ fail_response:
+	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
+	free_req(pending_req);
+}
+
+/*
+ * Task handler (blk_req_task): drain the schedule list, and for each
+ * interface consume the requests currently on its ring and dispatch them.
+ * Processing stops early when the pool of request descriptors runs dry; the
+ * interface is then put back on the list and the task runs again once
+ * free_req() refills an empty pool.
+ *
+ * NOTE(review): the list is tested for emptiness without the lock, and the
+ * result of remove_from_req_schedule_list() is used without a NULL check --
+ * confirm nothing else can drain the list concurrently.
+ */
+static void
+blk_req_action(void *context, int pending)
+{
+	blkif_t *blkif;
+
+	DPRINTF("\n");
+
+	while (!STAILQ_EMPTY(&req_sched_list)) {
+		blkif_back_ring_t *blk_ring;
+		RING_IDX rc, rp;
+
+		blkif = remove_from_req_schedule_list();
+
+		blk_ring = &blkif->ring;
+		rc = blk_ring->req_cons;
+		rp = blk_ring->sring->req_prod;
+		rmb(); /* Ensure we see queued requests up to 'rp'. */
+
+		while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {
+			blkif_request_t *req;
+			pending_req_t *pending_req;
+
+			/* Get a descriptor first; without one, stop for now. */
+			pending_req = alloc_req();
+			if (pending_req == NULL)
+				goto out_of_preqs;
+
+			req = RING_GET_REQUEST(blk_ring, rc);
+			blk_ring->req_cons = ++rc; /* before make_response() */
+
+			switch (req->operation) {
+			case BLKIF_OP_READ:
+				blkif->st_rd_req++;
+				dispatch_rw_block_io(blkif, req, pending_req);
+				break;
+			case BLKIF_OP_WRITE:
+				blkif->st_wr_req++;
+				dispatch_rw_block_io(blkif, req, pending_req);
+				break;
+			default:
+				blkif->st_err_req++;
+				DPRINTF("error: unknown block io operation [%d]\n",
+						req->operation);
+				make_response(blkif, req->id, req->operation,
+							  BLKIF_RSP_ERROR);
+				free_req(pending_req);
+				break;
+			}
+		}
+
+		/* Drop the reference that was held while on the schedule list. */
+		blkif_put(blkif);
+	}
+
+	return;
+
+ out_of_preqs:
+	/* We ran out of pending req structs */
+	/* Just requeue interface and wait to be rescheduled to run when one is freed */
+	/*
+	 * The list reference is kept (no blkif_put) because the interface
+	 * goes straight back onto the list.
+	 * NOTE(review): if the interface was re-queued by an interrupt in the
+	 * meantime, the call below inserts nothing and this reference appears
+	 * never to be dropped -- confirm.
+	 */
+	add_to_req_schedule_list_tail2(blkif);
+	blkif->st_oo_req++;
+}
+
+/*
+ * Event-channel interrupt handler: the frontend signalled us.  'arg' is the
+ * blkif registered in connect_ring(); all that happens here is scheduling
+ * the interface for request processing.
+ */
+static void
+blkback_intr(void *arg)
+{
+	blkif_t *blkif;
+
+	blkif = arg;
+	DPRINTF("%x\n", (unsigned int)blkif);
+
+	add_to_req_schedule_list_tail(blkif);
+}
+
+/*
+ * Map grant ref for ring.
+ *
+ * Reserves one page of kernel virtual address space and maps the page
+ * granted by domain 'dom' through grant reference 'ref' into it.  On
+ * success 'ring' holds the address, grant handle and bus address of the
+ * mapping.
+ *
+ * Returns 0 on success, ENOMEM when no address space is available, or
+ * EACCES when the hypercall or the map operation fails (the address space
+ * is released again and ring->va is reset to 0).
+ */
+static int
+map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
+{
+	struct gnttab_map_grant_ref op;
+	int ret;
+
+	ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+	if (ring->va == 0)
+		return ENOMEM;
+
+	op.host_addr = ring->va;
+	op.flags = GNTMAP_host_map;
+	op.ref = ref;
+	op.dom = dom;
+
+	/*
+	 * op.status is only meaningful when the hypercall itself succeeded,
+	 * so the return value has to be checked first.
+	 */
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
+	if (ret != 0 || op.status != 0) {
+		WPRINTF("grant table op err=%d\n", ret != 0 ? ret : op.status);
+		kmem_free(kernel_map, ring->va, PAGE_SIZE);
+		ring->va = 0;
+		return EACCES;
+	}
+
+	ring->handle = op.handle;
+	ring->bus_addr = op.dev_bus_addr;
+
+	return 0;
+}
+
+/*
+ * Unmap grant ref for ring.
+ *
+ * Reverses map_ring(): unmaps the granted page and releases the kernel
+ * virtual address range.  A failed unmap is only logged; the address range
+ * is released and ring->va reset to 0 regardless.
+ *
+ * NOTE(review): the hypercall's return value is ignored, so op.status may
+ * be read without having been written if the hypercall itself fails.
+ */
+static void
+unmap_ring(struct ring_ref *ring)
+{
+	struct gnttab_unmap_grant_ref op;
+
+	op.host_addr = ring->va;
+	op.dev_bus_addr = ring->bus_addr;
+	op.handle = ring->handle;
+	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
+	if (op.status)
+		WPRINTF("grant table op err=%d\n", op.status);
+
+	kmem_free(kernel_map, ring->va, PAGE_SIZE);
+	ring->va = 0;
+}
+
+/*
+ * Connect to the frontend's shared ring: read the ring reference and event
+ * channel from the frontend's xenstore node, map the ring page, bind the
+ * event channel and install blkback_intr() as its handler.
+ *
+ * Returns 0 on success, or immediately if the ring is already connected;
+ * otherwise the error of the failing step, which has also been reported
+ * through xenbus_dev_fatal().
+ *
+ * NOTE(review): the result of bind_evtchn_to_irqhandler() is stored without
+ * being checked for failure, and the handler is registered with
+ * INTR_TYPE_NET although this is a block device -- confirm both.
+ */
+static int
+connect_ring(blkif_t *blkif)
+{
+	struct xenbus_device *xdev = blkif->xdev;
+	blkif_sring_t *ring;
+	unsigned long ring_ref;
+	evtchn_port_t evtchn;
+	evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
+	int err;
+
+	if (blkif->ring_connected)
+		return 0;
+
+	// Grab FE data and map his memory
+	err = xenbus_gather(NULL, xdev->otherend,
+			"ring-ref", "%lu", &ring_ref,
+		    "event-channel", "%u", &evtchn, NULL);
+	if (err) {
+		xenbus_dev_fatal(xdev, err,
+			"reading %s/ring-ref and event-channel",
+			xdev->otherend);
+		return err;
+	}
+
+	err = map_ring(ring_ref, blkif->domid, &blkif->rr);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "mapping ring");
+		return err;
+	}
+	/* Initialise our (backend) view of the shared ring page. */
+	ring = (blkif_sring_t *)blkif->rr.va;
+	BACK_RING_INIT(&blkif->ring, ring, PAGE_SIZE);
+
+	op.u.bind_interdomain.remote_dom = blkif->domid;
+	op.u.bind_interdomain.remote_port = evtchn;
+	err = HYPERVISOR_event_channel_op(&op);
+	if (err) {
+		/* Undo the mapping made above before giving up. */
+		unmap_ring(&blkif->rr);
+		xenbus_dev_fatal(xdev, err, "binding event channel");
+		return err;
+	}
+	blkif->evtchn = op.u.bind_interdomain.local_port;
+
+	/* bind evtchn to irq handler */
+	blkif->irq =
+		bind_evtchn_to_irqhandler(blkif->evtchn, "blkback",
+			blkback_intr, blkif, INTR_TYPE_NET|INTR_MPSAFE, &blkif->irq_cookie);
+
+	blkif->ring_connected = 1;
+
+	DPRINTF("%x rings connected! evtchn=%d irq=%d\n",
+			(unsigned int)blkif, blkif->evtchn, blkif->irq);
+
+	return 0;
+}
+
+/*
+ * Reverse connect_ring(): remove the interrupt handler and unmap the shared
+ * ring page.  A no-op when the ring is not connected.
+ */
+static void
+disconnect_ring(blkif_t *blkif)
+{
+	DPRINTF("\n");
+
+	if (!blkif->ring_connected)
+		return;
+
+	unbind_from_irqhandler(blkif->irq, blkif->irq_cookie);
+	blkif->irq = 0;
+
+	unmap_ring(&blkif->rr);
+
+	blkif->ring_connected = 0;
+}
+
+/*
+ * Publish the device parameters the frontend needs ("sectors", "info" and
+ * "sector-size") in the backend's xenstore node and switch the backend to
+ * the Connected state.
+ *
+ * Does nothing unless the ring is connected, the backing vnode is open and
+ * the device is not already marked Connected.  Every failure is reported
+ * through xenbus_dev_fatal(); the device is only marked Connected when all
+ * steps succeeded.
+ */
+static void
+connect(blkif_t *blkif)
+{
+	struct xenbus_transaction *xbt;
+	struct xenbus_device *xdev = blkif->xdev;
+	int err;
+
+	if (!blkif->ring_connected ||
+		blkif->vn == NULL ||
+		blkif->state == XenbusStateConnected)
+		return;
+
+	DPRINTF("%s\n", xdev->otherend);
+
+	/* Supply the information about the device the frontend needs */
+again:
+	xbt = xenbus_transaction_start();
+	if (IS_ERR(xbt)) {
+		xenbus_dev_fatal(xdev, PTR_ERR(xbt),
+						 "Error writing configuration for backend "
+						 "(start transaction)");
+		return;
+	}
+
+	err = xenbus_printf(xbt, xdev->nodename, "sectors", "%u",
+				blkif->media_num_sectors);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "writing %s/sectors",
+				 xdev->nodename);
+		goto abort;
+	}
+
+	err = xenbus_printf(xbt, xdev->nodename, "info", "%u",
+				blkif->read_only ? VDISK_READONLY : 0);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "writing %s/info",
+				 xdev->nodename);
+		goto abort;
+	}
+	err = xenbus_printf(xbt, xdev->nodename, "sector-size", "%u",
+			    blkif->sector_size);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "writing %s/sector-size",
+				 xdev->nodename);
+		goto abort;
+	}
+
+	/* -EAGAIN asks for the whole transaction to be retried. */
+	err = xenbus_transaction_end(xbt, 0);
+	if (err == -EAGAIN)
+		goto again;
+	if (err) {
+		/* The parameters were not committed; do not claim Connected. */
+		xenbus_dev_fatal(xdev, err, "ending transaction");
+		return;
+	}
+
+	err = xenbus_switch_state(xdev, NULL, XenbusStateConnected);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "switching to Connected state");
+		return;
+	}
+
+	blkif->state = XenbusStateConnected;
+
+	return;
+
+ abort:
+	/* Second argument 1: abandon the transaction. */
+	xenbus_transaction_end(xbt, 1);
+}
+
+/*
+ * Xenbus probe callback for a new virtual block device.  Derives the
+ * handle from the last path component of the frontend's node, reads the
+ * "mode", "type" and "params" strings from the backend's node, creates the
+ * blkif and adds the newbus device for it.
+ *
+ * Returns 0 on success or the error of the failing step.  The strings are
+ * owned by the blkif once blkif_create() has succeeded; before that point
+ * they are released here on failure.
+ */
+static int
+blkback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
+{
+	int err;
+	char *p, *mode = NULL, *type = NULL, *params = NULL;
+	long handle;
+
+	DPRINTF("node=%s\n", xdev->nodename);
+
+	/* Fall back to the whole string if the path contains no '/'. */
+	p = strrchr(xdev->otherend, '/');
+	handle = strtoul(p != NULL ? p + 1 : xdev->otherend, NULL, 0);
+
+	/*
+	 * xenbus_read() reports failure with an ERR-encoded pointer.  Such a
+	 * value must never reach free(), so each pointer is reset to NULL
+	 * before jumping to the common cleanup.
+	 */
+	mode = xenbus_read(NULL, xdev->nodename, "mode", NULL);
+	if (IS_ERR(mode)) {
+		err = PTR_ERR(mode);
+		mode = NULL;
+		xenbus_dev_fatal(xdev, err, "reading mode");
+		goto error;
+	}
+	
+	type = xenbus_read(NULL, xdev->nodename, "type", NULL);
+	if (IS_ERR(type)) {
+		err = PTR_ERR(type);
+		type = NULL;
+		xenbus_dev_fatal(xdev, err, "reading type");
+		goto error;
+	}
+	
+	params = xenbus_read(NULL, xdev->nodename, "params", NULL);
+	if (IS_ERR(params)) {
+		err = PTR_ERR(params);
+		params = NULL;
+		xenbus_dev_fatal(xdev, err, "reading params");
+		goto error;
+	}
+	
+	err = blkif_create(xdev, handle, mode, type, params);
+	if (err) {
+		xenbus_dev_fatal(xdev, err, "creating blkif");
+		goto error;
+	}
+
+	err = vbd_add_dev(xdev);
+	if (err) {
+		/* The blkif owns the strings now; dropping it releases them. */
+		blkif_put((blkif_t *)xdev->data);
+		xenbus_dev_fatal(xdev, err, "adding vbd device");
+	}
+
+	return err;
+
+ error:
+	if (mode)
+		free(mode, M_DEVBUF);
+	if (type)
+		free(type, M_DEVBUF);
+	if (params)
+		free(params, M_DEVBUF);
+	return err;
+}
+/* xenbus remove: detach the newbus vbd device, unlink xdev and blkif, blkif_put(). */
+static int
+blkback_remove(struct xenbus_device *xdev)
+{
+	blkif_t *blkif = xdev->data;
+	device_t ndev;
+
+	DPRINTF("node=%s\n", xdev->nodename);
+
+	blkif->state = XenbusStateClosing;
+	/* Clear blkif->ndev first, then detach the device while holding Giant. */
+	if ((ndev = blkif->ndev)) {
+		blkif->ndev = NULL;
+		mtx_lock(&Giant);
+		device_detach(ndev);
+		mtx_unlock(&Giant);
+	}
+	/* Break the links in both directions before the final blkif_put(). */
+	xdev->data = NULL;
+	blkif->xdev = NULL;
+	blkif_put(blkif);
+
+	return 0;
+}
+/* xenbus resume hook: only logs the node name and returns 0. */
+static int
+blkback_resume(struct xenbus_device *xdev)
+{
+	DPRINTF("node=%s\n", xdev->nodename);
+	return 0;
+}
+/* otherend_changed hook: record the frontend's xenbus state and react to it. */
+static void
+frontend_changed(struct xenbus_device *xdev,
+				 XenbusState frontend_state)
+{
+	blkif_t *blkif = xdev->data;
+
+	DPRINTF("state=%d\n", frontend_state);
+
+	blkif->frontend_state = frontend_state;
+
+	switch (frontend_state) {
+	case XenbusStateInitialising:	/* nothing to do yet */
+		break;
+	case XenbusStateInitialised:
+	case XenbusStateConnected:
+		connect_ring(blkif);
+		connect(blkif);	/* returns early unless ring_connected and vn are set */
+		break;
+	case XenbusStateClosing:	/* mirror the frontend's Closing state */
+		xenbus_switch_state(xdev, NULL, XenbusStateClosing);
+		break;
+	case XenbusStateClosed:
+		xenbus_remove_device(xdev);
+		break;
+	case XenbusStateUnknown:	/* these two states are reported as EINVAL */
+	case XenbusStateInitWait:
+		xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
+						 frontend_state);
+		break;
+	}
+}
+
+/* ** Driver registration ** */
+/* Device ids handled by this backend; the empty string ends the list. */
+static struct xenbus_device_id blkback_ids[] = {
+	{ "vbd" },
+	{ "" }
+};
+/* xenbus driver descriptor passed to xenbus_register_backend() in blkback_init(). */
+static struct xenbus_driver blkback = {
+	.name = "blkback",
+	.ids = blkback_ids,
+	.probe = blkback_probe,
+	.remove = blkback_remove,
+	.resume = blkback_resume,
+	.otherend_changed = frontend_changed,	/* called with the frontend's new state */
+};
+/* SYSINIT hook: set up locks, request pools and page range, then register the backend. */
+static void
+blkback_init(void *unused)
+{
+	int i;
+
+	TASK_INIT(&blk_req_task, 0, blk_req_action, NULL);
+	mtx_init(&req_sched_list_lock, "blk_req_sched_lock", "blkback req sched lock", MTX_DEF);
+	/* NOTE(review): the lock name below reads "req_ock" - likely meant "req_lock". */
+	mtx_init(&pending_free_lock, "blk_pending_req_ock", "blkback pending request lock", MTX_DEF);
+	/* One page slot per possible segment of every request. */
+	mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
+	pending_reqs = malloc(sizeof(pending_reqs[0]) *
+		blkif_reqs, M_DEVBUF, M_ZERO|M_NOWAIT);
+	pending_grant_handles = malloc(sizeof(pending_grant_handles[0]) *
+		mmap_pages, M_DEVBUF, M_NOWAIT);
+	pending_vaddrs = malloc(sizeof(pending_vaddrs[0]) *
+		mmap_pages, M_DEVBUF, M_NOWAIT);
+	mmap_vstart = alloc_empty_page_range(mmap_pages);
+	if (!pending_reqs || !pending_grant_handles || !pending_vaddrs || !mmap_vstart) {
+		if (pending_reqs)	/* free only what was actually allocated */
+			free(pending_reqs, M_DEVBUF);
+		if (pending_grant_handles)
+			free(pending_grant_handles, M_DEVBUF);
+		if (pending_vaddrs)
+			free(pending_vaddrs, M_DEVBUF);
+		WPRINTF("out of memory\n");
+		return;	/* backend is not registered; mmap_vstart is not released here */
+	}
+	/* Assign each slot its virtual address and mark its grant handle invalid. */
+	for (i = 0; i < mmap_pages; i++) {
+		pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
+		pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
+	}
+	/* Every request starts out on the pending_free list. */
+	for (i = 0; i < blkif_reqs; i++) {
+		STAILQ_INSERT_TAIL(&pending_free, &pending_reqs[i], free_list);
+	}
+
+	DPRINTF("registering %s\n", blkback.name);
+	xenbus_register_backend(&blkback);
+}
+
+SYSINIT(xbbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, blkback_init, NULL)
+/* Release the cdevsw reference (if held) and close the vnode set by open_device(). */
+static void
+close_device(blkif_t *blkif)
+{
+	DPRINTF("closing dev=%s\n", blkif->dev_name);
+	if (blkif->vn) {
+		int flags = FREAD;	/* same flag computation as in open_device() */
+
+		if (!blkif->read_only)
+			flags |= FWRITE;
+
+		if (blkif->csw) {	/* set by dev_refthread() in open_device() */
+			dev_relthread(blkif->cdev);
+			blkif->csw = NULL;
+		}
+
+		(void)vn_close(blkif->vn, flags, NOCRED, curthread);
+		blkif->vn = NULL;	/* makes a second call a no-op */
+	}
+}
+/* Open blkif->dev_name, require a disk, and record its geometry; 0 or an errno. */
+static int
+open_device(blkif_t *blkif)
+{
+	struct nameidata nd;
+	struct vattr vattr;
+	struct cdev *dev;
+	struct cdevsw *devsw;
+	int flags = FREAD, err = 0;
+
+	DPRINTF("opening dev=%s\n", blkif->dev_name);
+
+	if (!blkif->read_only)
+		flags |= FWRITE;
+	/* Give the current process rootvnode (referenced) for any unset cdir/rdir/jdir. */
+	if (!curthread->td_proc->p_fd->fd_cdir) {
+		curthread->td_proc->p_fd->fd_cdir = rootvnode;
+		VREF(rootvnode);
+	}
+	if (!curthread->td_proc->p_fd->fd_rdir) {
+		curthread->td_proc->p_fd->fd_rdir = rootvnode;
+		VREF(rootvnode);
+	}
+	if (!curthread->td_proc->p_fd->fd_jdir) {
+		curthread->td_proc->p_fd->fd_jdir = rootvnode;
+		VREF(rootvnode);
+	}
+
+ again:
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, blkif->dev_name, curthread);
+	err = vn_open(&nd, &flags, 0, -1);
+	if (err) {
+		if (blkif->dev_name[0] != '/') {	/* retried name starts with '/', so one retry at most */
+			char *dev_path = "/dev/";
+			char *dev_name;
+
+			/* Try adding device path at beginning of name */
+			dev_name = malloc(strlen(blkif->dev_name) + strlen(dev_path) + 1, M_DEVBUF, M_NOWAIT);
+			if (dev_name) {
+				sprintf(dev_name, "%s%s", dev_path, blkif->dev_name);
+				free(blkif->dev_name, M_DEVBUF);			
+				blkif->dev_name = dev_name;
+				goto again;
+			}
+		}
+		xenbus_dev_fatal(blkif->xdev, err, "error opening device %s", blkif->dev_name);
+		return err;
+	}
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+	/* From here on, failures go through "error", which calls close_device(). */
+	blkif->vn = nd.ni_vp;
+
+	/* We only support disks for now */
+	if (!vn_isdisk(blkif->vn, &err)) {
+		xenbus_dev_fatal(blkif->xdev, err, "device %s is not a disk", blkif->dev_name);
+		VOP_UNLOCK(blkif->vn, 0, curthread);
+		goto error;
+	}
+
+	blkif->cdev = blkif->vn->v_rdev;
+	blkif->csw = dev_refthread(blkif->cdev);	/* released by dev_relthread() in close_device() */
+	PANIC_IF(blkif->csw == NULL);
+
+	err = VOP_GETATTR(blkif->vn, &vattr, NOCRED);
+	if (err) {
+		xenbus_dev_fatal(blkif->xdev, err,
+			"error getting vnode attributes for device %s", blkif->dev_name);
+		VOP_UNLOCK(blkif->vn, 0, curthread);
+		goto error;
+	}
+	/* The vnode is unlocked on every path before the ioctl calls below. */
+	VOP_UNLOCK(blkif->vn, 0, curthread);
+
+	dev = blkif->vn->v_rdev;
+	devsw = dev->si_devsw;
+	if (!devsw->d_ioctl) {
+		err = ENODEV;
+		xenbus_dev_fatal(blkif->xdev, err,
+			"no d_ioctl for device %s!", blkif->dev_name);
+		goto error;
+	}
+
+	err = (*devsw->d_ioctl)(dev, DIOCGSECTORSIZE, (caddr_t)&blkif->sector_size, FREAD, curthread);
+	if (err) {
+		xenbus_dev_fatal(blkif->xdev, err,
+			"error calling ioctl DIOCGSECTORSIZE for device %s", blkif->dev_name);
+		goto error;
+	}
+	blkif->sector_size_shift = fls(blkif->sector_size) - 1;	/* log2 when sector_size is a power of two */
+
+	err = (*devsw->d_ioctl)(dev, DIOCGMEDIASIZE, (caddr_t)&blkif->media_size, FREAD, curthread);
+	if (err) {
+		xenbus_dev_fatal(blkif->xdev, err,
+			"error calling ioctl DIOCGMEDIASIZE for device %s", blkif->dev_name);
+		goto error;
+	}
+	blkif->media_num_sectors = blkif->media_size >> blkif->sector_size_shift;	/* size in sectors */
+
+	blkif->major = umajor(vattr.va_rdev);
+	blkif->minor = uminor(vattr.va_rdev);
+
+	DPRINTF("opened dev=%s major=%d minor=%d sector_size=%u media_size=%lld\n",
+			blkif->dev_name, blkif->major, blkif->minor, blkif->sector_size, blkif->media_size);
+
+	return 0;
+
+ error:
+	close_device(blkif);	/* drops the csw reference (if taken) and closes the vnode */
+	return err;
+}
+/* Create a "vbd" newbus child of nexus0 for this blkif and probe/attach it. */
+static int
+vbd_add_dev(struct xenbus_device *xdev)
+{
+	blkif_t *blkif = xdev->data;
+	device_t nexus, ndev;
+	devclass_t dc;
+	int err = 0;
+
+	mtx_lock(&Giant);	/* held across all newbus calls; released at "done" */
+
+	/* We will add a vbd device as a child of nexus0 (for now) */
+	if (!(dc = devclass_find("nexus")) ||
+		!(nexus = devclass_get_device(dc, 0))) {
+		WPRINTF("could not find nexus0!\n");
+		err = ENOENT;
+		goto done;
+	}
+
+
+	/* Create a newbus device representing the vbd */
+	ndev = BUS_ADD_CHILD(nexus, 0, "vbd", blkif->handle);
+	if (!ndev) {
+		WPRINTF("could not create newbus device vbd%d!\n", blkif->handle);
+		err = EFAULT;
+		goto done;
+	}
+	/* Reference taken for the ivars pointer; vbd_detach() calls blkif_put(). */
+	blkif_get(blkif);
+	device_set_ivars(ndev, blkif);
+	blkif->ndev = ndev;
+	/* NOTE(review): the result of device_probe_and_attach() is not checked. */
+	device_probe_and_attach(ndev);
+
+ done:
+
+	mtx_unlock(&Giant);
+
+	return err;
+}
+/* Selector values passed as arg2 to vbd_sysctl_handler(). */
+enum {
+	VBD_SYSCTL_DOMID,	/* blkif->domid */
+	VBD_SYSCTL_ST_RD_REQ,	/* blkif->st_rd_req */
+	VBD_SYSCTL_ST_WR_REQ,	/* blkif->st_wr_req */
+	VBD_SYSCTL_ST_OO_REQ,	/* blkif->st_oo_req */
+	VBD_SYSCTL_ST_ERR_REQ,	/* blkif->st_err_req */
+	VBD_SYSCTL_RING,	/* text produced by vbd_sysctl_ring_info() */
+};
+/* Format ring indices into a malloc'ed 256-byte string; cmd is not used. */
+static char *
+vbd_sysctl_ring_info(blkif_t *blkif, int cmd)
+{
+	char *buf = malloc(256, M_DEVBUF, M_WAITOK);	/* freed by vbd_sysctl_handler() */
+	if (buf) {
+		if (!blkif->ring_connected)
+			sprintf(buf, "ring not connected\n");
+		else {
+			blkif_back_ring_t *ring = &blkif->ring;
+			sprintf(buf, "nr_ents=%x req_cons=%x"	/* all values printed in hex */
+					" req_prod=%x req_event=%x"
+					" rsp_prod=%x rsp_event=%x",
+					ring->nr_ents, ring->req_cons,
+					ring->sring->req_prod, ring->sring->req_event,
+					ring->sring->rsp_prod, ring->sring->rsp_event);
+		}
+	}
+	return buf;
+}
+/* Sysctl handler for the vbd nodes: arg1 is the device, arg2 a VBD_SYSCTL_* value. */
+static int
+vbd_sysctl_handler(SYSCTL_HANDLER_ARGS)
+{
+	device_t dev = (device_t)arg1;
+	blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
+	const char *value;
+	char *buf = NULL;
+	int err;
+	/* Integer nodes return directly; only the ring node falls out of the switch. */
+	switch (arg2) {
+	case VBD_SYSCTL_DOMID:
+		return sysctl_handle_int(oidp, NULL, blkif->domid, req);
+	case VBD_SYSCTL_ST_RD_REQ:
+		return sysctl_handle_int(oidp, NULL, blkif->st_rd_req, req);
+	case VBD_SYSCTL_ST_WR_REQ:
+		return sysctl_handle_int(oidp, NULL, blkif->st_wr_req, req);
+	case VBD_SYSCTL_ST_OO_REQ:
+		return sysctl_handle_int(oidp, NULL, blkif->st_oo_req, req);
+	case VBD_SYSCTL_ST_ERR_REQ:
+		return sysctl_handle_int(oidp, NULL, blkif->st_err_req, req);
+	case VBD_SYSCTL_RING:
+		value = buf = vbd_sysctl_ring_info(blkif, arg2);
+		break;
+	default:
+		return (EINVAL);
+	}
+	/* strlen(value) bytes are copied out, i.e. without the terminating NUL. */
+	err = SYSCTL_OUT(req, value, strlen(value));
+	if (buf != NULL)
+		free(buf, M_DEVBUF);
+
+	return err;
+}
+
+/* Newbus vbd device driver probe: logs the unit number and always returns 0. */
+static int
+vbd_probe(device_t dev)
+{
+	DPRINTF("vbd%d\n", device_get_unit(dev));
+	return 0;
+}
+
+/* Newbus vbd device driver attach: add sysctl nodes, open the device, try to connect. */
+static int
+vbd_attach(device_t dev) 
+{
+	blkif_t *blkif = (blkif_t *)device_get_ivars(dev);	/* set in vbd_add_dev() */
+
+	DPRINTF("%s\n", blkif->dev_name);
+	/* Every node uses vbd_sysctl_handler with dev as arg1 and a VBD_SYSCTL_* arg2. */
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
+	    dev, VBD_SYSCTL_DOMID, vbd_sysctl_handler, "I",
+	    "domid of frontend");
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "rd_reqs", CTLTYPE_INT|CTLFLAG_RD,
+	    dev, VBD_SYSCTL_ST_RD_REQ, vbd_sysctl_handler, "I",
+	    "number of read reqs");
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "wr_reqs", CTLTYPE_INT|CTLFLAG_RD,
+	    dev, VBD_SYSCTL_ST_WR_REQ, vbd_sysctl_handler, "I",
+	    "number of write reqs");
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "oo_reqs", CTLTYPE_INT|CTLFLAG_RD,
+	    dev, VBD_SYSCTL_ST_OO_REQ, vbd_sysctl_handler, "I",
+	    "number of deferred reqs");
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "err_reqs", CTLTYPE_INT|CTLFLAG_RD,
+	    dev, VBD_SYSCTL_ST_ERR_REQ, vbd_sysctl_handler, "I",
+	    "number of reqs that returned error");
+#if XEN_BLKBACK_DEBUG	/* the ring node exists only in debug builds */
+	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
+	    OID_AUTO, "ring", CTLFLAG_RD,
+	    dev, VBD_SYSCTL_RING, vbd_sysctl_handler, "A",
+	    "req ring info");
+#endif
+	/* open_device() returns 0 on success; an open failure does not fail the attach. */
+	if (!open_device(blkif))
+		connect(blkif);
+
+	return bus_generic_attach(dev);
+}
+
+/* Newbus vbd device driver detach: close the device and call blkif_put(). */
+static int
+vbd_detach(device_t dev)
+{
+	blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
+
+	DPRINTF("%s\n", blkif->dev_name);
+
+	close_device(blkif);	/* no-op when blkif->vn is NULL */
+
+	bus_generic_detach(dev);
+	/* Matches the blkif_get() done in vbd_add_dev() when the ivars were set. */
+	blkif_put(blkif);
+
+	return 0;
+}
+/* Method table of the vbd driver; shutdown/suspend/resume use the bus_generic_* versions. */
+static device_method_t vbd_methods[] = {
+	/* Device interface */
+	DEVMETHOD(device_probe,		vbd_probe),
+	DEVMETHOD(device_attach, 	vbd_attach),
+	DEVMETHOD(device_detach,	vbd_detach),
+	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
+	DEVMETHOD(device_suspend,	bus_generic_suspend),
+	DEVMETHOD(device_resume,	bus_generic_resume),
+	{0, 0}	/* end-of-table marker */
+};
+/* Devclass and driver descriptor for "vbd", registered on the nexus bus below. */
+static devclass_t vbd_devclass;
+
+static driver_t vbd_driver = {
+	"vbd",
+	vbd_methods,
+	0,	/* presumably the softc size (none); the blkif travels via ivars - confirm */
+};
+
+DRIVER_MODULE(vbd, nexus, vbd_driver, vbd_devclass, 0, 0);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: t
+ * End:
+ */

Property changes on: dev/xen/blkback/blkback.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: libkern/strcspn.c
===================================================================
--- libkern/strcspn.c	(.../stable/6/sys)	(revision 0)
+++ libkern/strcspn.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,72 @@
+/*-
+ * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/libkern.h>
+#include <sys/types.h>
+#include <sys/limits.h>
+/* Bitmap helpers: word index and bit mask for a character taken as u_char. */
+#define	IDX(c)	((u_char)(c) / LONG_BIT)
+#define	BIT(c)	((u_long)1 << ((u_char)(c) % LONG_BIT))
+/* Return the length of the initial part of s that has no character from charset. */
+size_t
+strcspn(const char *s, const char *charset)
+{
+	/*
+	 * NB: idx and bit are temporaries whose use causes gcc 3.4.2 to
+	 * generate better code.  Without them, gcc gets a little confused.
+	 */
+	const char *s1;
+	u_long bit;
+	u_long tbl[(UCHAR_MAX + 1) / LONG_BIT];	/* one bit per possible u_char value */
+	int idx;
+
+	if(*s == '\0')	/* empty input: skip building the table */
+		return (0);
+	/* Bit 0 (the NUL character) is always set, so the scan below stops at end of s. */
+#if LONG_BIT == 64	/* always better to unroll on 64-bit architectures */
+	tbl[0] = 1;
+	tbl[3] = tbl[2] = tbl[1] = 0;
+#else
+	for (tbl[0] = idx = 1; idx < sizeof(tbl) / sizeof(tbl[0]); idx++)
+		tbl[idx] = 0;
+#endif
+	for (; *charset != '\0'; charset++) {	/* mark every charset character */
+		idx = IDX(*charset);
+		bit = BIT(*charset);
+		tbl[idx] |= bit;
+	}
+	/* Advance until a marked character (or the NUL terminator) is found. */
+	for(s1 = s; ; s1++) {
+		idx = IDX(*s1);
+		bit = BIT(*s1);
+		if ((tbl[idx] & bit) != 0)
+			break;
+	}
+	return (s1 - s);
+}

Property changes on: libkern/strcspn.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/include/smp.h
===================================================================
--- i386/include/smp.h	(.../stable/6/sys)	(revision 184012)
+++ i386/include/smp.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -68,7 +68,9 @@
 void	ipi_all(u_int ipi);
 void	ipi_all_but_self(u_int ipi);
 void	ipi_self(u_int ipi);
+#ifndef XEN
 void 	ipi_bitmap_handler(struct clockframe frame);
+#endif
 u_int	mp_bootaddress(u_int);
 int	mp_grab_cpu_hlt(void);
 void	mp_topology(void);
@@ -85,7 +87,14 @@
 int ipi_nmi_handler(void);
 void ipi_nmi_selected(u_int32_t cpus);
 #endif
+#ifdef XEN
+void ipi_to_irq_init(void);
 
+#define RESCHEDULE_VECTOR	0
+#define CALL_FUNCTION_VECTOR	1
+#define NR_IPIS			2
+
+#endif
 #endif /* !LOCORE */
 #endif /* SMP */
 
Index: i386/include/param.h
===================================================================
--- i386/include/param.h	(.../stable/6/sys)	(revision 184012)
+++ i386/include/param.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -86,9 +86,11 @@
 #ifdef PAE
 #define NPGPTD		4
 #define PDRSHIFT	21		/* LOG2(NBPDR) */
+#define	NPGPTD_SHIFT	9
 #else
 #define NPGPTD		1
 #define PDRSHIFT	22		/* LOG2(NBPDR) */
+#define	NPGPTD_SHIFT	10
 #endif
 
 #define NBPTD		(NPGPTD<<PAGE_SHIFT)
Index: i386/include/asmacros.h
===================================================================
--- i386/include/asmacros.h	(.../stable/6/sys)	(revision 184012)
+++ i386/include/asmacros.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -134,6 +134,46 @@
 #define MEXITCOUNT
 #endif /* GPROF */
 
+/*
+ * Setup the kernel segment registers.
+ */
+#define	SET_KERNEL_SREGS						\
+	movl	$KDSEL, %eax ;	/* reload with kernel's data segment */	\
+	movl	%eax, %ds ;						\
+	movl	%eax, %es ;						\
+	movl	$KPSEL, %eax ;	/* reload with per-CPU data segment */	\
+	movl	%eax, %fs
+
+#ifdef XEN
+#define LOAD_CR3(reg)          \
+        movl    reg,PCPU(CR3); \
+        pushl   %ecx ;         \
+        pushl   %edx ;         \
+        pushl   %esi ;         \
+        pushl   reg ;          \
+        call    xen_load_cr3 ;     \
+        addl    $4,%esp ;      \
+        popl    %esi ;         \
+        popl    %edx ;         \
+        popl    %ecx ;         \
+ 
+#define READ_CR3(reg)   movl PCPU(CR3),reg;
+#define LLDT(arg)                 \
+        pushl   %edx ;                    \
+        pushl   %eax ;                    \
+        xorl    %eax,%eax ;               \
+        movl    %eax,%gs ;                \
+        call    i386_reset_ldt ;          \
+        popl    %eax ;                    \
+        popl    %edx 
+#define CLI             call ni_cli
+#else
+#define LOAD_CR3(reg)   movl reg,%cr3; 
+#define READ_CR3(reg)   movl %cr3,reg; 
+#define LLDT(arg)       lldt arg; 
+#define CLI             cli 
+#endif /* !XEN */ 
+
 #ifdef LOCORE
 /*
  * Convenience macros for declaring interrupt entry points and trap
@@ -145,4 +185,30 @@
 
 #endif /* LOCORE */
 
+#ifdef __STDC__ 
+#define ELFNOTE(name, type, desctype, descdata...) \
+.pushsection .note.name                 ;       \
+  .align 4                              ;       \
+  .long 2f - 1f         /* namesz */    ;       \
+  .long 4f - 3f         /* descsz */    ;       \
+  .long type                            ;       \
+1:.asciz #name                          ;       \
+2:.align 4                              ;       \
+3:desctype descdata                     ;       \
+4:.align 4                              ;       \
+.popsection 
+#else /* !__STDC__, i.e. -traditional */ 
+#define ELFNOTE(name, type, desctype, descdata) \
+.pushsection .note.name                 ;       \
+  .align 4                              ;       \
+  .long 2f - 1f         /* namesz */    ;       \
+  .long 4f - 3f         /* descsz */    ;       \
+  .long type                            ;       \
+1:.asciz "name"                         ;       \
+2:.align 4                              ;       \
+3:desctype descdata                     ;       \
+4:.align 4                              ;       \
+.popsection
+#endif /* __STDC__ */ 
+
 #endif /* !_MACHINE_ASMACROS_H_ */
Index: i386/include/apicvar.h
===================================================================
--- i386/include/apicvar.h	(.../stable/6/sys)	(revision 184012)
+++ i386/include/apicvar.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -113,6 +113,17 @@
 #define	APIC_THERMAL_INT (APIC_LOCAL_INTS + 1)
 
 #define	APIC_IPI_INTS	(APIC_LOCAL_INTS + 2)
+#ifdef XEN
+#define	IPI_RENDEZVOUS		(2)	/* Inter-CPU rendezvous. */
+#define	IPI_INVLTLB		(3)	/* TLB Shootdown IPIs */
+#define	IPI_INVLPG		(4)
+#define	IPI_INVLRNG		(5)
+#define	IPI_INVLCACHE		(6)
+#define	IPI_LAZYPMAP		(7)	/* Lazy pmap release. */
+/* Vector to handle bitmap based IPIs */
+#define	IPI_BITMAP_VECTOR	(8)
+
+#else
 #define	IPI_RENDEZVOUS	(APIC_IPI_INTS)		/* Inter-CPU rendezvous. */
 #define	IPI_INVLTLB	(APIC_IPI_INTS + 1)	/* TLB Shootdown IPIs */
 #define	IPI_INVLPG	(APIC_IPI_INTS + 2)
@@ -121,6 +132,7 @@
 #define	IPI_LAZYPMAP	(APIC_IPI_INTS + 5)	/* Lazy pmap release. */
 /* Vector to handle bitmap based IPIs */
 #define	IPI_BITMAP_VECTOR	(APIC_IPI_INTS + 6) 
+#endif
 
 /* IPIs handled by IPI_BITMAPED_VECTOR  (XXX ups is there a better place?) */
 #define	IPI_AST		0 	/* Generate software trap. */
Index: i386/include/cpufunc.h
===================================================================
--- i386/include/cpufunc.h	(.../stable/6/sys)	(revision 184012)
+++ i386/include/cpufunc.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -42,6 +42,17 @@
 #error this file needs sys/cdefs.h as a prerequisite
 #endif
 
+#ifdef XEN
+extern void xen_cli(void);
+extern void xen_sti(void);
+extern u_int xen_rcr2(void);
+extern void xen_load_cr3(u_int data);
+extern void xen_tlb_flush(void);
+extern void xen_invlpg(u_int addr);
+extern int xen_save_and_cli(void);
+extern void xen_restore_flags(u_int eflags);
+#endif
+
 struct region_descriptor;
 
 #define readb(va)	(*(volatile u_int8_t *) (va))
@@ -81,7 +92,11 @@
 static __inline void
 disable_intr(void)
 {
+#ifdef XEN
+	xen_cli();
+#else	
 	__asm __volatile("cli" : : : "memory");
+#endif
 }
 
 static __inline void
@@ -103,7 +118,11 @@
 static __inline void
 enable_intr(void)
 {
+#ifdef XEN
+	xen_sti();
+#else
 	__asm __volatile("sti");
+#endif
 }
 
 #ifdef _KERNEL
@@ -392,6 +411,9 @@
 {
 	u_int	data;
 
+#ifdef XEN
+	return (xen_rcr2());
+#endif	
 	__asm __volatile("movl %%cr2,%0" : "=r" (data));
 	return (data);
 }
@@ -399,8 +421,11 @@
 static __inline void
 load_cr3(u_int data)
 {
-
+#ifdef XEN
+	xen_load_cr3(data);
+#else
 	__asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
+#endif
 }
 
 static __inline u_int
@@ -433,8 +458,11 @@
 static __inline void
 invltlb(void)
 {
-
+#ifdef XEN
+	xen_tlb_flush();
+#else	
 	load_cr3(rcr3());
+#endif
 }
 
 /*
@@ -444,8 +472,11 @@
 static __inline void
 invlpg(u_int addr)
 {
-
+#ifdef XEN
+	xen_invlpg(addr);
+#else
 	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
+#endif
 }
 
 static __inline u_int
@@ -619,15 +650,23 @@
 {
 	register_t eflags;
 
+#ifdef XEN
+	return (xen_save_and_cli());
+#endif
 	eflags = read_eflags();
 	disable_intr();
+
 	return (eflags);
 }
 
 static __inline void
 intr_restore(register_t eflags)
 {
+#ifdef XEN
+	xen_restore_flags(eflags);
+#else
 	write_eflags(eflags);
+#endif
 }
 
 #else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */
Index: i386/include/pcpu.h
===================================================================
--- i386/include/pcpu.h	(.../stable/6/sys)	(revision 184012)
+++ i386/include/pcpu.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -45,7 +45,53 @@
  * to each CPU's data can be set up for things like "check curproc on all
  * other processors"
  */
+
+#ifdef XEN
+#ifndef NR_VIRQS
+#define	NR_VIRQS	24
+#endif
+#ifndef NR_IPIS
+#define	NR_IPIS		2
+#endif
+
+/* These are periodically updated in shared_info, and then copied here. */
+struct shadow_time_info {
+	uint64_t tsc_timestamp;     /* TSC at last update of time vals.  */
+	uint64_t system_timestamp;  /* Time, in nanosecs, since boot.    */
+	uint32_t tsc_to_nsec_mul;   /* presumably TSC->nsec multiplier; confirm */
+	uint32_t tsc_to_usec_mul;   /* presumably TSC->usec multiplier; confirm */
+	int tsc_shift;              /* presumably shift used with the multipliers; confirm */
+	uint32_t version;           /* NOTE(review): likely the copied time version; confirm */
+};
+
+
 #define	PCPU_MD_FIELDS							\
+	struct	pcpu *pc_prvspace;	/* Self-reference */		\
+	struct	pmap *pc_curpmap;					\
+	struct	i386tss pc_common_tss;					\
+	struct	segment_descriptor pc_common_tssd;			\
+	struct	segment_descriptor *pc_tss_gdt;				\
+	struct	segment_descriptor *pc_fsgs_gdt;			\
+	vm_paddr_t 	*pc_pdir_shadow;				\
+	int	pc_currentldt;						\
+	u_int   pc_acpi_id;		/* ACPI CPU id */		\
+	u_int	pc_apic_id;						\
+	int	pc_private_tss;		/* Flag indicating private tss*/\
+        u_int     pc_cr3;		/* track cr3 for R1/R3*/	\
+        u_int     pc_pdir;                                              \
+        u_int     pc_lazypmap;                                          \
+        u_int     pc_rendezvous;                                        \
+        u_int     pc_cpuast;						\
+	uint64_t  pc_processed_system_time;				\
+	struct shadow_time_info pc_shadow_time;				\
+	u_int	pc_resched_irq;						\
+	u_int	pc_callfunc_irq;					\
+        u_int	pc_virq_to_irq[NR_VIRQS];				\
+	u_int	pc_ipi_to_irq[NR_IPIS]
+	
+#else
+
+#define	PCPU_MD_FIELDS							\
 	struct	pcpu *pc_prvspace;		/* Self-reference */	\
 	struct	pmap *pc_curpmap;					\
 	struct	i386tss pc_common_tss;					\
@@ -56,6 +102,8 @@
 	u_int	pc_acpi_id;						\
 	u_int	pc_apic_id
 
+#endif
+
 #if defined(lint)
  
 extern struct pcpu *pcpup;
Index: i386/include/segments.h
===================================================================
--- i386/include/segments.h	(.../stable/6/sys)	(revision 184012)
+++ i386/include/segments.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -47,7 +47,11 @@
  */
 
 #define	ISPL(s)	((s)&3)		/* what is the priority level of a selector */
+#ifndef XEN
 #define	SEL_KPL	0		/* kernel priority level */
+#else
+#define	SEL_KPL	1		/* kernel priority level */
+#endif
 #define	SEL_UPL	3		/* user priority level */
 #define	ISLDT(s)	((s)&SEL_LDT)	/* is it local or global */
 #define	SEL_LDT	4		/* local descriptor table */
@@ -222,8 +226,11 @@
 #define	GBIOSARGS_SEL	17	/* BIOS interface (Arguments) */
 #define	GNDIS_SEL	18	/* For the NDIS layer */
 
+#ifndef XEN
 #define	NGDT 		19
-
+#else
+#define	NGDT 		9
+#endif
 /*
  * Entries in the Local Descriptor Table (LDT)
  */
@@ -240,10 +247,16 @@
 
 #ifdef _KERNEL
 extern int	_default_ldt;
+#ifndef XEN
+extern union descriptor ldt[NLDT];
 extern union descriptor gdt[];
+#else
+extern union descriptor *ldt;
+extern union descriptor *gdt;
+#endif
+
 extern struct soft_segment_descriptor gdt_segs[];
 extern struct gate_descriptor *idt;
-extern union descriptor ldt[NLDT];
 extern struct region_descriptor r_gdt, r_idt;
 
 void	lgdt(struct region_descriptor *rdp);
Index: i386/include/pmap.h
===================================================================
--- i386/include/pmap.h	(.../stable/6/sys)	(revision 184012)
+++ i386/include/pmap.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -68,7 +68,14 @@
 /* Our various interpretations of the above */
 #define PG_W		PG_AVAIL1	/* "Wired" pseudoflag */
 #define	PG_MANAGED	PG_AVAIL2
+
+#ifdef PAE
+#define PG_FRAME        (0x000ffffffffff000ull) 
+#define PG_PS_FRAME     (0x000fffffffe00000ull) 
+#else
 #define	PG_FRAME	(~((vm_paddr_t)PAGE_MASK))
+#define PG_PS_FRAME     (0xffc00000)
+#endif
 #define	PG_PROT		(PG_RW|PG_U)	/* all protection bits . */
 #define PG_N		(PG_NC_PWT|PG_NC_PCD)	/* Non-cacheable */
 
@@ -175,7 +182,78 @@
  * the corresponding pde that in turn maps it.
  */
 #define	vtopte(va)	(PTmap + i386_btop(va))
+#define	vtophys(va)	pmap_kextract(((vm_offset_t) (va)))
 
+#ifdef XEN
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenvar.h>
+#include <machine/xen/xenpmap.h>
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+#define PG_KERNEL  (PG_V | PG_A | PG_RW | PG_M)
+
+#define MACH_TO_VM_PAGE(ma) PHYS_TO_VM_PAGE(xpmap_mtop((ma)))
+#define VM_PAGE_TO_MACH(m) xpmap_ptom(VM_PAGE_TO_PHYS((m)))
+/* Translate kernel va using PTD/PTmap; "ma" is presumably a machine address - confirm. */
+static __inline vm_paddr_t
+pmap_kextract_ma(vm_offset_t va)
+{
+        vm_paddr_t ma;
+        if ((ma = PTD[va >> PDRSHIFT]) & PG_PS) {	/* PG_PS set in the directory entry */
+                ma = (ma & ~(NBPDR - 1)) | (va & (NBPDR - 1));	/* keep offset within NBPDR */
+        } else {
+                ma = (*vtopte(va) & PG_FRAME) | (va & PAGE_MASK);	/* PTE frame plus page offset */
+        }
+        return ma;
+}
+/* XEN pmap_kextract(): pmap_kextract_ma() result passed through xpmap_mtop(). */
+static __inline vm_paddr_t
+pmap_kextract(vm_offset_t va)
+{
+        return xpmap_mtop(pmap_kextract_ma(va));	/* presumably machine -> physical; confirm */
+}
+#define vtomach(va)     pmap_kextract_ma(((vm_offset_t) (va)))
+
+vm_paddr_t pmap_extract_ma(struct pmap *pmap, vm_offset_t va);
+
+void    pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa);
+void    pmap_map_readonly(struct pmap *pmap, vm_offset_t va, int len);
+void    pmap_map_readwrite(struct pmap *pmap, vm_offset_t va, int len);
+/* Store xpmap_ptom(v) into *ptep via PT_SET_VA and return the entry's prior value. */
+static __inline pt_entry_t
+pte_load_store(pt_entry_t *ptep, pt_entry_t v)
+{
+	pt_entry_t r;
+
+	v = xpmap_ptom(v);	/* presumably physical -> machine; confirm */
+	r = *ptep;	/* read the old entry before it is replaced */
+	PT_SET_VA(ptep, v, TRUE);
+	return (r);
+}
+/* Like pte_load_store(), but v is handed to PT_SET_VA_MA without xpmap_ptom(). */
+static __inline pt_entry_t
+pte_load_store_ma(pt_entry_t *ptep, pt_entry_t v)
+{
+	pt_entry_t r;
+
+	r = *ptep;	/* read the old entry before it is replaced */
+	PT_SET_VA_MA(ptep, v, TRUE);
+	return (r);
+}
+
+#define	pte_load_clear(ptep)	pte_load_store((ptep), (pt_entry_t)0ULL)
+
+#define	pte_store(ptep, pte)	pte_load_store((ptep), (pt_entry_t)pte)
+#define	pte_store_ma(ptep, pte)	pte_load_store_ma((ptep), (pt_entry_t)pte)
+#define	pde_store_ma(ptep, pte)	pte_load_store_ma((ptep), (pt_entry_t)pte)
+
+#elif !defined(XEN)
+
 /*
  *	Routine:	pmap_kextract
  *	Function:
@@ -195,11 +273,10 @@
 	}
 	return pa;
 }
+#endif
 
-#define	vtophys(va)	pmap_kextract(((vm_offset_t) (va)))
+#if defined(PAE) && !defined(XEN)
 
-#ifdef PAE
-
 static __inline pt_entry_t
 pte_load(pt_entry_t *ptep)
 {
@@ -231,7 +308,7 @@
 
 #define	pte_store(ptep, pte)	pte_load_store((ptep), (pt_entry_t)pte)
 
-#else /* PAE */
+#elif !defined (PAE) && !defined(XEN)
 
 static __inline pt_entry_t
 pte_load(pt_entry_t *ptep)
Index: i386/include/vmparam.h
===================================================================
--- i386/include/vmparam.h	(.../stable/6/sys)	(revision 184012)
+++ i386/include/vmparam.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -83,8 +83,12 @@
  * Kernel physical load address.
  */
 #ifndef KERNLOAD
+#if defined(XEN) && !defined(XEN_PRIVILEGED_GUEST)
+#define	KERNLOAD		0
+#else
 #define	KERNLOAD		(1 << PDRSHIFT)
 #endif
+#endif
 
 /*
  * Virtual addresses of things.  Derived from the page directory and
@@ -93,7 +97,11 @@
  * messy at times, but hey, we'll do anything to save a page :-)
  */
 
+#ifdef XEN
+#define VM_MAX_KERNEL_ADDRESS	HYPERVISOR_VIRT_START
+#else
 #define VM_MAX_KERNEL_ADDRESS	VADDR(KPTDI+NKPDE-1, NPTEPG-1)
+#endif
 #define VM_MIN_KERNEL_ADDRESS	VADDR(PTDPTDI, PTDPTDI)
 
 #define	KERNBASE		VADDR(KPTDI, 0)
Index: i386/include/xen/xen-os.h
===================================================================
--- i386/include/xen/xen-os.h	(.../stable/6/sys)	(revision 0)
+++ i386/include/xen/xen-os.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,291 @@
+/******************************************************************************
+ * xen-os.h
+ * 
+ * random collection of macros and definition
+ */
+
+#ifndef _XEN_OS_H_
+#define _XEN_OS_H_
+
+#ifdef PAE
+#define CONFIG_X86_PAE
+#endif
+
+#if defined(XEN) && !defined(__XEN_INTERFACE_VERSION__)  
+/*  
+ * Can update to a more recent version when we implement  
+ * the hypercall page  
+ */  
+#define  __XEN_INTERFACE_VERSION__ 0x00030204  
+#endif  
+
+#include <xen/interface/xen.h>
+
+/* Force a proper event-channel callback from Xen. */
+void force_evtchn_callback(void);
+
+extern int gdtset;
+
+extern shared_info_t *HYPERVISOR_shared_info;
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+    __asm__ __volatile__ ( "rep;nop" : : : "memory" );
+}
+#define cpu_relax() rep_nop()
+
+/* crude memory allocator for memory allocation early in 
+ *  boot
+ */
+void *bootmem_alloc(unsigned int size);
+void bootmem_free(void *ptr, unsigned int size);
+
+
+/* Everything below this point is not included by assembler (.S) files. */
+#ifndef __ASSEMBLY__
+
+/* some function prototypes */
+void trap_init(void);
+
+/*
+ * STI/CLI equivalents. These basically set and clear the virtual
+ * event_enable flag in the shared_info structure. Note that when
+ * the enable bit is set, there may be pending events to be handled.
+ * We may therefore call into do_hypervisor_callback() directly.
+ */
+#define likely(x)  __builtin_expect((x),1)
+#define unlikely(x)  __builtin_expect((x),0)
+
+
+
+#define __cli()                                                         \
+do {                                                                    \
+        vcpu_info_t *_vcpu;                                             \
+        _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)];	\
+        _vcpu->evtchn_upcall_mask = 1;                                  \
+        barrier();                                                      \
+} while (0)
+
+#define __sti()                                                         \
+do {                                                                    \
+        vcpu_info_t *_vcpu;                                             \
+        barrier();                                                      \
+        _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)];	\
+        _vcpu->evtchn_upcall_mask = 0;                                  \
+        barrier(); /* unmask then check (avoid races) */                \
+        if ( unlikely(_vcpu->evtchn_upcall_pending) )                   \
+                force_evtchn_callback();                                \
+} while (0)
+
+#define __restore_flags(x)                                              \
+do {                                                                    \
+        vcpu_info_t *_vcpu;                                             \
+        barrier();                                                      \
+        _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)];	\
+        if ((_vcpu->evtchn_upcall_mask = (x)) == 0) {                   \
+                barrier(); /* unmask then check (avoid races) */        \
+                if ( unlikely(_vcpu->evtchn_upcall_pending) )           \
+                        force_evtchn_callback();                        \
+        } 								\
+} while (0)
+
+/*
+ * Add critical_{enter, exit}?
+ *
+ */
+#define __save_and_cli(x)                                               \
+do {                                                                    \
+        vcpu_info_t *_vcpu;                                             \
+        _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)];	\
+        (x) = _vcpu->evtchn_upcall_mask;                                \
+        _vcpu->evtchn_upcall_mask = 1;                                  \
+        barrier();                                                      \
+} while (0)
+
+
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) do { (x) = HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)].evtchn_upcall_mask; } while (0) /* read mask, leave it unchanged */
+#define restore_flags(x) __restore_flags(x)
+#define save_and_cli(x) __save_and_cli(x)
+
+#define local_irq_save(x)       __save_and_cli(x)
+#define local_irq_restore(x)    __restore_flags(x)
+#define local_irq_disable()     __cli()
+#define local_irq_enable()      __sti()
+
+#define mtx_lock_irqsave(lock, x) {local_irq_save((x)); mtx_lock_spin((lock));}
+#define mtx_unlock_irqrestore(lock, x) {mtx_unlock_spin((lock)); local_irq_restore((x)); }
+#define spin_lock_irqsave mtx_lock_irqsave
+#define spin_unlock_irqrestore mtx_unlock_irqrestore
+
+
+#ifndef mb
+#define mb() __asm__ __volatile__("lock; addl $0, 0(%%esp)": : :"memory")
+#endif
+#ifndef rmb
+#define rmb() mb()
+#endif
+#ifndef wmb
+#define wmb() barrier()
+#endif
+#ifdef SMP
+#define smp_mb() mb() 
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#define smp_read_barrier_depends()      read_barrier_depends()
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#else
+#define smp_mb()        barrier()
+#define smp_rmb()       barrier()
+#define smp_wmb()       barrier()
+#define smp_read_barrier_depends()      do { } while(0)
+#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif
+
+
+/* This is a barrier for the compiler only, NOT the processor! */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+#define LOCK_PREFIX ""
+#define LOCK ""
+#define ADDR (*(volatile long *) addr)
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+
+
+#define xen_xchg(ptr,v) \
+        ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((volatile struct __xchg_dummy *)(x))
+static __inline unsigned long __xchg(unsigned long x, volatile void * ptr,
+                                   int size)
+{
+    switch (size) {
+    case 1:
+        __asm__ __volatile__("xchgb %b0,%1"
+                             :"=q" (x)
+                             :"m" (*__xg(ptr)), "0" (x)
+                             :"memory");
+        break;
+    case 2:
+        __asm__ __volatile__("xchgw %w0,%1"
+                             :"=r" (x)
+                             :"m" (*__xg(ptr)), "0" (x)
+                             :"memory");
+        break;
+    case 4:
+        __asm__ __volatile__("xchgl %0,%1"
+                             :"=r" (x)
+                             :"m" (*__xg(ptr)), "0" (x)
+                             :"memory");
+        break;
+    }
+    return x;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline int test_and_clear_bit(int nr, volatile void * addr)
+{
+        int oldbit;
+
+        __asm__ __volatile__( LOCK_PREFIX
+                "btrl %2,%1\n\tsbbl %0,%0"
+                :"=r" (oldbit),"=m" (ADDR)
+                :"Ir" (nr) : "memory");
+        return oldbit;
+}
+
+static __inline int constant_test_bit(int nr, const volatile void * addr)
+{
+    return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline int variable_test_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    
+    __asm__ __volatile__(
+        "btl %2,%1\n\tsbbl %0,%0"
+        :"=r" (oldbit)
+        :"m" (ADDR),"Ir" (nr));
+    return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.  See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void set_bit(int nr, volatile void * addr)
+{
+        __asm__ __volatile__( LOCK_PREFIX
+                "btsl %1,%0"	/* read-modify-write; nr may index past the first word */
+                :"=m" (ADDR)
+                :"Ir" (nr), "m" (ADDR) : "memory");
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void clear_bit(int nr, volatile void * addr)
+{
+        __asm__ __volatile__( LOCK_PREFIX
+                "btrl %1,%0"	/* read-modify-write: old word is an input too */
+                :"=m" (ADDR)
+                :"Ir" (nr), "m" (ADDR));
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ void atomic_inc(atomic_t *v)
+{
+        __asm__ __volatile__(
+                LOCK "incl %0"
+                :"=m" (v->counter)
+                :"m" (v->counter));
+}
+
+
+#define rdtscll(val) \
+     __asm__ __volatile__("rdtsc" : "=A" (val))
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _XEN_OS_H_ */

Property changes on: i386/include/xen/xen-os.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/include/xen/hypercall.h
===================================================================
--- i386/include/xen/hypercall.h	(.../stable/6/sys)	(revision 0)
+++ i386/include/xen/hypercall.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,402 @@
+/******************************************************************************
+ * hypercall.h
+ * 
+ * Linux-specific hypervisor handling.
+ * 
+ * Copyright (c) 2002-2004, K A Fraser
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __MACHINE_XEN_HYPERCALL_H__
+#define __MACHINE_XEN_HYPERCALL_H__
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+#define	ENOXENSYS	38
+#define CONFIG_XEN_COMPAT	0x030002
+
+
+#if defined(XEN)
+#define HYPERCALL_STR(name)                                     \
+        "call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"
+#else
+#define HYPERCALL_STR(name)                                     \
+        "mov hypercall_stubs,%%eax; "                           \
+        "add $("STR(__HYPERVISOR_##name)" * 32),%%eax; "        \
+        "call *%%eax"
+#endif
+
+#define _hypercall0(type, name)                 \
+({                                              \
+        long __res;                             \
+        __asm__ volatile (                          \
+                HYPERCALL_STR(name)             \
+                : "=a" (__res)                  \
+                :                               \
+                : "memory" );                   \
+        (type)__res;                            \
+})
+
+#define _hypercall1(type, name, a1)                             \
+({                                                              \
+        long __res, __ign1;                                     \
+        __asm__ volatile (                                          \
+                HYPERCALL_STR(name)                             \
+                : "=a" (__res), "=b" (__ign1)                   \
+                : "1" ((long)(a1))                              \
+                : "memory" );                                   \
+        (type)__res;                                            \
+})
+
+#define _hypercall2(type, name, a1, a2)                         \
+({                                                              \
+        long __res, __ign1, __ign2;                             \
+        __asm__ volatile (                                          \
+                HYPERCALL_STR(name)                             \
+                : "=a" (__res), "=b" (__ign1), "=c" (__ign2)    \
+                : "1" ((long)(a1)), "2" ((long)(a2))            \
+                : "memory" );                                   \
+        (type)__res;                                            \
+})
+
+#define _hypercall3(type, name, a1, a2, a3)                     \
+({                                                              \
+        long __res, __ign1, __ign2, __ign3;                     \
+        __asm__ volatile (                                          \
+                HYPERCALL_STR(name)                             \
+                : "=a" (__res), "=b" (__ign1), "=c" (__ign2),   \
+                "=d" (__ign3)                                   \
+                : "1" ((long)(a1)), "2" ((long)(a2)),           \
+                "3" ((long)(a3))                                \
+                : "memory" );                                   \
+        (type)__res;                                            \
+})
+
+#define _hypercall4(type, name, a1, a2, a3, a4)                 \
+({                                                              \
+        long __res, __ign1, __ign2, __ign3, __ign4;             \
+        __asm__ volatile (                                          \
+                HYPERCALL_STR(name)                             \
+                : "=a" (__res), "=b" (__ign1), "=c" (__ign2),   \
+                "=d" (__ign3), "=S" (__ign4)                    \
+                : "1" ((long)(a1)), "2" ((long)(a2)),           \
+                "3" ((long)(a3)), "4" ((long)(a4))              \
+                : "memory" );                                   \
+        (type)__res;                                            \
+})
+
+#define _hypercall5(type, name, a1, a2, a3, a4, a5)             \
+({                                                              \
+        long __res, __ign1, __ign2, __ign3, __ign4, __ign5;     \
+        __asm__ volatile (                                          \
+                HYPERCALL_STR(name)                             \
+                : "=a" (__res), "=b" (__ign1), "=c" (__ign2),   \
+                "=d" (__ign3), "=S" (__ign4), "=D" (__ign5)     \
+                : "1" ((long)(a1)), "2" ((long)(a2)),           \
+                "3" ((long)(a3)), "4" ((long)(a4)),             \
+                "5" ((long)(a5))                                \
+                : "memory" );                                   \
+        (type)__res;                                            \
+})								
+
+static inline int
+HYPERVISOR_set_trap_table(
+	trap_info_t *table)
+{
+	return _hypercall1(int, set_trap_table, table);
+}
+
+static inline int
+HYPERVISOR_mmu_update(
+	mmu_update_t *req, int count, int *success_count, domid_t domid)
+{
+	return _hypercall4(int, mmu_update, req, count, success_count, domid);
+}
+
+static inline int
+HYPERVISOR_mmuext_op(
+	mmuext_op_t *op, int count, int *success_count, domid_t domid)
+{
+	return _hypercall4(int, mmuext_op, op, count, success_count, domid);
+}
+
+static inline int
+HYPERVISOR_set_gdt(
+	unsigned long *frame_list, int entries)
+{
+	return _hypercall2(int, set_gdt, frame_list, entries);
+}
+
+static inline int
+HYPERVISOR_stack_switch(
+	unsigned long ss, unsigned long esp)
+{
+	return _hypercall2(int, stack_switch, ss, esp);
+}
+
+static inline int
+HYPERVISOR_set_callbacks(
+	unsigned long event_selector, unsigned long event_address,
+	unsigned long failsafe_selector, unsigned long failsafe_address)
+{
+	return _hypercall4(int, set_callbacks,
+			   event_selector, event_address,
+			   failsafe_selector, failsafe_address);
+}
+
+static inline int
+HYPERVISOR_fpu_taskswitch(
+	int set)
+{
+	return _hypercall1(int, fpu_taskswitch, set);
+}
+
+static inline int 
+HYPERVISOR_sched_op_compat(
+	int cmd, unsigned long arg)
+{
+	return _hypercall2(int, sched_op_compat, cmd, arg);
+}
+
+static inline int
+HYPERVISOR_sched_op(
+	int cmd, void *arg)
+{
+	return _hypercall2(int, sched_op, cmd, arg);
+}
+
+static inline long
+HYPERVISOR_set_timer_op(
+	uint64_t timeout)
+{
+	unsigned long timeout_hi = (unsigned long)(timeout>>32);
+	unsigned long timeout_lo = (unsigned long)timeout;
+	return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
+}
+#if 0
+static inline int
+HYPERVISOR_platform_op(
+        struct xen_platform_op *platform_op)
+{
+        platform_op->interface_version = XENPF_INTERFACE_VERSION;
+        return _hypercall1(int, platform_op, platform_op);
+}
+#endif
+static inline int
+HYPERVISOR_set_debugreg(
+	int reg, unsigned long value)
+{
+	return _hypercall2(int, set_debugreg, reg, value);
+}
+
+static inline unsigned long
+HYPERVISOR_get_debugreg(
+	int reg)
+{
+	return _hypercall1(unsigned long, get_debugreg, reg);
+}
+
+static inline int
+HYPERVISOR_update_descriptor(
+	uint64_t ma, uint64_t desc)
+{
+	return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
+}
+
+static inline int
+HYPERVISOR_memory_op(
+	unsigned int cmd, void *arg)
+{
+	return _hypercall2(int, memory_op, cmd, arg);
+}
+
+static inline int
+HYPERVISOR_multicall(
+	void *call_list, int nr_calls)
+{
+	return _hypercall2(int, multicall, call_list, nr_calls);
+}
+
+static inline int
+HYPERVISOR_update_va_mapping(
+	unsigned long va, uint64_t new_val, unsigned long flags)
+{
+	uint32_t hi, lo;
+
+	lo = (uint32_t)(new_val & 0xffffffff);
+	hi = (uint32_t)(new_val >> 32);
+	
+	return _hypercall4(int, update_va_mapping, va,
+			   lo, hi, flags);
+}
+
+static inline int
+HYPERVISOR_event_channel_op(
+	int cmd, void *arg)
+{
+	int rc = _hypercall2(int, event_channel_op, cmd, arg);
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+	if (__predict_false(rc == -ENOXENSYS)) {
+		struct evtchn_op op;
+		op.cmd = cmd;
+		memcpy(&op.u, arg, sizeof(op.u));
+		rc = _hypercall1(int, event_channel_op_compat, &op);
+		memcpy(arg, &op.u, sizeof(op.u));
+	}
+#endif
+	return (rc);
+}
+
+static inline int
+HYPERVISOR_xen_version(
+	int cmd, void *arg)
+{
+	return _hypercall2(int, xen_version, cmd, arg);
+}
+
+static inline int
+HYPERVISOR_console_io(
+	int cmd, int count, char *str)
+{
+	return _hypercall3(int, console_io, cmd, count, str);
+}
+
+static inline int
+HYPERVISOR_physdev_op(
+	int cmd, void *arg)
+{
+	int rc = _hypercall2(int, physdev_op, cmd, arg);
+#if CONFIG_XEN_COMPAT <= 0x030002
+	if (__predict_false(rc == -ENOXENSYS)) {
+		struct physdev_op op;
+		op.cmd = cmd;
+		memcpy(&op.u, arg, sizeof(op.u));
+		rc = _hypercall1(int, physdev_op_compat, &op);
+		memcpy(arg, &op.u, sizeof(op.u));
+	}
+#endif
+	return (rc);
+}
+
+static inline int
+HYPERVISOR_grant_table_op(
+	unsigned int cmd, void *uop, unsigned int count)
+{
+	return _hypercall3(int, grant_table_op, cmd, uop, count);
+}
+
+static inline int
+HYPERVISOR_update_va_mapping_otherdomain(
+	unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid)
+{
+	uint32_t hi, lo;
+	
+	lo = (uint32_t)(new_val & 0xffffffff);
+	hi = (uint32_t)(new_val >> 32);
+	
+	return _hypercall5(int, update_va_mapping_otherdomain, va,
+			   lo, hi, flags, domid);
+}
+
+static inline int
+HYPERVISOR_vm_assist(
+	unsigned int cmd, unsigned int type)
+{
+	return _hypercall2(int, vm_assist, cmd, type);
+}
+
+static inline int
+HYPERVISOR_vcpu_op(
+	int cmd, int vcpuid, void *extra_args)
+{
+	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
+}
+
+static inline int
+HYPERVISOR_suspend(
+	unsigned long srec)
+{
+	struct sched_shutdown sched_shutdown = {
+		.reason = SHUTDOWN_suspend
+	};
+	int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown,
+			   &sched_shutdown, srec);
+#if CONFIG_XEN_COMPAT <= 0x030002
+	if (rc == -ENOXENSYS)
+		rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown,
+				 SHUTDOWN_suspend, srec);
+#endif	
+	return (rc);
+}
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+static inline int
+HYPERVISOR_nmi_op(
+        unsigned long op, void *arg)
+{
+        return _hypercall2(int, nmi_op, op, arg);
+}
+#endif
+
+static inline int
+HYPERVISOR_callback_op(
+        int cmd, void *arg)
+{
+        return _hypercall2(int, callback_op, cmd, arg);
+}
+
+#ifndef CONFIG_XEN
+static inline unsigned long
+HYPERVISOR_hvm_op(
+    int op, void *arg)
+{
+    return _hypercall2(unsigned long, hvm_op, op, arg);
+}
+#endif
+
+static inline int
+HYPERVISOR_xenoprof_op(
+        int op, void *arg)
+{
+        return _hypercall2(int, xenoprof_op, op, arg);
+}
+
+static inline int
+HYPERVISOR_kexec_op(
+        unsigned long op, void *args)
+{
+        return _hypercall2(int, kexec_op, op, args);
+}
+
+#endif /* __MACHINE_XEN_HYPERCALL_H__ */
+
+/*
+ * Local variables:
+ *  c-file-style: "linux"
+ *  indent-tabs-mode: t
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ *  tab-width: 8
+ * End:
+ */

Property changes on: i386/include/xen/hypercall.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/include/xen/xenvar.h
===================================================================
--- i386/include/xen/xenvar.h	(.../stable/6/sys)	(revision 0)
+++ i386/include/xen/xenvar.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2008 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * $FreeBSD$
+ */
+#ifndef XENVAR_H_
+#define XENVAR_H_
+#define XBOOTUP 0x1
+#define XPMAP   0x2
+extern int xendebug_flags;
+#ifndef NOXENDEBUG
+#define XENPRINTF printk
+#else
+#define XENPRINTF printf
+#endif
+
+extern	xen_pfn_t *xen_phys_machine;
+extern	xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
+extern	xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
+
+#if 0
+#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__)
+#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__)
+#define TRACE_DEBUG(argflags, _f, _a...) \
+if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a);
+#else
+#define TRACE_ENTER
+#define TRACE_EXIT
+#define TRACE_DEBUG(argflags, _f, _a...)
+#endif
+
+extern xen_pfn_t *xen_machine_phys;
+/* Xen starts physical pages after the 4MB ISA hole -
+ * FreeBSD doesn't
+ */
+
+
+#undef ADD_ISA_HOLE /* XXX */
+
+#ifdef ADD_ISA_HOLE
+#define ISA_INDEX_OFFSET 1024 
+#define ISA_PDR_OFFSET 1
+#else
+#define ISA_INDEX_OFFSET 0
+#define ISA_PDR_OFFSET 0
+#endif
+
+
+#define PFNTOMFN(i) (xen_phys_machine[(i)])
+#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)])
+
+#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE)
+#define PTOV(x) (((uintptr_t)(x)) + KERNBASE)
+
+#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT)
+#define PFNTOV(x) PTOV((vm_paddr_t)(x)  << PAGE_SHIFT)
+
+#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT)
+#define PFN_UP(x)    (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+
+#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT)
+#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT)
+
+
+void xpq_init(void);
+
+int  xen_create_contiguous_region(vm_page_t pages, int npages);
+
+void  xen_destroy_contiguous_region(void * addr, int npages);
+
+#endif

Property changes on: i386/include/xen/xenvar.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/include/xen/synch_bitops.h
===================================================================
--- i386/include/xen/synch_bitops.h	(.../stable/6/sys)	(revision 0)
+++ i386/include/xen/synch_bitops.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,139 @@
+#ifndef __XEN_SYNCH_BITOPS_H__
+#define __XEN_SYNCH_BITOPS_H__
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ * Heavily modified to provide guaranteed strong synchronisation
+ * when communicating with Xen or other guest OSes running on other CPUs.
+ */
+
+
+#define ADDR (*(volatile long *) addr)
+
+static __inline__ void synch_set_bit(int nr, volatile void * addr)
+{
+    __asm__ __volatile__ ( 
+        "lock btsl %1,%0"
+        : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ void synch_clear_bit(int nr, volatile void * addr)
+{
+    __asm__ __volatile__ (
+        "lock btrl %1,%0"
+        : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ void synch_change_bit(int nr, volatile void * addr)
+{
+    __asm__ __volatile__ (
+        "lock btcl %1,%0"
+        : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    __asm__ __volatile__ (
+        "lock btsl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+    return oldbit;
+}
+
+static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    __asm__ __volatile__ (
+        "lock btrl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+    return oldbit;
+}
+
+static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+
+    __asm__ __volatile__ (
+        "lock btcl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+    return oldbit;
+}
+
+struct __synch_xchg_dummy { unsigned long a[100]; };
+#define __synch_xg(x) ((volatile struct __synch_xchg_dummy *)(x))
+
+#define synch_cmpxchg(ptr, old, new) \
+((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\
+                                     (unsigned long)(old), \
+                                     (unsigned long)(new), \
+                                     sizeof(*(ptr))))
+
+static inline unsigned long __synch_cmpxchg(volatile void *ptr,
+					    unsigned long old,
+					    unsigned long new, int size)
+{
+	unsigned long prev;
+	switch (size) {
+	case 1:
+		__asm__ __volatile__("lock; cmpxchgb %b1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__synch_xg(ptr)),
+				       "0"(old)
+				     : "memory");
+		return prev;
+	case 2:
+		__asm__ __volatile__("lock; cmpxchgw %w1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__synch_xg(ptr)),
+				       "0"(old)
+				     : "memory");
+		return prev;
+#ifdef CONFIG_X86_64
+	case 4:
+		__asm__ __volatile__("lock; cmpxchgl %k1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__synch_xg(ptr)),
+				       "0"(old)
+				     : "memory");
+		return prev;
+	case 8:
+		__asm__ __volatile__("lock; cmpxchgq %1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__synch_xg(ptr)),
+				       "0"(old)
+				     : "memory");
+		return prev;
+#else
+	case 4:
+		__asm__ __volatile__("lock; cmpxchgl %1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__synch_xg(ptr)),
+				       "0"(old)
+				     : "memory");
+		return prev;
+#endif
+	}
+	return old;
+}
+
+static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
+{
+    return ((1UL << (nr & 31)) & 
+            (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int synch_var_test_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    __asm__ __volatile__ (
+        "btl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) );
+    return oldbit;
+}
+
+#define synch_test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ synch_const_test_bit((nr),(addr)) : \
+ synch_var_test_bit((nr),(addr)))
+
+#endif /* __XEN_SYNCH_BITOPS_H__ */

Property changes on: i386/include/xen/synch_bitops.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/include/xen/xenfunc.h
===================================================================
--- i386/include/xen/xenfunc.h	(.../stable/6/sys)	(revision 0)
+++ i386/include/xen/xenfunc.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,79 @@
+/*
+ *
+ * Copyright (c) 2004,2005 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _XEN_XENFUNC_H_
+#define _XEN_XENFUNC_H_
+
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <machine/xen/xenpmap.h>
+#include <machine/segments.h>
+#include <sys/pcpu.h>
+#define BKPT __asm__("int3");
+#define XPQ_CALL_DEPTH 5
+#define XPQ_CALL_COUNT 2
+#define PG_PRIV PG_AVAIL3
+typedef struct { 
+	unsigned long pt_ref;
+	unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH];
+} pteinfo_t;
+
+extern pteinfo_t *pteinfo_list;
+#ifdef XENDEBUG_LOW
+#define	__PRINTK(x) printk x
+#else
+#define	__PRINTK(x)
+#endif
+
+char *xen_setbootenv(char *cmd_line);
+
+int  xen_boothowto(char *envp);
+
+void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line);
+
+#ifdef INVARIANTS
+#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), __FILE__, __LINE__)
+#else
+#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0)
+#endif	
+
+void xen_update_descriptor(union descriptor *, union descriptor *);
+
+extern struct mtx balloon_lock;
+#if 0
+#define balloon_lock(__flags)   mtx_lock_irqsave(&balloon_lock, __flags)
+#define balloon_unlock(__flags) mtx_unlock_irqrestore(&balloon_lock, __flags)
+#else /* stubs: the mutex is not touched, the caller's flag variable is just set to 1 / 0 */
+#define balloon_lock(__flags)   ((__flags) = 1)
+#define balloon_unlock(__flags) ((__flags) = 0)
+#endif
+
+
+
+#endif /* _XEN_XENFUNC_H_ */

Property changes on: i386/include/xen/xenfunc.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/include/xen/xenpmap.h
===================================================================
--- i386/include/xen/xenpmap.h	(.../stable/6/sys)	(revision 0)
+++ i386/include/xen/xenpmap.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,230 @@
+/*
+ *
+ * Copyright (c) 2004 Christian Limpach.
+ * Copyright (c) 2004,2005 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Christian Limpach.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _XEN_XENPMAP_H_
+#define _XEN_XENPMAP_H_
+
+#include <xen/features.h>
+
+void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int);
+void xen_pt_switch(vm_paddr_t);
+void xen_set_ldt(vm_paddr_t, unsigned long);
+void xen_pgdpt_pin(vm_paddr_t);
+void xen_pgd_pin(vm_paddr_t);
+void xen_pgd_unpin(vm_paddr_t);
+void xen_pt_pin(vm_paddr_t);
+void xen_pt_unpin(vm_paddr_t);
+void xen_flush_queue(void);
+void xen_check_queue(void);
+#if 0
+void pmap_ref(pt_entry_t *pte, vm_paddr_t ma);
+#endif
+void pmap_suspend(void);
+void pmap_resume(void);
+
+#ifdef INVARIANTS
+#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__)
+#else
+#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0)
+#endif	
+
+#ifdef PMAP_DEBUG
+#define PMAP_REF pmap_ref
+#define PMAP_DEC_REF_PAGE pmap_dec_ref_page
+#define PMAP_MARK_PRIV pmap_mark_privileged
+#define PMAP_MARK_UNPRIV pmap_mark_unprivileged
+#else 
+#define PMAP_MARK_PRIV(a)
+#define PMAP_MARK_UNPRIV(a)
+#define PMAP_REF(a, b)
+#define PMAP_DEC_REF_PAGE(a)
+#endif
+
+#define ALWAYS_SYNC 0
+
+#ifdef PT_DEBUG
+#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__) 
+#else
+#define PT_LOG()
+#endif
+
+#define INVALID_P2M_ENTRY	(~0UL)
+
+#define pmap_valid_entry(E)           ((E) & PG_V) /* is PDE or PTE valid? */
+
+#define SH_PD_SET_VA        1
+#define SH_PD_SET_VA_MA     2
+#define SH_PD_SET_VA_CLEAR  3
+
+struct pmap;
+void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type);
+#ifdef notyet
+static vm_paddr_t
+vptetomachpte(vm_paddr_t *pte)
+{
+	vm_offset_t offset, ppte;
+	vm_paddr_t pgoffset, retval, *pdir_shadow_ptr;
+	int pgindex;
+
+	ppte = (vm_offset_t)pte;
+	pgoffset = (ppte & PAGE_MASK);
+	offset = ppte - (vm_offset_t)PTmap;
+	pgindex = ppte >> PDRSHIFT;
+
+	pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow);
+	retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset;
+	return (retval);
+}
+#endif
+#define	PT_GET(_ptp)						\
+	(pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : (0))
+
+#ifdef WRITABLE_PAGETABLES
+
+#define PT_SET_VA(_ptp,_npte,sync) do {				\
+        PMAP_REF((_ptp), xpmap_ptom(_npte));                    \
+        PT_LOG();                                               \
+        *(_ptp) = xpmap_ptom((_npte));                          \
+} while (/*CONSTCOND*/0)
+#define PT_SET_VA_MA(_ptp,_npte,sync) do {		        \
+        PMAP_REF((_ptp), (_npte));                              \
+        PT_LOG();                                               \
+        *(_ptp) = (_npte);                                      \
+} while (/*CONSTCOND*/0)
+#define PT_CLEAR_VA(_ptp, sync) do {				\
+        PMAP_REF((pt_entry_t *)(_ptp), 0);                      \
+        PT_LOG();                                               \
+        *(_ptp) = 0;                                            \
+} while (/*CONSTCOND*/0)
+
+#define PD_SET_VA(_pmap, _ptp, _npte, sync) do {		\
+        PMAP_REF((_ptp), xpmap_ptom(_npte));                    \
+        pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA);           \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();     	\
+} while (/*CONSTCOND*/0)
+#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do {		\
+        PMAP_REF((_ptp), (_npte));                              \
+        pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA);        \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PD_CLEAR_VA(_pmap, _ptp, sync) do {			\
+        PMAP_REF((pt_entry_t *)(_ptp), 0);                      \
+        pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR);  	\
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+
+#else /* !WRITABLE_PAGETABLES */
+
+#define PT_SET_VA(_ptp,_npte,sync) do {				\
+        PMAP_REF((_ptp), xpmap_ptom(_npte));                    \
+	xen_queue_pt_update(vtomach(_ptp), 	        \
+			    xpmap_ptom(_npte)); 		\
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PT_SET_VA_MA(_ptp,_npte,sync) do {		        \
+        PMAP_REF((_ptp), (_npte));                              \
+	xen_queue_pt_update(vtomach(_ptp), _npte);        \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PT_CLEAR_VA(_ptp, sync) do {				\
+        PMAP_REF((pt_entry_t *)(_ptp), 0);                      \
+	xen_queue_pt_update(vtomach(_ptp), 0);            \
+	if (sync || ALWAYS_SYNC)				\
+		xen_flush_queue();				\
+} while (/*CONSTCOND*/0)
+
+#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do {		\
+        PMAP_REF((_ptepindex), xpmap_ptom(_npte)); /* expands to nothing unless PMAP_DEBUG */ \
+        pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA);     \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();     	\
+} while (/*CONSTCOND*/0)
+#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do {		\
+        PMAP_REF((_ptepindex), (_npte));                        \
+        pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA);  \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do {		\
+        PMAP_REF((pt_entry_t *)(_ptepindex), 0);                \
+        pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR);    \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+
+#endif
+
+#define PT_SET_MA(_va, _ma)						\
+do {									\
+	int err;							\
+	err = HYPERVISOR_update_va_mapping(((unsigned long)(_va)),	\
+	    (_ma), UVMF_INVLPG| UVMF_ALL);				\
+	KASSERT(err >= 0, ("unexpected result from update_va_mapping")); \
+} while (/*CONSTCOND*/0)	  
+
+#define	PT_UPDATES_FLUSH() do {				        \
+        xen_flush_queue();                                      \
+} while (/*CONSTCOND*/0)
+
+static __inline vm_paddr_t
+xpmap_mtop(vm_paddr_t mpa)
+{
+	/* Run the PG_FRAME bits through machtophys(); all other bits pass through. */
+	vm_paddr_t frame = mpa & PG_FRAME, rest = mpa & ~PG_FRAME;
+	return (machtophys(frame) | rest);
+}
+
+static __inline vm_paddr_t
+xpmap_ptom(vm_paddr_t ppa)
+{
+	/* Run the PG_FRAME bits through phystomach(); all other bits pass through. */
+	vm_paddr_t frame = ppa & PG_FRAME, rest = ppa & ~PG_FRAME;
+	return (phystomach(frame) | rest);
+}
+
+static __inline void
+set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+#ifdef notyet
+	PANIC_IF(max_mapnr && pfn >= max_mapnr);
+#endif
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		xen_phys_machine[pfn] = mfn;	/* the only store this function makes */
+		return;
+	}	/* below: XENFEAT_auto_translated_physmap is set, nothing is recorded */
+#ifdef notyet
+	PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY));
+#endif
+}
+
+
+
+
+#endif /* _XEN_XENPMAP_H_ */

Property changes on: i386/include/xen/xenpmap.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/conf/DEFAULTS
===================================================================
--- i386/conf/DEFAULTS	(.../stable/6/sys)	(revision 184012)
+++ i386/conf/DEFAULTS	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -15,3 +15,5 @@
 # Pseudo devices.
 device		mem		# Memory and kernel memory devices
 device		io		# I/O device
+
+options 	NATIVE
Index: i386/conf/XEN
===================================================================
--- i386/conf/XEN	(.../stable/6/sys)	(revision 0)
+++ i386/conf/XEN	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,156 @@
+#
+# XEN -- Xen kernel configuration file for FreeBSD/i386
+#
+# For more information on this file, please read the handbook section on
+# Kernel Configuration Files:
+#
+#    http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
+#
+# The handbook is also available locally in /usr/share/doc/handbook
+# if you've installed the doc distribution, otherwise always see the
+# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
+# latest information.
+#
+# An exhaustive list of options and more detailed explanations of the
+# device lines is also present in the ../../conf/NOTES and NOTES files.
+# If you are in doubt as to the purpose or necessity of a line, check first
+# in NOTES.
+#
+# $FreeBSD$
+
+machine		i386
+cpu		I686_CPU
+ident		XEN
+
+# To statically compile in device wiring instead of /boot/device.hints
+#hints		"GENERIC.hints"		# Default places to look for devices.
+
+makeoptions	DEBUG=-g		# Build kernel with gdb(1) debug symbols
+makeoptions	MODULES_OVERRIDE=""
+
+#options 	SCHED_ULE		# ULE scheduler
+#options 	PREEMPTION		# Enable kernel thread preemption
+options		SCHED_4BSD
+options 	INET			# InterNETworking
+options 	INET6			# IPv6 communications protocols
+options 	FFS			# Berkeley Fast Filesystem
+options 	SOFTUPDATES		# Enable FFS soft updates support
+options 	UFS_ACL			# Support for access control lists
+options 	UFS_DIRHASH		# Improve performance on big directories
+options 	MD_ROOT			# MD is a potential root device
+options 	NFSCLIENT		# Network Filesystem Client
+options 	NFSSERVER		# Network Filesystem Server
+options 	NFS_ROOT		# NFS usable as /, requires NFSCLIENT
+options 	MSDOSFS			# MSDOS Filesystem
+options 	CD9660			# ISO 9660 Filesystem
+options 	PROCFS			# Process filesystem (requires PSEUDOFS)
+options 	PSEUDOFS		# Pseudo-filesystem framework
+options 	GEOM_LABEL		# Provides labelization
+options 	COMPAT_FREEBSD4		# Compatible with FreeBSD4
+options 	COMPAT_FREEBSD5		# Compatible with FreeBSD5
+options 	SCSI_DELAY=5000		# Delay (in ms) before probing SCSI
+options 	KTRACE			# ktrace(1) support
+options 	SYSVSHM			# SYSV-style shared memory
+options 	SYSVMSG			# SYSV-style message queues
+options 	SYSVSEM			# SYSV-style semaphores
+options 	_KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
+options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
+options 	AUDIT			# Security event auditing
+
+# Debugging for use in -current
+options 	KDB			# Enable kernel debugger support.
+options 	DDB			# Support DDB.
+options 	GDB			# Support remote GDB.
+#options 	INVARIANTS		# Enable calls of extra sanity checking
+#options 	INVARIANT_SUPPORT	# Extra sanity checks of internal structures, required by INVARIANTS
+#options 	WITNESS			# Enable checks to detect deadlocks and cycles
+#options 	WITNESS_SKIPSPIN	# Don't run witness on spinlocks for speed
+
+# To make an SMP kernel, the next two lines are needed
+#options 	SMP			# Symmetric MultiProcessor Kernel
+#device		apic			# I/O APIC
+options		PAE
+
+
+# CPU frequency control
+#device		cpufreq 		# native only
+
+# Bus support.
+#device		pci
+
+# SCSI peripherals
+device		scbus		# SCSI bus (required for SCSI)
+device		ch		# SCSI media changers
+device		da		# Direct Access (disks)
+device		sa		# Sequential Access (tape etc)
+device		cd		# CD
+device		pass		# Passthrough device (direct SCSI access)
+device		ses		# SCSI Environmental Services (and SAF-TE)
+
+# atkbdc0 controls both the keyboard and the PS/2 mouse
+device		atkbdc		# AT keyboard controller
+device		atkbd		# AT keyboard
+device		psm		# PS/2 mouse
+device		kbdmux		# keyboard multiplexer
+#device		vga		# VGA video card driver
+device		splash		# Splash screen and screen saver support
+
+# syscons is the default console driver, resembling an SCO console
+
+#device		agp		# support several AGP chipsets
+
+# Power management support (see NOTES for more options)
+#device		apm
+# Add suspend/resume support for the i8254.
+#device		pmtimer		# native
+
+device		pci
+
+# Serial (COM) ports
+device		uart		# Generic UART driver
+
+# If you've got a "dumb" serial or parallel PCI card that is
+# supported by the puc(4) glue driver, uncomment the following
+# line to enable it (connects to sio, uart and/or ppc drivers):
+#device		puc
+
+# PCI Ethernet NICs.
+device		em		# Intel PRO/1000 adapter Gigabit Ethernet Card
+
+# PCI Ethernet NICs that use the common MII bus controller code.
+# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs!
+device		miibus		# MII bus support
+
+# Pseudo devices.
+device		loop		# Network loopback
+device		random		# Entropy device
+device		ether		# Ethernet support
+device		sl		# Kernel SLIP
+device		ppp		# Kernel PPP
+device		tun		# Packet tunnel.
+device		pty		# Pseudo-ttys (telnet etc)
+device		md		# Memory "disks"
+device		gif		# IPv6 and IPv4 tunneling
+device		faith		# IPv6-to-IPv4 relaying (translation)
+device		firmware	# firmware assist module
+
+# The `bpf' device enables the Berkeley Packet Filter.
+# Be aware of the administrative consequences of enabling this!
+# Note that 'bpf' is required for DHCP.
+device		bpf		# Berkeley packet filter
+
+
+options		XEN
+nooption	NATIVE
+nodevice	atpic
+options		MCLSHIFT=12
+
+nodevice	isa
+nooption	ISAPNP
+
+options	KTR
+options KTR_COMPILE=(KTR_PMAP)
+options KTR_CPUMASK=0xff
+options KTR_ENTRIES=65536
+options KTR_MASK=(KTR_PMAP)
+options	KVA_PAGES=1600
Index: i386/i386/vm_machdep.c
===================================================================
--- i386/i386/vm_machdep.c	(.../stable/6/sys)	(revision 184012)
+++ i386/i386/vm_machdep.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -89,6 +89,9 @@
 #include <vm/vm_map.h>
 #include <vm/vm_param.h>
 
+#ifdef XEN
+#include <xen/hypervisor.h>
+#endif
 #ifdef PC98
 #include <pc98/cbus/cbus.h>
 #else
@@ -264,7 +267,7 @@
 
 	/* Setup to release sched_lock in fork_exit(). */
 	td2->td_md.md_spinlock_count = 1;
-	td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+	td2->td_md.md_saved_flags = PSL_USER;
 
 	/*
 	 * Now, cpu_switch() can schedule the new process.
@@ -436,7 +439,7 @@
 
 	/* Setup to release sched_lock in fork_exit(). */
 	td->td_md.md_spinlock_count = 1;
-	td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+	td->td_md.md_saved_flags = PSL_USER;
 }
 
 /*
@@ -593,6 +596,9 @@
 	int b;
 #endif
 
+#ifdef XEN
+	HYPERVISOR_shutdown(SHUTDOWN_poweroff);
+#endif	
 	disable_intr();
 #ifdef CPU_ELAN
 	if (elan_mmcr != NULL)
@@ -762,8 +768,11 @@
 	 */
 	ptep = vtopte(sf->kva);
 	opte = *ptep;
+#ifdef XEN
+	PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag | PG_RW | PG_V);
+#else	
 	*ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V;
-
+#endif
 	/*
 	 * Avoid unnecessary TLB invalidations: If the sf_buf's old
 	 * virtual-to-physical mapping was not used, then any processor
@@ -812,6 +821,14 @@
 	if (sf->ref_count == 0) {
 		TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
 		nsfbufsused--;
+#ifdef XEN
+		/*
+		 * Xen doesn't like having dangling R/W mappings
+		 */
+                pmap_qremove(sf->kva, 1);
+                sf->m = NULL;
+                LIST_REMOVE(sf, list_entry);
+#endif
 		if (sf_buf_alloc_want > 0)
 			wakeup_one(&sf_buf_freelist);
 	}
Index: i386/i386/swtch.s
===================================================================
--- i386/i386/swtch.s	(.../stable/6/sys)	(revision 184012)
+++ i386/i386/swtch.s	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -71,7 +71,7 @@
 	movl	8(%esp),%ecx			/* New thread */
 	movl	TD_PCB(%ecx),%edx
 	movl	PCB_CR3(%edx),%eax
-	movl	%eax,%cr3			/* new address space */
+	LOAD_CR3(%eax)				/* new address space */
 	/* set bit in new pm_active */
 	movl	TD_PROC(%ecx),%eax
 	movl	P_VMSPACE(%eax), %ebx
@@ -114,11 +114,13 @@
 	movl	%gs,PCB_GS(%edx)
 	pushfl					/* PSL */
 	popl	PCB_PSL(%edx)
+#ifndef XEN
 	/* Check to see if we need to call a switchout function. */
 	movl	PCB_SWITCHOUT(%edx),%eax
 	cmpl	$0, %eax
 	je	1f
 	call	*%eax
+#endif
 1:
 	/* Test if debug registers should be saved. */
 	testl	$PCB_DBREGS,PCB_FLAGS(%edx)
@@ -171,7 +173,7 @@
 	movl	%cr3,%ebx			/* The same address space? */
 	cmpl	%ebx,%eax
 	je	sw1
-	movl	%eax,%cr3			/* new address space */
+	LOAD_CR3(%eax)				/* new address space */
 
 	/* Release bit from old pmap->pm_active */
 	movl	PCPU(CURPMAP), %ebx
@@ -191,7 +193,19 @@
 	btsl	%esi, PM_ACTIVE(%ebx)		/* set new */
 
 sw1:
+#ifdef XEN
+	pushl	%eax
+	pushl	%ecx
+	pushl	%edx
+	call	xen_handle_thread_switch
+	popl	%edx
+	popl	%ecx
+	popl	%eax
 	/*
+	 * XXX set IOPL
+	 */
+#else
+	/*
 	 * At this point, we've switched address spaces and are ready
 	 * to load up the rest of the next context.
 	 */
@@ -238,7 +252,7 @@
 	movl	12(%esi), %ebx
 	movl	%eax, 8(%edi)
 	movl	%ebx, 12(%edi)
-
+#endif
 	/* Restore context. */
 	movl	PCB_EBX(%edx),%ebx
 	movl	PCB_ESP(%edx),%esp
@@ -263,7 +277,7 @@
 	movl	_default_ldt,%eax
 	cmpl	PCPU(CURRENTLDT),%eax
 	je	2f
-	lldt	_default_ldt
+	LLDT(_default_ldt)
 	movl	%eax,PCPU(CURRENTLDT)
 	jmp	2f
 1:
@@ -366,7 +380,7 @@
 	 * parent's npx state for forks by forgetting to reload.
 	 */
 	pushfl
-	cli
+	CLI
 	movl	PCPU(FPCURTHREAD),%eax
 	testl	%eax,%eax
 	je	1f
Index: i386/i386/apic_vector.s
===================================================================
--- i386/i386/apic_vector.s	(.../stable/6/sys)	(revision 184012)
+++ i386/i386/apic_vector.s	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -299,6 +299,7 @@
 /*
  * Handler for IPIs sent via the per-cpu IPI bitmap.
  */
+#ifndef XEN
 	.text
 	SUPERALIGN_TEXT
 IDTVEC(ipi_intr_bitmap_handler)	
@@ -320,7 +321,7 @@
 	addl	$4, %esp	/* XXX convert clockframe to trapframe */
 	MEXITCOUNT
 	jmp	doreti
-
+#endif
 /*
  * Executed by a CPU when it receives an Xcpustop IPI from another CPU,
  *
Index: i386/i386/genassym.c
===================================================================
--- i386/i386/genassym.c	(.../stable/6/sys)	(revision 184012)
+++ i386/i386/genassym.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -227,3 +227,9 @@
 ASSYM(BUS_SPACE_HANDLE_BASE, offsetof(struct bus_space_handle, bsh_base));
 ASSYM(BUS_SPACE_HANDLE_IAT, offsetof(struct bus_space_handle, bsh_iat));
 #endif
+
+#ifdef XEN
+#include <xen/hypervisor.h>
+ASSYM(PC_CR3, offsetof(struct pcpu, pc_cr3));
+ASSYM(HYPERVISOR_VIRT_START, __HYPERVISOR_VIRT_START);
+#endif
Index: i386/i386/support.s
===================================================================
--- i386/i386/support.s	(.../stable/6/sys)	(revision 184012)
+++ i386/i386/support.s	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -1426,10 +1426,11 @@
  */
 /* void lgdt(struct region_descriptor *rdp); */
 ENTRY(lgdt)
+#ifndef XEN
 	/* reload the descriptor table */
 	movl	4(%esp),%eax
 	lgdt	(%eax)
-
+#endif
 	/* flush the prefetch q */
 	jmp	1f
 	nop
Index: i386/i386/busdma_machdep.c
===================================================================
--- i386/i386/busdma_machdep.c	(.../stable/6/sys)	(revision 184012)
+++ i386/i386/busdma_machdep.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -140,6 +140,11 @@
 static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
 static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr);
 
+#ifdef XEN
+#undef pmap_kextract
+#define pmap_kextract pmap_kextract_ma
+#endif
+
 /*
  * Return true if a match is made.
  *
Index: i386/i386/sys_machdep.c
===================================================================
--- i386/i386/sys_machdep.c	(.../stable/6/sys)	(revision 184012)
+++ i386/i386/sys_machdep.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -58,6 +58,27 @@
 
 #include <vm/vm_kern.h>		/* for kernel_map */
 
+#ifdef XEN 
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <machine/xen/xenfunc.h>
+
+void i386_reset_ldt(struct proc_ldt *pldt); 
+
+void 
+i386_reset_ldt(struct proc_ldt *pldt) 
+{ 
+        xen_set_ldt((vm_offset_t)pldt->ldt_base, pldt->ldt_len); /* XEN build only: pass this LDT's base and length to xen_set_ldt() */ 
+} 
+#define SEG_VIRT_END  ((HYPERVISOR_VIRT_START >> 12) & 0xffff) /* fully parenthesized so it is safe inside larger expressions */
+#define SET_DESCRIPTOR(index, sd)  /* no trailing ';' here: the call site supplies it */ \
+	HYPERVISOR_update_descriptor(vtomach(&PCPU_GET(fsgs_gdt)[index]), *(uint64_t *)&(sd))
+#else
+#define i386_reset_ldt(x)
+#define SEG_VIRT_END 0xffff
+#define SET_DESCRIPTOR(index, sd) (PCPU_GET(fsgs_gdt)[index] = (sd)) /* native: plain store into the per-CPU fsgs_gdt slot */
+#endif
+
 #define MAX_LD 8192
 #define LD_PER_PAGE 512
 #define NEW_MAX_LD(num)  ((num + LD_PER_PAGE) & ~(LD_PER_PAGE-1))
@@ -163,7 +184,7 @@
 			 */
 			sd.sd_lobase = base & 0xffffff;
 			sd.sd_hibase = (base >> 24) & 0xff;
-			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
+			sd.sd_lolimit = SEG_VIRT_END; /* 4GB limit, wraps */
 			sd.sd_hilimit = 0xf;
 			sd.sd_type  = SDT_MEMRWA;
 			sd.sd_dpl   = SEL_UPL;
@@ -173,7 +194,7 @@
 			sd.sd_gran  = 1;
 			critical_enter();
 			td->td_pcb->pcb_fsd = sd;
-			PCPU_GET(fsgs_gdt)[0] = sd;
+			SET_DESCRIPTOR(0, sd);
 			critical_exit();
 			td->td_frame->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
 		}
@@ -193,7 +214,7 @@
 			 */
 			sd.sd_lobase = base & 0xffffff;
 			sd.sd_hibase = (base >> 24) & 0xff;
-			sd.sd_lolimit = 0xffff;	/* 4GB limit, wraps around */
+			sd.sd_lolimit = SEG_VIRT_END; /* 4GB limit, wraps */
 			sd.sd_hilimit = 0xf;
 			sd.sd_type  = SDT_MEMRWA;
 			sd.sd_dpl   = SEL_UPL;
@@ -203,7 +224,7 @@
 			sd.sd_gran  = 1;
 			critical_enter();
 			td->td_pcb->pcb_gsd = sd;
-			PCPU_GET(fsgs_gdt)[1] = sd;
+			SET_DESCRIPTOR(1, sd);
 			critical_exit();
 			load_gs(GSEL(GUGS_SEL, SEL_UPL));
 		}
@@ -364,6 +385,10 @@
 	struct proc_ldt *pldt;
 
 	pldt = mdp->md_ldt;
+#ifdef XEN
+	i386_reset_ldt(pldt);
+	PCPU_SET(currentldt, (int)pldt);
+#else	
 #ifdef SMP
 	gdt[PCPU_GET(cpuid) * NGDT + GUSERLDT_SEL].sd = pldt->ldt_sd;
 #else
@@ -371,6 +396,7 @@
 #endif
 	lldt(GSEL(GUSERLDT_SEL, SEL_KPL));
 	PCPU_SET(currentldt, GSEL(GUSERLDT_SEL, SEL_KPL));
+#endif /* !XEN */
 }
 
 #ifdef SMP
@@ -385,6 +411,39 @@
 }
 #endif
 
+#ifdef XEN
+
+struct proc_ldt *
+user_ldt_alloc(struct mdproc *mdp, int len)
+{
+	struct proc_ldt *cur, *fresh;
+	/* Header first, then NEW_MAX_LD(len) descriptors rounded up to whole pages. */
+	MALLOC(fresh, struct proc_ldt *, sizeof(*fresh), M_SUBPROC, M_WAITOK);
+	len = NEW_MAX_LD(len);
+	fresh->ldt_len = len;
+	fresh->ldt_base = (caddr_t)kmem_alloc(kernel_map,
+	    round_page(len * sizeof(union descriptor)));
+	if (fresh->ldt_base == NULL) {
+		FREE(fresh, M_SUBPROC);
+		return NULL;
+	}
+	fresh->ldt_refcnt = 1;
+	fresh->ldt_active = 0;
+	/* Seed from the process's existing LDT if it has one, else from ldt. */
+	cur = mdp->md_ldt;
+	if (cur == NULL) {
+		bcopy(ldt, fresh->ldt_base, PAGE_SIZE);
+	} else {
+		if (len > cur->ldt_len)
+			len = cur->ldt_len;
+		bcopy(cur->ldt_base, fresh->ldt_base,
+		    len * sizeof(union descriptor));
+	}
+	pmap_map_readonly(kernel_pmap, (vm_offset_t)fresh->ldt_base,
+	    fresh->ldt_len * sizeof(union descriptor));
+	return fresh;
+}
+#else
 /*
  * Must be called with either sched_lock free or held but not recursed.
  * If it does not return NULL, it will return with it owned.
@@ -425,6 +484,7 @@
 	}
 	return new_ldt;
 }
+#endif
 
 /*
  * Must be called either with sched_lock free or held but not recursed.
@@ -443,8 +503,11 @@
 		mtx_lock_spin(&sched_lock);
 	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
 	if (td == PCPU_GET(curthread)) {
+#ifndef XEN
 		lldt(_default_ldt);
+#endif		
 		PCPU_SET(currentldt, _default_ldt);
+		i386_reset_ldt((struct proc_ldt *)_default_ldt);
 	}
 
 	mdp->md_ldt = NULL;
@@ -549,6 +612,9 @@
 	}
 
 	if (!(uap->start == LDT_AUTO_ALLOC && uap->num == 1)) {
+#ifdef XEN 
+                        load_gs(0);  /* XXX check if we really still need this */
+#endif 
 		/* complain a for a while if using old methods */
 		if (ldt_warnings++ < NUM_LDT_WARNINGS) {
 			printf("Warning: pid %d used static ldt allocation.\n",
@@ -671,6 +737,23 @@
 	return (error);
 }
 
+#ifdef XEN
+static int
+i386_set_ldt_data(struct thread *td, int start, int num,
+	union descriptor *descs)
+{	/* XEN build: each entry is written via HYPERVISOR_update_descriptor() */
+	struct proc_ldt *pldt = td->td_proc->p_md.md_ldt;
+	union descriptor *d;
+	int rc;
+	for (; num > 0; num--, start++, descs++) {
+		d = (union descriptor *)pldt->ldt_base + start;
+		rc = HYPERVISOR_update_descriptor(vtomach(d), *(uint64_t *)descs);
+		if (rc)		/* any nonzero result is treated as fatal */
+			panic("failed to update ldt: %d", rc);
+	}
+	return (0);
+}
+#else
 static int
 i386_set_ldt_data(struct thread *td, int start, int num,
 	union descriptor *descs)
@@ -686,6 +769,7 @@
 	    num * sizeof(union descriptor));
 	return (0);
 }
+#endif
 
 static int
 i386_ldt_grow(struct thread *td, int len) 
Index: i386/i386/machdep.c
===================================================================
--- i386/i386/machdep.c	(.../stable/6/sys)	(revision 184012)
+++ i386/i386/machdep.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -141,6 +141,24 @@
 uint32_t arch_i386_xbox_memsize = 0;
 #endif
 
+#ifdef XEN
+/* XEN includes */
+#include <xen/hypervisor.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenvar.h>
+#include <machine/xen/xenfunc.h>
+#include <xen/xen_intr.h>
+
+void Xhypervisor_callback(void);
+void failsafe_callback(void);
+
+extern trap_info_t trap_table[];
+struct proc_ldt default_proc_ldt;
+extern int init_first;
+int running_xen = 1;
+extern unsigned long physfree;
+#endif /* XEN */
+
 /* Sanity check for __curthread() */
 CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
 
@@ -282,8 +300,9 @@
 	 */
 	bufinit();
 	vm_pager_bufferinit();
-
+#ifndef XEN
 	cpu_setregs();
+#endif
 }
 
 /*
@@ -1108,6 +1127,25 @@
 	return (0);
 }
 
+static int	cpu_idle_hlt = 1;
+SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
+    &cpu_idle_hlt, 0, "Idle loop HLT enable");
+#ifdef XEN
+
+void
+cpu_halt(void)
+{
+	HYPERVISOR_shutdown(SHUTDOWN_poweroff);	/* XEN build: halting is a poweroff request via HYPERVISOR_shutdown() */
+}
+
+static void
+cpu_idle_default(void)
+{
+	idle_block();	/* XEN build: calls idle_block() where the native variant runs "sti; hlt" */
+}
+
+#else
+
 /*
  * Shutdown the CPU as much as possible
  */
@@ -1133,9 +1171,6 @@
  * XXX I'm turning it on for SMP as well by default for now.  It seems to
  * help lock contention somewhat, and this is critical for HTT. -Peter
  */
-static int	cpu_idle_hlt = 1;
-SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW,
-    &cpu_idle_hlt, 0, "Idle loop HLT enable");
 
 static void
 cpu_idle_default(void)
@@ -1147,6 +1182,7 @@
 	 */
 	__asm __volatile("sti; hlt");
 }
+#endif /* !XEN */
 
 /*
  * Note that we have to be careful here to avoid a race between checking
@@ -1158,7 +1194,7 @@
 cpu_idle(void)
 {
 
-#ifdef SMP
+#if defined(SMP) && !defined(XEN)
 	if (mp_grab_cpu_hlt())
 		return;
 #endif
@@ -1317,10 +1353,16 @@
  */
 
 int _default_ldt;
+
+#ifdef XEN
+union descriptor *gdt;
+union descriptor *ldt;
+#else
 union descriptor gdt[NGDT * MAXCPU];	/* global descriptor table */
+union descriptor ldt[NLDT];		/* local descriptor table */
+#endif
 static struct gate_descriptor idt0[NIDT];
 struct gate_descriptor *idt = &idt0[0];	/* interrupt descriptor table */
-union descriptor ldt[NLDT];		/* local descriptor table */
 struct region_descriptor r_gdt, r_idt;	/* table descriptors */
 
 int private_tss;			/* flag indicating private tss */
@@ -1355,7 +1397,7 @@
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
-	0,			/* segment descriptor priority level */
+	SEL_KPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
@@ -1382,7 +1424,7 @@
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMERA,		/* segment type */
-	0,			/* segment descriptor priority level */
+	SEL_KPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
@@ -1391,7 +1433,7 @@
 {	0x0,			/* segment base address  */
 	0xfffff,		/* length - all address space */
 	SDT_MEMRWA,		/* segment type */
-	0,			/* segment descriptor priority level */
+	SEL_KPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
@@ -1418,11 +1460,12 @@
 {	0x400,			/* segment base address */
 	0xfffff,		/* length */
 	SDT_MEMRWA,		/* segment type */
-	0,			/* segment descriptor priority level */
+	SEL_KPL,		/* segment descriptor priority level */
 	1,			/* segment descriptor present */
 	0, 0,
 	1,			/* default 32 vs 16 bit size */
 	1  			/* limit granularity (byte/page units)*/ },
+#ifndef XEN
 /* GPROC0_SEL	9 Proc 0 Tss Descriptor */
 {
 	0x0,			/* segment base address */
@@ -1514,6 +1557,7 @@
 	0, 0,
 	0,			/* default 32 vs 16 bit size */
 	0  			/* limit granularity (byte/page units)*/ },
+#endif /* !XEN */
 };
 
 static struct soft_segment_descriptor ldt_segs[] = {
@@ -1735,7 +1779,17 @@
 		goto physmap_done;
 	}
 #endif
-
+#ifdef XEN
+	has_smap = 0;
+	Maxmem = xen_start_info->nr_pages - init_first;
+	physmem = Maxmem;
+	basemem = 0;
+	physmap[0] = init_first << PAGE_SHIFT;
+	physmap[1] = ptoa(Maxmem) - round_page(MSGBUF_SIZE);
+	physmap_idx = 0;
+	goto physmap_done;
+#endif
+	
 	hasbrokenint12 = 0;
 	TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12);
 	bzero(&vmf, sizeof(vmf));
@@ -1898,7 +1952,7 @@
 		vmf.vmf_ah = 0x88;
 		vm86_intcall(0x15, &vmf);
 		extmem = vmf.vmf_ax;
-#else
+#elif !defined(XEN)
 		/*
 		 * Prefer the RTC value for extended memory.
 		 */
@@ -1988,7 +2042,7 @@
 	if (getenv_quad("dcons.addr", &dcons_addr) == 0 ||
 	    getenv_quad("dcons.size", &dcons_size) == 0)
 		dcons_addr = 0;
-
+#ifndef XEN
 	/*
 	 * physmap is in bytes, so when converting to page boundaries,
 	 * round up the start address and round down the end address.
@@ -2106,7 +2160,10 @@
 	}
 	*pte = 0;
 	invltlb();
-
+#else
+	phys_avail[0] = physfree;
+	phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE;
+#endif
 	/*
 	 * XXX
 	 * The last chunk must contain at least one page plus the message
@@ -2128,7 +2185,261 @@
 	avail_end = phys_avail[pa_indx];
 }
 
+#ifdef XEN
+
+#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
 void
+init386(int first)
+{
+	int error, gsel_tss, metadata_missing, x;
+	unsigned long off, gdtmachpfn;
+	struct pcpu *pc;
+	struct callback_register event = {
+		.type = CALLBACKTYPE_event,
+		.address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback },
+	};
+	struct callback_register failsafe = {
+		.type = CALLBACKTYPE_failsafe,
+		.address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback },
+	};
+
+	thread0.td_kstack = proc0kstack;
+	thread0.td_pcb = (struct pcb *)
+	   (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
+
+	/*
+ 	 * This may be done better later if it gets more high level
+ 	 * components in it. If so just link td->td_proc here.
+	 */
+	proc_linkup(&proc0, &ksegrp0, &thread0);
+
+	metadata_missing = 0;
+	if (xen_start_info->mod_start) {
+	        preload_metadata = (caddr_t)xen_start_info->mod_start;
+		preload_bootstrap_relocate(KERNBASE);
+	} else {
+		metadata_missing = 1;
+	}
+	if (envmode == 1)
+		kern_envp = static_env;
+	else  if ((caddr_t)xen_start_info->cmd_line)
+	        kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line);
+
+	boothowto |= xen_boothowto(kern_envp);	
+
+	/* Init basic tunables, hz etc */
+	init_param1();
+
+	/*
+	 * XEN occupies a portion of the upper virtual address space 
+	 * At its base it manages an array mapping machine page frames 
+	 * to physical page frames - hence we need to be able to 
+	 * access 4GB - (64MB  - 4MB + 64k) 
+	 */
+	gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+	gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+	gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+	gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+	gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+	gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+	gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+	gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE);
+
+	pc = &__pcpu[0];
+	gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
+	gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
+
+	PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW);
+	bzero(gdt, PAGE_SIZE);
+	for (x = 0; x < NGDT; x++)
+		ssdtosd(&gdt_segs[x], &gdt[x].sd);
+
+
+	if (bootverbose) {
+		printf("gdt=%p\n", gdt);
+		printf("PTmap=%p\n", PTmap);
+		printf("addr=%#jx\n", (uintmax_t)*vtopte((unsigned long)gdt) & ~PG_RW);
+	}
+
+	gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT;
+	PT_SET_MA(gdt, *vtopte((unsigned long)gdt) & ~(PG_RW|PG_M|PG_A));
+	error = HYPERVISOR_set_gdt(&gdtmachpfn, 512);
+	KASSERT(error == 0, ("unexpected result from set_gdt"));
+	lgdt(&r_gdt /* unused */);
+	gdtset = 1;
+
+	if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) {
+		panic("set_trap_table failed - error %d\n", error);
+	}
+
+	error = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
+	if (error == 0)
+		error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
+#if	CONFIG_XEN_COMPAT <= 0x030002
+	if (error == -ENOXENSYS)
+		HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL),
+		    (unsigned long)Xhypervisor_callback,
+		    GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
+#endif
+	pcpu_init(pc, 0, sizeof(struct pcpu));
+	PCPU_SET(prvspace, pc);
+	PCPU_SET(curthread, &thread0);
+	PCPU_SET(curpcb, thread0.td_pcb);
+	PCPU_SET(pdir, (unsigned long)IdlePTD);
+    
+	/*
+	 * Initialize mutexes.
+	 *
+	 * icu_lock: in order to allow an interrupt to occur in a critical
+	 * 	     section, to set pcpu->ipending (etc...) properly, we
+	 *	     must be able to get the icu lock, so it can't be
+	 *	     under witness.
+	 */
+	mutex_init();
+	mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS);
+
+	/* make ldt memory segments */
+	PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW);
+	bzero(ldt, PAGE_SIZE);
+	ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
+	ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
+	for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++)
+		ssdtosd(&ldt_segs[x], &ldt[x].sd);
+
+	default_proc_ldt.ldt_base = (caddr_t)ldt;
+	default_proc_ldt.ldt_len = 6;
+	_default_ldt = (int)&default_proc_ldt;
+	PCPU_SET(currentldt, _default_ldt)
+	PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW);
+	xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0]));
+	
+#ifdef XBOX
+	/*
+	 * The following code queries the PCI ID of 0:0:0. For the XBOX,
+	 * This should be 0x10de / 0x02a5.
+	 *
+	 * This is exactly what Linux does.
+	 */
+	outl(0xcf8, 0x80000000);
+	if (inl(0xcfc) == 0x02a510de) {
+		arch_i386_is_xbox = 1;
+		pic16l_setled(XBOX_LED_GREEN);
+
+		/*
+		 * We are an XBOX, but we may have either 64MB or 128MB of
+		 * memory. The PCI host bridge should be programmed for this,
+		 * so we just query it. 
+		 */
+		outl(0xcf8, 0x80000084);
+		arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64;
+	}
+#endif /* XBOX */
+#if defined (XEN_PRIVILEGED)
+	/*
+	 * Initialize the i8254 before the console so that console
+	 * initialization can use DELAY().
+	 */
+	i8254_init();
+#endif
+	/*
+	 * Initialize the console before we print anything out.
+	 */
+	cninit();
+
+	if (metadata_missing)
+		printf("WARNING: loader(8) metadata is missing!\n");
+
+#ifdef DEV_ISA
+	if (xen_start_info->flags & SIF_PRIVILEGED) {
+		elcr_probe();
+#ifdef DEV_ATPIC		
+		atpic_startup();
+#endif		
+	}
+#endif
+
+#ifdef DDB
+	ksym_start = bootinfo.bi_symtab;
+	ksym_end = bootinfo.bi_esymtab;
+#endif
+
+	kdb_init();
+
+#ifdef KDB
+	if (boothowto & RB_KDB)
+		kdb_enter("Boot flags requested debugger");
+#endif
+
+	finishidentcpu();	/* Final stage of CPU initialization */
+	setidt(IDT_UD, &IDTVEC(ill),  SDT_SYS386TGT, SEL_KPL,
+	    GSEL(GCODE_SEL, SEL_KPL));
+	setidt(IDT_GP, &IDTVEC(prot),  SDT_SYS386TGT, SEL_KPL,
+	    GSEL(GCODE_SEL, SEL_KPL));
+	initializecpu();	/* Initialize CPU registers */
+
+	/* make an initial tss so cpu can get interrupt stack on syscall! */
+	/* Note: -16 is so we can grow the trapframe if we came from vm86 */
+	PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
+	    KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16);
+	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+	HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL),
+	    PCPU_GET(common_tss.tss_esp0));
+
+
+	/* pointer to selector slot for %fs/%gs */
+	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
+
+	dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
+	    dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
+	dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
+	    dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
+#ifdef PAE
+	dblfault_tss.tss_cr3 = (int)IdlePDPT;
+#else
+	dblfault_tss.tss_cr3 = (int)IdlePTD;
+#endif
+	dblfault_tss.tss_eip = (int)dblfault_handler;
+	dblfault_tss.tss_eflags = PSL_KERNEL;
+	dblfault_tss.tss_ds = dblfault_tss.tss_es =
+	    dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
+	dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
+	dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
+	dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
+
+	vm86_initialize();
+	getmemsize(first);
+	init_param2(physmem);
+
+
+	/* Map the message buffer. */
+	for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE)
+		pmap_kenter((vm_offset_t)msgbufp + off, avail_end + off);
+	
+	/* now running on new page tables, configured,and u/iom is accessible */
+
+	msgbufinit(msgbufp, MSGBUF_SIZE);
+
+	/* transfer to user mode */
+
+	_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
+	_udatasel = GSEL(GUDATA_SEL, SEL_UPL);
+
+	/* setup proc 0's pcb */
+	thread0.td_pcb->pcb_flags = 0;
+#ifdef PAE
+	thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
+#else
+	thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
+#endif
+	thread0.td_pcb->pcb_ext = 0;
+	thread0.td_frame = &proc0_tf;
+        thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0];
+        thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
+}
+
+#else 
+void
 init386(first)
 	int first;
 {
@@ -2389,6 +2700,7 @@
 	thread0.td_pcb->pcb_ext = 0;
 	thread0.td_frame = &proc0_tf;
 }
+#endif /* !XEN */
 
 void
 cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
Index: i386/i386/trap.c
===================================================================
--- i386/i386/trap.c	(.../stable/6/sys)	(revision 184012)
+++ i386/i386/trap.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -215,6 +215,7 @@
 	    goto out;
 #endif
 
+#ifndef XEN	
 	if ((frame.tf_eflags & PSL_I) == 0) {
 		/*
 		 * Buggy application or kernel code has disabled
@@ -245,6 +246,7 @@
 				enable_intr();
 		}
 	}
+#endif
 
 	eva = 0;
 	code = frame.tf_err;
Index: i386/i386/intr_machdep.c
===================================================================
--- i386/i386/intr_machdep.c	(.../stable/6/sys)	(revision 184012)
+++ i386/i386/intr_machdep.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -284,7 +284,12 @@
 	/* Schedule the ithread if needed. */
 	if (thread) {
 		error = intr_event_schedule_thread(ie);
+#ifndef XEN
 		KASSERT(error == 0, ("bad stray interrupt"));
+#else
+		if (error != 0)
+			log(LOG_CRIT, "bad stray interrupt %d", vector);
+#endif
 	}
 	critical_exit();
 	td->td_intr_nesting_level--;
Index: i386/xen/exception.s
===================================================================
--- i386/xen/exception.s	(.../stable/6/sys)	(revision 0)
+++ i386/xen/exception.s	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,489 @@
+/*-
+ * Copyright (c) 1989, 1990 William F. Jolitz.
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include "opt_apic.h"
+#include "opt_npx.h"
+
+#include <machine/asmacros.h>
+#include <machine/psl.h>
+#include <machine/trap.h>
+
+#include "assym.s"
+
+#define	SEL_RPL_MASK	0x0002
+#define __HYPERVISOR_iret	23
+	
+/* Offsets into shared_info_t. */
+
+#define evtchn_upcall_pending /* 0 */
+#define evtchn_upcall_mask       1
+
+#define	sizeof_vcpu_shift	6
+
+		
+#ifdef SMP
+#define GET_VCPU_INFO(reg)	movl PCPU(CPUID),reg			; \
+				shl  $sizeof_vcpu_shift,reg		; \
+				addl HYPERVISOR_shared_info,reg
+#else
+#define GET_VCPU_INFO(reg)	movl HYPERVISOR_shared_info,reg
+#endif
+
+#define __DISABLE_INTERRUPTS(reg)	movb $1,evtchn_upcall_mask(reg)
+#define __ENABLE_INTERRUPTS(reg)	movb $0,evtchn_upcall_mask(reg)
+#define DISABLE_INTERRUPTS(reg)	GET_VCPU_INFO(reg)			; \
+				__DISABLE_INTERRUPTS(reg)
+#define ENABLE_INTERRUPTS(reg)	GET_VCPU_INFO(reg)			; \
+				__ENABLE_INTERRUPTS(reg)
+#define __TEST_PENDING(reg)	testb $0xFF,evtchn_upcall_pending(reg)
+
+#define POPA /* popal spelled out one register per instruction */ \
+        popl %edi; \
+        popl %esi; \
+        popl %ebp; \
+        popl %ebx; /* discards the %esp slot stored by pushal */ \
+        popl %ebx; \
+        popl %edx; \
+        popl %ecx; \
+        popl %eax;
+
+	.text
+
+/*****************************************************************************/
+/* Trap handling                                                             */
+/*****************************************************************************/
+/*
+ * Trap and fault vector routines.
+ *
+ * Most traps are 'trap gates', SDT_SYS386TGT.  A trap gate pushes state on
+ * the stack that mostly looks like an interrupt, but does not disable 
+ * interrupts.  A few of the traps we use are interrupt gates,
+ * SDT_SYS386IGT, which are nearly the same thing except interrupts are
+ * disabled on entry.
+ *
+ * The cpu will push a certain amount of state onto the kernel stack for
+ * the current process.  The amount of state depends on the type of trap 
+ * and whether the trap crossed rings or not.  See i386/include/frame.h.  
+ * At the very least the current EFLAGS (status register, which includes 
+ * the interrupt disable state prior to the trap), the code segment register,
+ * and the return instruction pointer are pushed by the cpu.  The cpu 
+ * will also push an 'error' code for certain traps.  We push a dummy 
+ * error code for those traps where the cpu doesn't in order to maintain 
+ * a consistent frame.  We also push a contrived 'trap number'.
+ *
+ * The cpu does not push the general registers, we must do that, and we 
+ * must restore them prior to calling 'iret'.  The cpu adjusts the %cs and
+ * %ss segment registers, but does not mess with %ds, %es, or %fs.  Thus we
+ * must load them with appropriate values for supervisor mode operation.
+ */
+
+MCOUNT_LABEL(user)
+MCOUNT_LABEL(btrap)
+
+IDTVEC(div)
+	pushl $0; TRAP(T_DIVIDE)
+IDTVEC(dbg)
+	pushl $0; TRAP(T_TRCTRAP)
+IDTVEC(nmi)
+	pushl $0; TRAP(T_NMI)
+IDTVEC(bpt)
+	pushl $0; TRAP(T_BPTFLT)
+IDTVEC(ofl)
+	pushl $0; TRAP(T_OFLOW)
+IDTVEC(bnd)
+	pushl $0; TRAP(T_BOUND)
+IDTVEC(ill)
+	pushl $0; TRAP(T_PRIVINFLT)
+IDTVEC(dna)
+	pushl $0; TRAP(T_DNA)
+IDTVEC(fpusegm)
+	pushl $0; TRAP(T_FPOPFLT)
+IDTVEC(tss)
+	TRAP(T_TSSFLT)
+IDTVEC(missing)
+	TRAP(T_SEGNPFLT)
+IDTVEC(stk)
+	TRAP(T_STKFLT)
+IDTVEC(prot)
+	TRAP(T_PROTFLT)
+IDTVEC(page)
+	TRAP(T_PAGEFLT)
+IDTVEC(mchk)
+	pushl $0; TRAP(T_MCHK)
+IDTVEC(rsvd)
+	pushl $0; TRAP(T_RESERVED)
+IDTVEC(fpu)
+	pushl $0; TRAP(T_ARITHTRAP)
+IDTVEC(align)
+	TRAP(T_ALIGNFLT)
+IDTVEC(xmm)
+	pushl $0; TRAP(T_XMMFLT)
+
+IDTVEC(hypervisor_callback)
+	pushl $0; 			/* fills the tf_err slot */
+	pushl $0; 			/* fills the tf_trapno slot */
+	pushal
+	pushl	%ds
+	pushl	%es
+	pushl	%fs
+upcall_with_regs_pushed:
+	SET_KERNEL_SREGS
+	FAKE_MCOUNT(TF_EIP(%esp))
+call_evtchn_upcall:
+	movl	TF_EIP(%esp),%eax
+	cmpl	$scrit,%eax		/* interrupted below scrit? */
+	jb	10f
+	cmpl	$ecrit,%eax		/* inside [scrit, ecrit): merge frames */
+	jb	critical_region_fixup
+	
+10:	pushl	%esp			/* trapframe pointer */
+	call	evtchn_do_upcall
+	addl	$4,%esp
+
+	/*
+	 * Return via doreti to handle ASTs.
+	 */
+	MEXITCOUNT
+	jmp	doreti
+	
+	/* Jumped to from doreti_exit and critical_region_fixup. */
+hypervisor_callback_pending:
+	DISABLE_INTERRUPTS(%esi)				/*	cli */	
+	jmp	10b
+
+	/*
+	 * alltraps entry point.  Interrupts are enabled if this was a trap
+	 * gate (TGT), else disabled if this was an interrupt gate (IGT).
+	 * Note that int0x80_syscall is a trap gate.  Only page faults
+	 * use an interrupt gate.
+	 */
+
+	SUPERALIGN_TEXT
+	.globl	alltraps
+	.type	alltraps,@function
+alltraps:
+	pushal
+	pushl	%ds
+	pushl	%es
+	pushl	%fs
+alltraps_with_regs_pushed:
+	SET_KERNEL_SREGS
+	FAKE_MCOUNT(TF_EIP(%esp))
+calltrap:
+	call	trap
+
+	/*
+	 * Return via doreti to handle ASTs.
+	 */
+	MEXITCOUNT
+	jmp	doreti
+
+/*
+ * SYSCALL CALL GATE (old entry point for a.out binaries)
+ *
+ * The intersegment call has been set up to specify one dummy parameter.
+ *
+ * This leaves a place to put eflags so that the call frame can be
+ * converted to a trap frame. Note that the eflags is (semi-)bogusly
+ * pushed into (what will be) tf_err and then copied later into the
+ * final spot. It has to be done this way because esp can't be just
+ * temporarily altered for the pushfl - an interrupt might come in
+ * and clobber the saved cs/eip.
+ */
+	SUPERALIGN_TEXT
+IDTVEC(lcall_syscall)
+	pushfl				/* save eflags */
+	popl	8(%esp)			/* shuffle into tf_eflags */
+	pushl	$7			/* sizeof "lcall 7,0" */
+	subl	$4,%esp			/* skip over tf_trapno */
+	pushal
+	pushl	%ds
+	pushl	%es
+	pushl	%fs
+	SET_KERNEL_SREGS
+	FAKE_MCOUNT(TF_EIP(%esp))
+	call	syscall
+	MEXITCOUNT
+	jmp	doreti
+
+/*
+ * Call gate entry for FreeBSD ELF and Linux/NetBSD syscall (int 0x80)
+ *
+ * Even though the name says 'int0x80', this is actually a TGT (trap gate)
+ * rather than an IGT (interrupt gate).  Thus interrupts are enabled on
+ * entry just as they are for a normal syscall.
+ */
+	SUPERALIGN_TEXT
+IDTVEC(int0x80_syscall)
+	pushl	$2			/* sizeof "int 0x80" */
+	subl	$4,%esp			/* skip over tf_trapno */
+	pushal
+	pushl	%ds
+	pushl	%es
+	pushl	%fs
+	SET_KERNEL_SREGS
+	FAKE_MCOUNT(TF_EIP(%esp))
+	call	syscall
+	MEXITCOUNT
+	jmp	doreti
+
+ENTRY(fork_trampoline)
+	pushl	%esp			/* trapframe pointer */
+	pushl	%ebx			/* arg1 */
+	pushl	%esi			/* function */
+	call	fork_exit
+	addl	$12,%esp
+	/* cut from syscall */
+
+	/*
+	 * Return via doreti to handle ASTs.
+	 */
+	MEXITCOUNT
+	jmp	doreti
+
+
+/*
+ * To efficiently implement classification of trap and interrupt handlers
+ * for profiling, there must be only trap handlers between the labels btrap
+ * and bintr, and only interrupt handlers between the labels bintr and
+ * eintr.  This is implemented (partly) by including files that contain
+ * some of the handlers.  Before including the files, set up a normal asm
+ * environment so that the included files don't need to know that they are
+ * included.
+ */
+
+	.data
+	.p2align 4
+	.text
+	SUPERALIGN_TEXT
+MCOUNT_LABEL(bintr)
+
+#ifdef DEV_ATPIC	
+#include <i386/isa/atpic_vector.s>
+#endif
+#ifdef DEV_APIC
+	.data
+	.p2align 4
+	.text
+	SUPERALIGN_TEXT
+
+#include <i386/i386/apic_vector.s>
+#endif
+
+	.data
+	.p2align 4
+	.text
+	SUPERALIGN_TEXT
+#include <i386/i386/vm86bios.s>
+
+	.text
+MCOUNT_LABEL(eintr)
+
+/*
+ * void doreti(struct trapframe)
+ *
+ * Handle return from interrupts, traps and syscalls.
+ */
+	.text
+	SUPERALIGN_TEXT
+	.type	doreti,@function
+doreti:
+	FAKE_MCOUNT($bintr)		/* init "from" bintr -> doreti */
+doreti_next:
+#ifdef notyet
+	/*
+	 * Check if ASTs can be handled now.  PSL_VM must be checked first
+	 * since segment registers only have an RPL in non-VM86 mode.
+	 */
+	testl	$PSL_VM,TF_EFLAGS(%esp)	/* are we in vm86 mode? */
+	jz	doreti_notvm86
+	movl	PCPU(CURPCB),%ecx
+	testl	$PCB_VM86CALL,PCB_FLAGS(%ecx)	/* are we in a vm86 call? */
+	jz	doreti_ast		/* can handle ASTS now if not */
+  	jmp	doreti_exit
+
+doreti_notvm86:
+#endif
+	testb	$SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
+	jz	doreti_exit		/* can't handle ASTs now if not */
+
+doreti_ast:
+	/*
+	 * Check for ASTs atomically with returning.  Disabling CPU
+	 * interrupts provides sufficient locking even in the SMP case,
+	 * since we will be informed of any new ASTs by an IPI.
+	 */
+	DISABLE_INTERRUPTS(%esi)				/*	cli */
+	movl	PCPU(CURTHREAD),%eax
+	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%eax)
+	je	doreti_exit
+	ENABLE_INTERRUPTS(%esi)	/* sti */
+	pushl	%esp			/* pass a pointer to the trapframe */
+	call	ast
+	add	$4,%esp
+	jmp	doreti_ast
+
+	/*
+	 * doreti_exit:	pop registers, iret.
+	 *
+	 *	The segment register pop is a special case, since it may
+	 *	fault if (for example) a sigreturn specifies bad segment
+	 *	registers.  The fault is handled in trap.c.
+	 */
+doreti_exit:
+	ENABLE_INTERRUPTS(%esi) # reenable event callbacks (sti)
+
+	.globl	scrit
+scrit:
+	__TEST_PENDING(%esi)
+        jnz	hypervisor_callback_pending	/* More to go  */
+
+	MEXITCOUNT
+
+	.globl	doreti_popl_fs
+doreti_popl_fs:
+	popl	%fs
+	.globl	doreti_popl_es
+doreti_popl_es:
+	popl	%es
+	.globl	doreti_popl_ds
+doreti_popl_ds:
+	popl	%ds
+
+	/*
+	 * This is important: as nothing is atomic over here (we can get
+	 * interrupted any time), we use the critical_region_fixup() in
+	 * order to figure out where our stack is. Therefore, do NOT use
+	 * 'popal' here without fixing up the table!
+	 */
+	POPA
+	addl	$8,%esp
+	.globl	doreti_iret
+doreti_iret:
+	jmp	hypercall_page + (__HYPERVISOR_iret * 32)
+	.globl	ecrit
+ecrit:
+  	/*
+	 * doreti_iret_fault and friends.  Alternative return code for
+	 * the case where we get a fault in the doreti_exit code
+	 * above.  trap() (i386/i386/trap.c) catches this specific
+	 * case, sends the process a signal and continues in the
+	 * corresponding place in the code below.
+	 */
+	ALIGN_TEXT
+	.globl	doreti_iret_fault
+doreti_iret_fault:
+	subl	$8,%esp
+	pushal
+	pushl	%ds
+	.globl	doreti_popl_ds_fault
+doreti_popl_ds_fault:
+	pushl	%es
+	.globl	doreti_popl_es_fault
+doreti_popl_es_fault:
+	pushl	%fs
+	.globl	doreti_popl_fs_fault
+doreti_popl_fs_fault:
+	movl	$0,TF_ERR(%esp)	/* XXX should be the error code */
+	movl	$T_PROTFLT,TF_TRAPNO(%esp)
+	jmp	alltraps_with_regs_pushed
+
+	/*
+# [How we do the fixup]. We want to merge the current stack frame with the
+# just-interrupted frame. How we do this depends on where in the critical
+# region the interrupted handler was executing, and so how many saved
+# registers are in each frame. We do this quickly using the lookup table
+# 'critical_fixup_table'. For each byte offset in the critical region, it
+# provides the number of bytes which have already been popped from the
+# interrupted stack frame.
+*/
+
+.globl critical_region_fixup
+critical_region_fixup:
+	addl $critical_fixup_table-scrit,%eax
+	movzbl (%eax),%eax    # %eax contains num bytes popped
+        movl  %esp,%esi
+        add  %eax,%esi        # %esi points at end of src region
+        movl  %esp,%edi
+        add  $0x40,%edi       # %edi points at end of dst region
+        movl  %eax,%ecx
+        shr  $2,%ecx          # convert bytes to words
+        je   16f              # skip loop if nothing to copy
+15:     subl $4,%esi          # pre-decrementing copy loop
+        subl $4,%edi
+        movl (%esi),%eax
+        movl %eax,(%edi)
+        loop 15b
+16:     movl %edi,%esp        # final %edi is top of merged stack
+	jmp  hypervisor_callback_pending
+
+
+critical_fixup_table:        
+.byte   0x0,0x0,0x0			#testb  $0xFF,(%esi)
+.byte   0x0,0x0,0x0,0x0,0x0,0x0		#jnz    hypervisor_callback_pending
+.byte   0x0,0x0				#pop    %fs
+.byte   0x04				#pop    %es
+.byte   0x08				#pop    %ds
+.byte   0x0c				#pop    %edi
+.byte   0x10	                        #pop    %esi
+.byte   0x14	                        #pop    %ebp
+.byte   0x18	                        #pop    %ebx (discarded %esp slot)
+.byte   0x1c	                        #pop    %ebx
+.byte   0x20	                        #pop    %edx
+.byte   0x24	                        #pop    %ecx
+.byte   0x28	                        #pop    %eax
+.byte   0x2c,0x2c,0x2c                  #add    $0x8,%esp
+#if 0
+.byte   0x34				#iret   
+#endif
+.byte   0x34,0x34,0x34,0x34,0x34        #HYPERVISOR_iret
+
+	
+/* Hypervisor uses this for application faults while it executes. */
+ENTRY(failsafe_callback)
+	pushal
+	call xen_failsafe_handler
+	/* Xen's frame is ds,es,fs,gs,eip,cs,eflags; pushal added 32 bytes. */
+	movl 32(%esp),%ebx
+1:	movl %ebx,%ds
+	movl 36(%esp),%ebx
+2:	movl %ebx,%es
+	movl 40(%esp),%ebx
+3:	movl %ebx,%fs
+	movl 44(%esp),%ebx
+4:	movl %ebx,%gs
+	popal
+	/* Skip the four saved selectors so iret finds eip/cs/eflags. */
+	addl $16,%esp
+	iret
+
+

Property changes on: i386/xen/exception.s
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/xen/locore.s
===================================================================
--- i386/xen/locore.s	(.../stable/6/sys)	(revision 0)
+++ i386/xen/locore.s	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,373 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)locore.s	7.3 (Berkeley) 5/13/91
+ * $FreeBSD$
+ *
+ *		originally from: locore.s, by William F. Jolitz
+ *
+ *		Substantially rewritten by David Greenman, Rod Grimes,
+ *			Bruce Evans, Wolfgang Solfrank, Poul-Henning Kamp
+ *			and many others.
+ */
+
+#include "opt_bootp.h"
+#include "opt_compat.h"
+#include "opt_nfsroot.h"
+#include "opt_global.h"
+#include "opt_pmap.h"
+
+#include <sys/syscall.h>
+#include <sys/reboot.h>
+
+#include <machine/asmacros.h>
+#include <machine/cputypes.h>
+#include <machine/psl.h>
+#include <machine/pmap.h>
+#include <machine/specialreg.h>
+
+#define __ASSEMBLY__	
+#include <xen/interface/elfnote.h>
+		
+/* The defines below have been lifted out of <machine/xen-public/arch-x86_32.h> */
+#define FLAT_RING1_CS 0xe019    /* GDT index 259 */
+#define FLAT_RING1_DS 0xe021    /* GDT index 260 */
+#define KERNEL_CS FLAT_RING1_CS 
+#define KERNEL_DS FLAT_RING1_DS
+
+#include "assym.s"
+
+.section __xen_guest
+	.ascii "LOADER=generic,GUEST_OS=freebsd,GUEST_VER=7.0,XEN_VER=xen-3.0,BSD_SYMTAB,VIRT_BASE=0xc0000000"
+	.byte 0
+
+	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "FreeBSD")	
+	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, "HEAD")
+	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz, "xen-3.0")
+	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long,  KERNBASE)
+	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  KERNBASE)
+	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long,  btext)
+	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long,  hypercall_page)
+	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long,  HYPERVISOR_VIRT_START)
+#if 0
+	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz, "writable_page_tables|writable_descriptor_tables|auto_translated_physmap|pae_pgdir_above_4gb|supervisor_mode_kernel")
+#endif
+	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz, "writable_page_tables|supervisor_mode_kernel|writable_descriptor_tables")
+		
+#ifdef PAE
+	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "yes")
+	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .long,  PG_V, PG_V)
+#else
+	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "no")
+	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .long,  PG_V, PG_V)
+#endif
+	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz, "generic")
+	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long,  1)		
+
+	
+	
+/*
+ *	XXX
+ *
+ * Note: This version greatly munged to avoid various assembler errors
+ * that may be fixed in newer versions of gas. Perhaps newer versions
+ * will have more pleasant appearance.
+ */
+
+/*
+ * PTmap is recursive pagemap at top of virtual address space.
+ * Within PTmap, the page directory can be found (third indirection).
+ */
+	.globl	PTmap,PTD,PTDpde
+	.set	PTmap,(PTDPTDI << PDRSHIFT)
+	.set	PTD,PTmap + (PTDPTDI * PAGE_SIZE)
+	.set	PTDpde,PTD + (PTDPTDI * PDESIZE)
+
+/*
+ * Compiled KERNBASE location and the kernel load address
+ */
+	.globl	kernbase
+	.set	kernbase,KERNBASE
+	.globl	kernload
+	.set	kernload,KERNLOAD
+
+/*
+ * Globals
+ */
+	.data
+	ALIGN_DATA			/* just to be sure */
+
+	.space	0x2000			/* space for tmpstk - temporary stack */
+tmpstk:
+
+		.globl	bootinfo
+bootinfo:	.space	BOOTINFO_SIZE	/* bootinfo that we can handle */
+
+		.globl KERNend
+KERNend:	.long	0		/* phys addr end of kernel (just after bss) */
+		.globl physfree
+physfree:	.long	0		/* phys addr of next free page */
+
+#ifdef SMP
+		.globl	cpu0prvpage
+cpu0pp:		.long	0		/* phys addr cpu0 private pg */
+cpu0prvpage:	.long	0		/* relocated version */
+
+		.globl	SMPpt
+SMPptpa:	.long	0		/* phys addr SMP page table */
+SMPpt:		.long	0		/* relocated version */
+#endif /* SMP */
+
+	.globl	IdlePTD
+IdlePTD:	.long	0		/* phys addr of kernel PTD */
+
+#ifdef PAE
+	.globl	IdlePDPT
+IdlePDPT:	.long	0		/* phys addr of kernel PDPT */
+#endif
+
+#ifdef SMP
+	.globl	KPTphys
+#endif
+	.globl	gdtset
+KPTphys:	.long	0		/* phys addr of kernel page tables */
+gdtset:		.long	0
+	
+	.globl	proc0kstack
+proc0uarea:	.long	0		/* address of proc 0 uarea (unused)*/
+proc0kstack:	.long	0		/* address of proc 0 kstack space */
+p0upa:		.long	0		/* phys addr of proc0 UAREA (unused) */
+p0kpa:		.long	0		/* phys addr of proc0's STACK */
+
+vm86phystk:	.long	0		/* PA of vm86/bios stack */
+
+	.globl	vm86paddr, vm86pa
+vm86paddr:	.long	0		/* address of vm86 region */
+vm86pa:		.long	0		/* phys addr of vm86 region */
+
+#ifdef PC98
+	.globl	pc98_system_parameter
+pc98_system_parameter:
+	.space	0x240
+#endif
+
+	.globl	avail_space
+avail_space:	.long 0
+
+/**********************************************************************
+ *
+ * Some handy macros
+ *
+ */
+
+/*
+ * We're already in protected mode, so no remapping is needed.
+ */	
+#define R(foo) (foo)
+	
+#define ALLOCPAGES(foo) /* take foo pages at physfree and zero them; start address is left in %esi */ \
+	movl	R(physfree), %esi ; /* esi = start of the new allocation */ \
+	movl	$((foo)*PAGE_SIZE), %eax ; \
+	addl	%esi, %eax ; \
+	movl	%eax, R(physfree) ; /* physfree advances by foo pages */ \
+	movl	%esi, %edi ; \
+	movl	$((foo)*PAGE_SIZE),%ecx ; \
+	xorl	%eax,%eax ; \
+	cld ; \
+	rep ; /* byte-wise zero fill; clobbers %eax, %ecx and %edi */ \
+	stosb
+
+/*
+ * fillkpt(base, prot): store %ecx consecutive entries starting at
+ * base + (%ebx << PTESHIFT).  %ecx must be nonzero because the loop
+ * instruction decrements before testing.  Clobbers %eax, %ebx, %ecx.
+ *	eax = page frame address, ebx = index into page table
+ *	ecx = how many pages to map
+ *	base = base address of page dir/table, prot = protection bits
+ */
+#define	fillkpt(base, prot)		  \
+	shll	$PTESHIFT,%ebx		; \
+	addl	base,%ebx		; \
+	orl	$PG_V,%eax		; \
+	orl	prot,%eax		; \
+1:	movl	%eax,(%ebx)		; \
+	addl	$PAGE_SIZE,%eax		; /* increment physical address */ \
+	addl	$PTESIZE,%ebx		; /* next pte */ \
+	loop	1b
+
+/*
+ * fillkptphys(prot): fillkpt() on R(KPTphys), indexed by the address itself
+ *	eax = physical address
+ *	ecx = how many pages to map
+ *	prot = protection bits
+ */
+#define	fillkptphys(prot)		  \
+	movl	%eax, %ebx		; /* index = address >> PAGE_SHIFT */ \
+	shrl	$PAGE_SHIFT, %ebx	; \
+	fillkpt(R(KPTphys), prot)
+
+/* Temporary stack */
+.space 	8192
+tmpstack:
+	.long	tmpstack, KERNEL_DS
+
+	.text
+
+.p2align 12,	0x90	
+		
+#define HYPERCALL_PAGE_OFFSET 0x1000
+.org HYPERCALL_PAGE_OFFSET
+ENTRY(hypercall_page)
+	.cfi_startproc
+	.skip	0x1000
+	.cfi_endproc
+
+/**********************************************************************
+ *
+ * Kernel entry point (the address given in XEN_ELFNOTE_ENTRY above):
+ * switch stacks, record CPUID data, then run init386() and mi_startup().
+ */
+NON_GPROF_ENTRY(btext)
+	/* At the end of our stack, we shall have free space - so store it */
+	movl	%esp,%ebx
+	movl	%ebx,R(avail_space)
+
+	lss	tmpstack,%esp		/* load %ss:%esp from the pair stored at tmpstack */
+
+	pushl   %esi			/* on the stack for initvalues; restored by the popl below */
+	call	initvalues	
+	popl	%esi
+
+	/* Store the CPUID information */
+	xorl	%eax,%eax
+	cpuid					# cpuid 0
+	movl	%eax,R(cpu_high)		# highest capability
+	movl	%ebx,R(cpu_vendor)		# store vendor string
+	movl	%edx,R(cpu_vendor+4)
+	movl	%ecx,R(cpu_vendor+8)
+	movb	$0,R(cpu_vendor+12)		# NUL-terminate the 12-byte string
+
+	movl	$1,%eax
+	cpuid					# cpuid 1
+	movl	%eax,R(cpu_id)			# store cpu_id
+	movl	%ebx,R(cpu_procinfo)		# store cpu_procinfo
+	movl	%edx,R(cpu_feature)		# store cpu_feature
+	movl	%ecx,R(cpu_feature2)		# store cpu_feature2
+	rorl	$8,%eax				# extract family type
+	andl	$15,%eax
+	cmpl	$5,%eax				/* NOTE(review): flags are never consumed */
+	movl	$CPU_686,R(cpu)			/* cpu is set to CPU_686 unconditionally */
+
+	movl	proc0kstack,%eax
+	leal	(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE)(%eax),%esp	/* stack top sits just below the pcb area */
+	xorl    %ebp,%ebp               /* mark end of frames */
+#ifdef PAE
+	movl    IdlePDPT,%esi
+#else	
+	movl    IdlePTD,%esi
+#endif	
+	movl    %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)	/* store it in the pcb CR3 slot */
+	pushl	physfree		/* single stack argument for init386 */
+	call	init386
+	addl	$4, %esp		/* pop that argument */
+	call	mi_startup
+	/* NOTREACHED */
+	int	$3
+
+/*
+ * Signal trampoline, copied to top of user stack
+ */
+NON_GPROF_ENTRY(sigcode)
+	calll	*SIGF_HANDLER(%esp)	/* call the handler stored in the signal frame */
+	leal	SIGF_UC(%esp),%eax	/* get ucontext */
+	pushl	%eax			/* argument for sigreturn */
+	testl	$PSL_VM,UC_EFLAGS(%eax)
+	jne	1f			/* PSL_VM set: skip the %gs reload */
+	mov	UC_GS(%eax), %gs	/* restore %gs */
+1:
+	movl	$SYS_sigreturn,%eax
+	pushl	%eax			/* junk to fake return addr. */
+	int	$0x80			/* enter kernel with args */
+					/* on stack */
+1:					/* spin here if the syscall ever returns */
+	jmp	1b
+
+#ifdef COMPAT_FREEBSD4
+	ALIGN_TEXT
+freebsd4_sigcode:			/* same trampoline using the UC4 frame offsets */
+	calll	*SIGF_HANDLER(%esp)	/* call the handler stored in the signal frame */
+	leal	SIGF_UC4(%esp),%eax	/* get ucontext */
+	pushl	%eax			/* argument for sigreturn */
+	testl	$PSL_VM,UC4_EFLAGS(%eax)
+	jne	1f			/* PSL_VM set: skip the %gs reload */
+	mov	UC4_GS(%eax),%gs	/* restore %gs */
+1:
+	movl	$344,%eax		/* 4.x SYS_sigreturn */
+	pushl	%eax			/* junk to fake return addr. */
+	int	$0x80			/* enter kernel with args */
+					/* on stack */
+1:					/* spin here if the syscall ever returns */
+	jmp	1b
+#endif
+
+#ifdef COMPAT_43
+	ALIGN_TEXT
+osigcode:				/* same trampoline using the sigcontext offsets */
+	call	*SIGF_HANDLER(%esp)	/* call signal handler */
+	lea	SIGF_SC(%esp),%eax	/* get sigcontext */
+	pushl	%eax			/* argument for sigreturn */
+	testl	$PSL_VM,SC_PS(%eax)
+	jne	9f			/* PSL_VM set: skip the %gs reload */
+	movl	SC_GS(%eax),%gs		/* restore %gs */
+9:
+	movl	$103,%eax		/* 3.x SYS_sigreturn */
+	pushl	%eax			/* junk to fake return addr. */
+	int	$0x80			/* enter kernel with args */
+0:	jmp	0b			/* spin here if the syscall ever returns */
+#endif /* COMPAT_43 */
+
+	ALIGN_TEXT
+esigcode:
+
+	.data
+	.globl	szsigcode
+szsigcode:
+	.long	esigcode-sigcode
+#ifdef COMPAT_FREEBSD4
+	.globl	szfreebsd4_sigcode
+szfreebsd4_sigcode:
+	.long	esigcode-freebsd4_sigcode
+#endif
+#ifdef COMPAT_43
+	.globl	szosigcode
+szosigcode:
+	.long	esigcode-osigcode
+#endif

Property changes on: i386/xen/locore.s
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/xen/xen_bus.c
===================================================================
--- i386/xen/xen_bus.c	(.../stable/6/sys)	(revision 0)
+++ i386/xen/xen_bus.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,238 @@
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+
+#include <machine/frame.h> 
+#include <machine/intr_machdep.h> 
+#include <machine/resource.h>
+
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/xen_intr.h>
+
+static MALLOC_DEFINE(M_XENDEV, "xenintrdrv", "xen system device");
+
+struct xenbus_device {
+    struct resource_list  xen_resources;
+};
+
+#define DEVTOXEN(dev)       ((struct xenbus_device *)device_get_ivars(dev))
+
+static void xenbus_identify(driver_t *, device_t); 
+static int xenbus_probe(device_t);
+static int xenbus_attach(device_t);
+static int xenbus_print_child(device_t, device_t);
+static device_t xenbus_add_child(device_t bus, int order, const char *name, 
+				 int unit);
+static struct resource *xenbus_alloc_resource(device_t, device_t, int, int *,
+					      u_long, u_long, u_long, u_int);
+static  int xenbus_release_resource(device_t, device_t, int, int, 
+				    struct resource *); 
+static  int xenbus_set_resource(device_t, device_t, int, int, u_long, u_long); 
+static  int xenbus_get_resource(device_t, device_t, int, int, u_long *, u_long *); 
+static void xenbus_delete_resource(device_t, device_t, int, int); 
+
+
+static device_method_t xenbus_methods[] = { 
+    /* Device interface */ 
+    DEVMETHOD(device_identify,      xenbus_identify), 
+    DEVMETHOD(device_probe,         xenbus_probe), 
+    DEVMETHOD(device_attach,        xenbus_attach), 
+    DEVMETHOD(device_detach,        bus_generic_detach), 
+    DEVMETHOD(device_shutdown,      bus_generic_shutdown), 
+    DEVMETHOD(device_suspend,       bus_generic_suspend), 
+    DEVMETHOD(device_resume,        bus_generic_resume), 
+ 
+    /* Bus interface */ 
+    DEVMETHOD(bus_print_child,      xenbus_print_child),
+    DEVMETHOD(bus_add_child,        xenbus_add_child), 
+    DEVMETHOD(bus_read_ivar,        bus_generic_read_ivar), 
+    DEVMETHOD(bus_write_ivar,       bus_generic_write_ivar), 
+    DEVMETHOD(bus_set_resource,     xenbus_set_resource), 
+    DEVMETHOD(bus_get_resource,     xenbus_get_resource), 
+    DEVMETHOD(bus_alloc_resource,   xenbus_alloc_resource), 
+    DEVMETHOD(bus_release_resource, xenbus_release_resource), 
+    DEVMETHOD(bus_delete_resource,  xenbus_delete_resource), 
+    DEVMETHOD(bus_activate_resource, bus_generic_activate_resource), 
+    DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource), 
+    DEVMETHOD(bus_setup_intr,       bus_generic_setup_intr), 
+    DEVMETHOD(bus_teardown_intr,    bus_generic_teardown_intr), 
+ 
+    { 0, 0 } 
+}; 
+
+
+static driver_t xenbus_driver = { 
+    "xenbus", 
+    xenbus_methods, 
+    1,                      /* no softc */ 
+}; 
+static devclass_t xenbus_devclass; 
+static device_t xenbus_dev;
+static boolean_t xenbus_probe_delay = TRUE;	/* delay child probes */
+ 
+DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0); 
+ 
+/* Identify hook: hang the single "xenbus" device off the parent bus. */
+static void
+xenbus_identify(driver_t *driver, device_t parent)
+{
+    device_t bus_child;
+    /* Order 0 is requested so that this child gets probed first. */
+    bus_child = BUS_ADD_CHILD(parent, 0, "xenbus", 0);
+    xenbus_dev = bus_child;
+    if (bus_child != NULL)
+	return;
+    panic("xenbus: could not attach");
+}
+
+static int
+xenbus_probe(device_t dev)
+{
+    device_quiet(dev);			/* mark the device quiet */
+    device_set_desc(dev, "xen system");
+    return (0);				/* unconditional match */
+}
+
+/*
+ * Attach the bus.  While xenbus_probe_delay is still set nothing is
+ * probed here; xenbus_init() clears the flag and calls this again.
+ */
+static int
+xenbus_attach(device_t dev)
+{
+    /* Child probing is postponed until the delay flag has been cleared. */
+    if (xenbus_probe_delay)
+	return (0);
+    /* Let child drivers identify their devices, then attach what was found. */
+    bus_generic_probe(dev);
+    bus_generic_attach(dev);
+    return (0);
+}
+
+
+/* Print the port, iomem and irq resources of dev; returns the summed print counts. */
+static int
+xenbus_print_all_resources(device_t dev)
+{
+    struct xenbus_device *xendev;
+    struct resource_list *res;
+    int nprinted;
+
+    xendev = device_get_ivars(dev);
+    res = &xendev->xen_resources;
+    nprinted = STAILQ_FIRST(res) ? printf(" at") : 0;
+    nprinted += resource_list_print_type(res, "port", SYS_RES_IOPORT, "%#lx");
+    nprinted += resource_list_print_type(res, "iomem", SYS_RES_MEMORY, "%#lx");
+    nprinted += resource_list_print_type(res, "irq", SYS_RES_IRQ, "%ld");
+    return (nprinted);
+}
+ 
+
+/* bus_print_child method: header, resource list, then a fixed trailer. */
+static int
+xenbus_print_child(device_t bus, device_t child)
+{
+    int header_len, resource_len, trailer_len;
+
+    header_len = bus_print_child_header(bus, child);
+    resource_len = xenbus_print_all_resources(child);
+    trailer_len = printf(" on motherboard\n");	/* XXX "motherboard", ick */
+    return (header_len + resource_len + trailer_len);
+}
+
+/* Add a child whose ivars hold an empty resource list; NULL on failure. */
+static device_t
+xenbus_add_child(device_t bus, int order, const char *name, int unit)
+{
+    device_t child;
+    struct xenbus_device *xendev;
+
+    xendev = malloc(sizeof(*xendev), M_XENDEV, M_NOWAIT | M_ZERO);
+    if (xendev == NULL)
+	return (NULL);
+    resource_list_init(&xendev->xen_resources);
+    child = device_add_child_ordered(bus, order, name, unit);
+    if (child == NULL) {
+	free(xendev, M_XENDEV);	/* no device to own the ivars: do not leak */
+	return (NULL);
+    }
+    device_set_ivars(child, xendev); /* should we free this in xenbus_child_detached? */
+    return (child);
+}
+
+/* bus_alloc_resource method: allocate out of the child's resource list. */
+static struct resource *
+xenbus_alloc_resource(device_t bus, device_t child, int type, int *rid,
+    u_long start, u_long end, u_long count, u_int flags)
+{
+    struct resource_list *child_rl = &DEVTOXEN(child)->xen_resources;
+
+    return (resource_list_alloc(child_rl, bus, child, type, rid,
+	start, end, count, flags));
+}
+
+
+/* bus_release_resource method: release r through the child's resource list. */
+static int
+xenbus_release_resource(device_t bus, device_t child, int type, int rid,
+    struct resource *r)
+{
+    struct resource_list *child_rl = &DEVTOXEN(child)->xen_resources;
+
+    return (resource_list_release(child_rl, bus, child, type, rid, r));
+}
+
+/* bus_set_resource method: record [start, start + count - 1] for the child. */
+static int
+xenbus_set_resource(device_t dev, device_t child, int type, int rid,
+    u_long start, u_long count)
+{
+    struct resource_list *child_rl = &DEVTOXEN(child)->xen_resources;
+    u_long last = start + count - 1;
+    resource_list_add(child_rl, type, rid, start, last, count);
+    return (0);
+}
+
+/* bus_get_resource method: copy start/count of the child's (type, rid) */
+/* entry to whichever output pointers are non-NULL; ENOENT if not found. */
+static int
+xenbus_get_resource(device_t dev, device_t child, int type, int rid,
+    u_long *startp, u_long *countp)
+{
+    struct resource_list_entry *entry;
+
+    entry = resource_list_find(&DEVTOXEN(child)->xen_resources, type, rid);
+    if (entry == NULL)
+	return (ENOENT);
+    if (startp != NULL)
+	*startp = entry->start;
+    if (countp != NULL)
+	*countp = entry->count;
+    return (0);
+}
+
+/* bus_delete_resource method: drop (type, rid) from the child's list. */
+static void
+xenbus_delete_resource(device_t dev, device_t child, int type, int rid)
+{
+    struct resource_list *child_rl = &DEVTOXEN(child)->xen_resources;
+
+    resource_list_delete(child_rl, type, rid);
+}
+
+static void
+xenbus_init(void *unused)
+{
+	xenbus_probe_delay = FALSE;		/* child probing is allowed from now on */
+	(void)xenbus_attach(xenbus_dev);	/* run the probe/attach skipped earlier */
+}
+SYSINIT(xenbusdev, SI_SUB_PSEUDO, SI_ORDER_FIRST, xenbus_init, NULL);

Property changes on: i386/xen/xen_bus.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/xen/mptable.c
===================================================================
--- i386/xen/mptable.c	(.../stable/6/sys)	(revision 0)
+++ i386/xen/mptable.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,130 @@
+/*-
+ * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
+ * Copyright (c) 1996, by Steve Passe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
+#include <machine/apicreg.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
+#include <machine/md_var.h>
+#include <machine/mptable.h>
+#include <machine/specialreg.h>
+
+#include <xen/hypervisor.h>
+#include <machine/xen/xen-os.h>
+#include <machine/smp.h>
+#include <xen/interface/vcpu.h>
+
+
+static int	mptable_probe(void);
+static int	mptable_probe_cpus(void);
+static void	mptable_register(void *dummy);
+static int	mptable_setup_local(void);
+static int	mptable_setup_io(void);
+
+static struct apic_enumerator mptable_enumerator = {
+	"MPTable",
+	mptable_probe,
+	mptable_probe_cpus,
+	mptable_setup_local,
+	mptable_setup_io
+};
+
+static int
+mptable_probe(void)
+{
+	/* NOTE(review): -100 is presumably a probe priority; confirm how apic enumerators are ranked */
+	return (-100);
+}
+
+/* cpu_add() each id below MAXCPU for which VCPUOP_is_up does not fail. */
+static int
+mptable_probe_cpus(void)
+{
+	int vcpu;
+
+	for (vcpu = 0; vcpu < MAXCPU; vcpu++) {
+		if (HYPERVISOR_vcpu_op(VCPUOP_is_up, vcpu, NULL) < 0)
+			continue;
+		cpu_add(vcpu, (vcpu == 0));
+	}
+	return (0);
+}
+
+/*
+ * setup_local slot of mptable_enumerator; this version does no work.
+ */
+static int
+mptable_setup_local(void)
+{
+
+	return (0);	/* always returns 0 */
+}
+
+static int
+mptable_setup_io(void)
+{
+	/* Last slot of mptable_enumerator; this version does no work. */
+	return (0);
+}
+
+static void
+mptable_register(void *dummy __unused)
+{
+	/* Hand mptable_enumerator to the APIC code; run from the SYSINIT below. */
+	apic_register_enumerator(&mptable_enumerator);
+}
+SYSINIT(mptable_register, SI_SUB_CPU - 1, SI_ORDER_FIRST, mptable_register,
+    NULL);
+
+
+
+int
+mptable_pci_probe_table(int bus)
+{
+	/* Stub: the bus argument is ignored and 0 is always returned. */
+	return (0);
+}
+
+int
+mptable_pci_route_interrupt(device_t pcib, device_t dev, int pin)
+{
+	/* Stub: all arguments are ignored and 0 is always returned. */
+	return (0);
+}
+

Property changes on: i386/xen/mptable.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/xen/clock.c
===================================================================
--- i386/xen/clock.c	(.../stable/6/sys)	(revision 0)
+++ i386/xen/clock.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,963 @@
+/*-
+ * Copyright (c) 1990 The Regents of the University of California.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * William Jolitz and Don Ahn.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/* #define DELAYDEBUG */
+/*
+ * Routines to handle clock hardware.
+ */
+
+#include "opt_ddb.h"
+#include "opt_clock.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/clock.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/timetc.h>
+#include <sys/kernel.h>
+#include <sys/limits.h>
+#include <sys/sysctl.h>
+#include <sys/cons.h>
+#include <sys/power.h>
+
+#include <machine/clock.h>
+#include <machine/cputypes.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/psl.h>
+#if defined(SMP)
+#include <machine/smp.h>
+#endif
+#include <machine/specialreg.h>
+#include <machine/timerreg.h>
+
+#include <i386/isa/icu.h>
+#include <i386/isa/isa.h>
+#include <isa/rtc.h>
+
+#include <xen/xen_intr.h>
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/pmap.h>
+#include <xen/hypervisor.h>
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenfunc.h>
+#include <xen/interface/vcpu.h>
+#include <machine/cpu.h>
+
+/*
+ * 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
+ * can use a simple formula for leap years.
+ */
+#define	LEAPYEAR(y)	(!((y) % 4))
+#define	DAYSPERYEAR	(28+30*4+31*7)
+
+#ifndef TIMER_FREQ
+#define	TIMER_FREQ	1193182
+#endif
+
+#ifdef CYC2NS_SCALE_FACTOR
+#undef	CYC2NS_SCALE_FACTOR
+#endif
+#define CYC2NS_SCALE_FACTOR	10
+
+/* Values for timerX_state: */
+#define	RELEASED	0
+#define	RELEASE_PENDING	1
+#define	ACQUIRED	2
+#define	ACQUIRE_PENDING	3
+
+#define	RTC_LOCK_INIT							\
+	mtx_init(&clock_lock, "clk", NULL, MTX_SPIN)
+#define	RTC_LOCK	mtx_lock_spin(&clock_lock)
+#define	RTC_UNLOCK	mtx_unlock_spin(&clock_lock)
+
+int adjkerntz;		/* local offset from GMT in seconds */
+int clkintr_pending;
+int pscnt = 1;
+int psdiv = 1;
+int statclock_disable;
+int disable_rtc_set = 0;
+int wall_cmos_clock;
+u_int timer_freq = TIMER_FREQ;
+static int independent_wallclock;
+static int xen_disable_rtc_set;
+static u_long cached_gtm;	/* cached quotient for TSC -> microseconds */
+static u_long cyc2ns_scale; 
+static u_char timer2_state = RELEASED;
+static struct timespec shadow_tv;
+static uint32_t shadow_tv_version;	/* XXX: lazy locking */
+static uint64_t processed_system_time;	/* stime (ns) at last processing. */
+static unsigned int time_irq;
+
+#ifdef XEN_PRIVILEGED_GUEST
+static struct mtx clock_lock;
+static int rtc_reg;
+#endif
+
+static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
+
+SYSCTL_INT(_machdep, OID_AUTO, independent_wallclock,
+    CTLFLAG_RW, &independent_wallclock, 0, "");
+SYSCTL_INT(_machdep, OID_AUTO, xen_disable_rtc_set,
+    CTLFLAG_RW, &xen_disable_rtc_set, 1, "");
+
+
+#define do_div(n,base) ({ /* n = n / base in place (64-bit n, word-sized base); value is the remainder */ \
+        unsigned long __upper, __low, __high, __mod, __base; \
+        __base = (base); \
+        __asm("":"=a" (__low), "=d" (__high):"A" (n)); /* split n into high and low words */ \
+        __upper = __high; \
+        if (__high) { \
+                __upper = __high % (__base); /* remainder of the high word feeds the low divide */ \
+                __high = __high / (__base); \
+        } \
+        __asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
+        __asm("":"=A" (n):"a" (__low),"d" (__high)); /* reassemble the quotient into n */ \
+        __mod; \
+})
+
+
+#define NS_PER_TICK (1000000000ULL/hz)
+
+#define rdtscll(val) \
+    __asm__ __volatile__("rdtsc" : "=A" (val))
+
+
+/* convert from cycles(64bits) => nanoseconds (64bits)
+ *  basic equation:
+ *		ns = cycles / (freq / ns_per_sec)
+ *		ns = cycles * (ns_per_sec / freq)
+ *		ns = cycles * (10^9 / (cpu_mhz * 10^6))
+ *		ns = cycles * (10^3 / cpu_mhz)
+ *
+ *	Then we use scaling math (suggested by george@mvista.com) to get:
+ *		ns = cycles * (10^3 * SC / cpu_mhz) / SC
+ *		ns = cycles * cyc2ns_scale / SC
+ *
+ *	And since SC is a constant power of two, we can convert the div
+ *  into a shift.   
+ *			-johnstul@us.ibm.com "math is hard, lets go shopping!"
+ */
+static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
+{	/* cpu_mhz is the divisor below, so it must be nonzero */
+	cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
+}
+
+static inline unsigned long long cycles_2_ns(unsigned long long cyc)
+{	/* ns = cyc * cyc2ns_scale / 2^CYC2NS_SCALE_FACTOR */
+	return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
+}
+
+/*
+ * Shift delta by shift bits (right when shift is negative), then return
+ * (delta * mul_frac) >> 32, i.e. delta times a 32-bit binary fraction.
+ */
+static inline uint64_t
+scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
+{
+	uint64_t product;
+	uint32_t tmp1, tmp2;
+
+	if (shift < 0)
+		delta >>= -shift;
+	else
+		delta <<= shift;
+
+	__asm__ (
+		"mul  %5       ; "	/* edx:eax = lo32(delta) * mul_frac */
+		"mov  %4,%%eax ; "	/* eax = hi32(delta) */
+		"mov  %%edx,%4 ; "	/* keep the high word of the low product */
+		"mul  %5       ; "	/* edx:eax = hi32(delta) * mul_frac */
+		"xor  %5,%5    ; "	/* zero BEFORE the add: xor clears CF */
+		"add  %4,%%eax ; "	/* add the saved word; may set CF */
+		"adc  %5,%%edx ; "	/* fold that carry into the high word */
+		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
+		: "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)), "2" (mul_frac) );
+
+	return product;
+}
+
+static uint64_t get_nsec_offset(struct shadow_time_info *shadow)
+{
+	uint64_t tsc_now;	/* TSC delta since the snapshot, scaled by scale_delta() */
+	rdtscll(tsc_now);
+	return (scale_delta(tsc_now - shadow->tsc_timestamp,
+	    shadow->tsc_to_nsec_mul, shadow->tsc_shift));
+}
+
+static void update_wallclock(void)
+{
+	shared_info_t *s = HYPERVISOR_shared_info;
+	/* Copy the shared wc_sec/wc_nsec into shadow_tv, retrying on version change. */
+	do {
+		shadow_tv_version = s->wc_version;
+		rmb();
+		shadow_tv.tv_sec  = s->wc_sec;
+		shadow_tv.tv_nsec = s->wc_nsec;
+		rmb();
+	}
+	while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version));
+	/* The loop exits only with an even wc_version equal to the copied one. */
+}
+
+/*
+ * Reads a consistent set of time-base values from Xen, into this CPU's
+ * shadow_time. NOTE(review): the old "xtime_lock held for writing" rule
+ * names a lock not visible in this file; confirm what callers must hold.
+ */
+static void __get_time_values_from_xen(void)
+{
+	shared_info_t           *s = HYPERVISOR_shared_info;
+	struct vcpu_time_info   *src;
+	struct shadow_time_info *dst;
+
+	src = &s->vcpu_info[PCPU_GET(cpuid)].time;
+	dst = PCPU_PTR(shadow_time);
+	/* Retry while src->version is odd or changed during the copy. */
+	do {
+		dst->version = src->version;
+		rmb();
+		dst->tsc_timestamp     = src->tsc_timestamp;
+		dst->system_timestamp  = src->system_time;
+		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
+		dst->tsc_shift         = src->tsc_shift;
+		rmb();
+	}
+	while ((src->version & 1) | (dst->version ^ src->version));
+	/* Derived once per snapshot from the nanosecond multiplier. */
+	dst->tsc_to_usec_mul = dst->tsc_to_nsec_mul / 1000;
+}
+
+/* Nonzero when this CPU's shadow version equals the shared version. */
+static inline int
+time_values_up_to_date(void)
+{
+	shared_info_t *shared = HYPERVISOR_shared_info;
+	struct vcpu_time_info *live = &shared->vcpu_info[PCPU_GET(cpuid)].time;
+	struct shadow_time_info *shadow = PCPU_PTR(shadow_time);
+
+	rmb();
+	return (shadow->version == live->version);
+}
+
+static	unsigned xen_get_timecount(struct timecounter *tc);
+
+static struct timecounter xen_timecounter = {
+	xen_get_timecount,	/* get_timecount */
+	0,			/* no poll_pps */
+	~0u,			/* counter_mask */
+	0,			/* frequency */
+	"ixen",			/* name */
+	0			/* quality */
+};
+
+static void 
+clkintr(void *arg)
+{
+	int64_t delta_cpu, delta;
+	struct shadow_time_info *shadow = PCPU_PTR(shadow_time);
+	struct clockframe *frame = (struct clockframe *)arg;
+	/* Recompute both deltas until the shadow snapshot is still current. */
+	do {
+		__get_time_values_from_xen();
+		/* now = snapshot system time + offset since the snapshot */
+		delta = delta_cpu = 
+			shadow->system_timestamp + get_nsec_offset(shadow);
+		delta     -= processed_system_time;
+		delta_cpu -= PCPU_GET(processed_system_time);
+
+	} while (!time_values_up_to_date());
+	/* A negative delta is reported and the interrupt is dropped. */
+	if (unlikely(delta < (int64_t)0) || unlikely(delta_cpu < (int64_t)0)) {
+		printf("Timer ISR: Time went backwards: %lld\n", delta);
+		return;
+	}
+	
+	/* Process elapsed ticks since last call. */
+	if (delta >= NS_PER_TICK) {
+		processed_system_time += (delta / NS_PER_TICK) * NS_PER_TICK;
+		*PCPU_PTR(processed_system_time) += (delta_cpu / NS_PER_TICK) * NS_PER_TICK;
+	}
+	hardclock(frame);	/* called once per interrupt, however many ticks elapsed */
+
+	/*
+	 * Resynchronise the wall clock whenever Xen's wc_version differs
+	 * from the version we last copied.  (independent_wallclock is not
+	 * consulted here.)
+	 */
+	
+	if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
+		update_wallclock();
+		tc_setclock(&shadow_tv);
+	}
+	
+	/* XXX TODO */
+}
+
+/* Refresh the shadow, then return snapshot time plus offset, cut to 32 bits. */
+static uint32_t
+getit(void)
+{
+	struct shadow_time_info *snap = PCPU_PTR(shadow_time);
+	__get_time_values_from_xen();
+	return ((uint32_t)(snap->system_timestamp + get_nsec_offset(snap)));
+}
+
+
+/*
+ * Busy-wait for at least "n" microseconds by polling Xen system time.
+ *
+ * n: delay in microseconds; n <= 0 returns after one counter read.
+ *
+ * getit() advances timer_freq times per second; startrtclock() sets
+ * timer_freq to 10^9, so one counter tick is one nanosecond.
+ */
+void
+DELAY(int n)
+{
+	int delta;
+	long long ticks_left;
+	uint32_t tick, prev_tick;
+#ifdef DELAYDEBUG
+	int getit_calls = 1;
+	int n1;
+	static int state = 0;
+
+	/*
+	 * On the very first call, exercise DELAY() recursively for 1us up
+	 * to 10s; while that self-test runs (state == 1) each call prints
+	 * how many getit() polls it needed.
+	 */
+	if (state == 0) {
+		state = 1;
+		for (n1 = 1; n1 <= 10000000; n1 *= 10)
+			DELAY(n1);
+		state = 2;
+	}
+	if (state == 1)
+		printf("DELAY(%d)...", n);
+#endif
+
+	/*
+	 * Read the counter first, so that the rest of the setup overhead is
+	 * counted against the requested delay.
+	 */
+	prev_tick = getit();
+
+	/*
+	 * Convert microseconds to counter ticks, rounding up:
+	 *
+	 *	ticks_left = ceil(n * timer_freq / 10^6)
+	 *
+	 * The i8254 code this was derived from special-cased n < 256 with
+	 * the fixed-point constant 39099 (= 1193182 * 2^15 / 10^6).  That
+	 * constant only describes a 1193182 Hz counter; applied to the
+	 * nanosecond counter used here it made short delays end more than
+	 * 800 times too early, so every request now uses timer_freq.
+	 *
+	 * ticks_left is a long long because n * 1000 stops fitting in an
+	 * int once n exceeds about 2.1 seconds (the DELAYDEBUG self-test
+	 * asks for 10 seconds).
+	 */
+	if (n <= 0)
+		ticks_left = 0;
+	else
+		ticks_left = ((long long)n * (long long)timer_freq + 999999)
+			/ 1000000;
+
+	while (ticks_left > 0) {
+		tick = getit();
+#ifdef DELAYDEBUG
+		++getit_calls;
+#endif
+		/*
+		 * getit() yields only the low 32 bits of the clock.  The
+		 * unsigned subtraction copes with that counter wrapping
+		 * between two polls.
+		 */
+		delta = tick - prev_tick;
+		prev_tick = tick;
+
+		/*
+		 * An apparent step backwards is counted as no progress
+		 * rather than being allowed to lengthen the wait.
+		 */
+		if (delta < 0)
+			delta = 0;
+		ticks_left -= delta;
+	}
+
+#ifdef DELAYDEBUG
+	if (state == 1)
+		printf(" %d calls to getit() at %d usec each\n",
+		       getit_calls, (n + 5) / getit_calls);
+#endif
+}
+
+
+int
+sysbeep(int pitch, int period)
+{
+	return (0);	/* stub: both arguments are ignored, always reports 0 */
+}
+
+/*
+ * Re-read the time values from Xen and restart system-time accounting:
+ * the global processed_system_time and CPU 0's per-CPU copy are both set
+ * to CPU 0's shadowed system_timestamp.
+ *
+ * NOTE(review): who calls this (pmtimer_resume()?) is not visible here.
+ */
+void
+timer_restore(void)
+{
+	/* Get timebases for new environment. */ 
+	__get_time_values_from_xen();
+
+	/* Reset our own concept of passage of system time. */
+	processed_system_time = pcpu_find(0)->pc_shadow_time.system_timestamp;
+	pcpu_find(0)->pc_processed_system_time = processed_system_time;
+}
+
+void
+startrtclock()
+{
+	unsigned long long alarm;
+	uint64_t __cpu_khz;
+	uint32_t cpu_khz;
+	struct vcpu_time_info *info;
+
+	/* initialize xen values */
+	__get_time_values_from_xen();
+	processed_system_time = pcpu_find(0)->pc_shadow_time.system_timestamp;
+	pcpu_find(0)->pc_processed_system_time = processed_system_time;
+	/* cpu_khz = (10^6 * 2^32) / tsc_to_system_mul, then shifted by tsc_shift. */
+	__cpu_khz = 1000000ULL << 32;
+	info = &HYPERVISOR_shared_info->vcpu_info[0].time;
+
+	do_div(__cpu_khz, info->tsc_to_system_mul);
+	if ( info->tsc_shift < 0 )
+		cpu_khz = __cpu_khz << -info->tsc_shift;
+	else
+		cpu_khz = __cpu_khz >> info->tsc_shift;
+
+	printf("Xen reported: %u.%03u MHz processor.\n", 
+	       cpu_khz / 1000, cpu_khz % 1000);
+
+	/* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
+	   (2^32 * 1 / (clocks/us)) */
+	{	
+		unsigned long eax=0, edx=1000;	/* edx:eax = 1000 * 2^32 */
+		__asm__("divl %2"
+			:"=a" (cached_gtm), "=d" (edx)
+			:"r" (cpu_khz),
+			"0" (eax), "1" (edx));	/* quotient lands in cached_gtm */
+	}
+
+	set_cyc2ns_scale(cpu_khz/1000);
+	tsc_freq = cpu_khz * 1000;
+	/* timer_freq and the timecounter frequency are both set to 10^9. */
+        timer_freq = xen_timecounter.tc_frequency = 1000000000LL;
+        tc_init(&xen_timecounter);
+
+	/* NOTE(review): the TSC value read into "alarm" is not used afterwards. */
+	rdtscll(alarm);
+}
+
+#ifdef XEN_PRIVILEGED_GUEST
+/*
+ * RTC support routines
+ */
+
+int
+rtcin(reg)
+	int reg;
+{
+	u_char val;
+	/* Under RTC_LOCK: write "reg" to IO_RTC, then read IO_RTC + 1. */
+	RTC_LOCK;
+	outb(IO_RTC, reg);
+	inb(0x84);	/* result discarded; presumably an I/O delay - TODO confirm */
+	val = inb(IO_RTC + 1);
+	inb(0x84);
+	RTC_UNLOCK;
+	return (val);
+}
+
+
+static __inline int
+readrtc(int port)
+{
+	return(bcd2bin(rtcin(port)));	/* rtcin() value passed through bcd2bin() */
+}
+
+void
+writertc(int reg, u_char val)
+{
+	/* rtc_reg caches the last selected register to skip a redundant select. */
+	RTC_LOCK;
+	if (rtc_reg != reg) {
+		inb(0x84);	/* result discarded; presumably an I/O delay - TODO confirm */
+		outb(IO_RTC, reg);
+		rtc_reg = reg;
+		inb(0x84);
+	}
+	outb(IO_RTC + 1, val);	/* write the value to IO_RTC + 1 */
+	inb(0x84);
+	RTC_UNLOCK;
+}
+
+
+/*
+ * Initialize the time of day register, based on the time base which is, e.g.
+ * from a filesystem.
+ */
+static void
+domu_inittodr(time_t base)
+{
+	struct timespec ts;
+	int y;
+
+	/* update_wallclock() is expected to refresh shadow_tv from Xen. */
+	update_wallclock();
+	RTC_LOCK;
+
+	if (base) {
+		ts.tv_sec = base;
+		ts.tv_nsec = 0;
+		tc_setclock(&ts);
+	}
+
+	/*
+	 * Prefer Xen's wall clock when the kernel clock is off by two
+	 * seconds or more.  (The old "sec += ..." timezone adjustment here
+	 * read an uninitialised local and its result was never used.)
+	 */
+	y = time_second - shadow_tv.tv_sec;
+	if (y <= -2 || y >= 2)
+		tc_setclock(&shadow_tv);
+	RTC_UNLOCK;
+}
+
+/*
+ * Write system time back to RTC.  
+ */
+static void
+domu_resettodr(void)
+{
+	unsigned long tm;
+	int s;
+	dom0_op_t op;
+	struct shadow_time_info *shadow;
+
+	shadow = PCPU_PTR(shadow_time);
+	if (xen_disable_rtc_set)
+		return;
+	/* Snapshot time_second at splclock. */
+	s = splclock();
+	tm = time_second;
+	splx(s);
+	/* Convert to the local time kept in the RTC. */
+	tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
+	/* NOTE(review): resettodr() calls this only when SIF_INITDOMAIN is clear. */
+	if ((xen_start_info->flags & SIF_INITDOMAIN) &&
+	    !independent_wallclock)
+	{
+		op.cmd = DOM0_SETTIME;
+		op.u.settime.secs        = tm;
+		op.u.settime.nsecs       = 0;
+		op.u.settime.system_time = shadow->system_timestamp;
+		HYPERVISOR_dom0_op(&op);	/* return value is not checked */
+		update_wallclock();
+	} else if (independent_wallclock) {
+		/* notyet */
+		;
+	}		
+}
+
+/*
+ * Initialize the time of day register, based on the time base which is, e.g.
+ * from a filesystem.
+ */
+void
+inittodr(time_t base)
+{
+	unsigned long	sec, days;
+	int		year, month;
+	int		y, m, s;
+	struct timespec ts;
+	/* Domains without SIF_INITDOMAIN take the Xen wall-clock path. */
+	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
+	        domu_inittodr(base);
+		return;
+	}
+	/* Seed the clock from "base" (whole seconds) when it is non-zero. */
+	if (base) {
+		s = splclock();
+		ts.tv_sec = base;
+		ts.tv_nsec = 0;
+		tc_setclock(&ts);
+		splx(s);
+	}
+
+	/* Look if we have a RTC present and the time is valid */
+	if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
+		goto wrong_time;
+
+	/* wait for time update to complete */
+	/* If RTCSA_TUP is zero, we have at least 244us before next update */
+	s = splhigh();
+	while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
+		splx(s);
+		s = splhigh();
+	}
+	/* From here to the final splx(s) the RTC is read at splhigh. */
+	days = 0;
+#ifdef USE_RTC_CENTURY
+	year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
+#else
+	year = readrtc(RTC_YEAR) + 1900;
+	if (year < 1970)
+		year += 100;
+#endif
+	if (year < 1970) {
+		splx(s);
+		goto wrong_time;
+	}
+	month = readrtc(RTC_MONTH);
+	for (m = 1; m < month; m++)
+		days += daysinmonth[m-1];
+	if ((month > 2) && LEAPYEAR(year))
+		days ++;
+	days += readrtc(RTC_DAY) - 1;
+	for (y = 1970; y < year; y++)
+		days += DAYSPERYEAR + LEAPYEAR(y);
+	sec = ((( days * 24 +
+		  readrtc(RTC_HRS)) * 60 +
+		readrtc(RTC_MIN)) * 60 +
+	       readrtc(RTC_SEC));
+	/* sec now contains the number of seconds, since Jan 1 1970,
+	   in the local time zone */
+
+	sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
+	/* Only step the clock when it differs from the RTC by >= 2 seconds. */
+	y = time_second - sec;
+	if (y <= -2 || y >= 2) {
+		/* badly off, adjust it */
+		ts.tv_sec = sec;
+		ts.tv_nsec = 0;
+		tc_setclock(&ts);
+	}
+	splx(s);
+	return;
+
+ wrong_time:
+	printf("Invalid time in real time clock.\n");
+	printf("Check and reset the date immediately!\n");
+}
+
+
+
+/*
+ * Write system time back to RTC
+ */
+void
+resettodr()
+{
+	unsigned long	tm;
+	int		y, m, s;
+	/* Domains without SIF_INITDOMAIN never touch the hardware RTC. */
+	if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
+	        domu_resettodr();
+		return;
+	}
+	       
+	if (xen_disable_rtc_set)
+		return;
+
+	s = splclock();
+	tm = time_second;
+	splx(s);
+
+	/* Disable RTC updates and interrupts. */
+	writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
+
+	/* Calculate local time to put in RTC */
+
+	tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
+
+	writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60;	/* Write back Seconds */
+	writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60;	/* Write back Minutes */
+	writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24;	/* Write back Hours   */
+
+	/* We have now the days since 01-01-1970 in tm */
+	writertc(RTC_WDAY, (tm + 4) % 7 + 1);		/* Write back Weekday */
+	for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
+	     tm >= m;
+	     y++,      m = DAYSPERYEAR + LEAPYEAR(y))
+		tm -= m;	/* here m holds the number of days in year y */
+
+	/* Now we have the years in y and the day-of-the-year in tm */
+	writertc(RTC_YEAR, bin2bcd(y%100));		/* Write back Year    */
+#ifdef USE_RTC_CENTURY
+	writertc(RTC_CENTURY, bin2bcd(y/100));		/* ... and Century    */
+#endif
+	for (m = 0; ; m++) {	/* m becomes the zero-based month index */
+		int ml;
+
+		ml = daysinmonth[m];
+		if (m == 1 && LEAPYEAR(y))
+			ml++;
+		if (tm < ml)
+			break;
+		tm -= ml;
+	}
+
+	writertc(RTC_MONTH, bin2bcd(m + 1));            /* Write back Month   */
+	writertc(RTC_DAY, bin2bcd(tm + 1));             /* Write back Month Day */
+
+	/* Reenable RTC updates and interrupts. */
+	writertc(RTC_STATUSB, RTCSB_24HR);
+	rtcin(RTC_INTR);
+}
+#else
+/*
+ * Initialize the time of day register, based on the time base which is, e.g.
+ * from a filesystem.
+ */
+void
+inittodr(time_t base)
+{
+	int		s, y;
+	struct timespec ts;
+
+	s = splclock();
+	if (base) {
+		ts.tv_sec = base;
+		ts.tv_nsec = 0;
+		tc_setclock(&ts);
+	}
+	/* Adopt Xen's wall clock when we are two or more seconds off. */
+	y = time_second - shadow_tv.tv_sec;
+	if (y <= -2 || y >= 2) {
+		/* tv_nsec is already in nanoseconds: copy it unscaled. */
+		ts.tv_sec = shadow_tv.tv_sec;
+		ts.tv_nsec = shadow_tv.tv_nsec;
+		tc_setclock(&ts);
+	}
+	splx(s);
+}
+
+/*
+ * Write system time back to RTC.  Not supported for guest domains.
+ */
+void
+resettodr()
+{	/* intentionally empty: nothing is written back in this configuration */
+}
+#endif
+
+
+int
+acquire_timer2(int mode)
+{
+	/* Returns -1 unless timer2 is currently RELEASED; 0 on success. */
+	if (timer2_state != RELEASED)
+		return (-1);
+	timer2_state = ACQUIRED;
+
+	/*
+	 * This access to the timer registers is as atomic as possible
+	 * because it is a single instruction.  We could do better if we
+	 * knew the rate.  Use of splclock() limits glitches to 10-100us,
+	 * and this is probably good enough for timer2, so we aren't as
+	 * careful with it as with timer0.
+	 */
+	outb(TIMER_MODE, TIMER_SEL2 | (mode & 0x3f));	/* only the low 6 mode bits */
+
+	return (0);
+}
+
+int
+release_timer2()
+{
+	/* Returns -1 unless timer2 is currently ACQUIRED; 0 on success. */
+	if (timer2_state != ACQUIRED)
+		return (-1);
+	timer2_state = RELEASED;
+	outb(TIMER_MODE, TIMER_SEL2 | TIMER_SQWAVE | TIMER_16BIT);
+	return (0);
+}
+
+static struct vcpu_set_periodic_timer xen_set_periodic_tick;
+
+/*
+ * Start clocks running.
+ */
+void
+cpu_initclocks(void)
+{
+	int error;
+	/* Request a periodic timer of NS_PER_TICK nanoseconds for vCPU 0. */
+	xen_set_periodic_tick.period_ns = NS_PER_TICK;
+
+	HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0,
+			   &xen_set_periodic_tick);	/* result not checked */
+	/* Drop any binding left from an earlier call before re-binding. */
+	if (time_irq)
+		unbind_from_irqhandler(time_irq);
+	time_irq = 0;
+
+        error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "clk", 
+		                                clkintr,
+	    INTR_TYPE_CLK | INTR_FAST, &time_irq);
+	if (error)
+		panic("failed to register clock interrupt\n");
+
+	/* should fast clock be enabled ? */
+}
+
+int
+ap_cpu_initclocks(int cpu)
+{
+	unsigned int time_irq;
+	int error;
+
+	xen_set_periodic_tick.period_ns = NS_PER_TICK;
+
+	HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, cpu,
+			   &xen_set_periodic_tick);
+	/* Bind the timer VIRQ on this CPU; it was hard-coded to CPU 0 before. */
+        error = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, "clk", 
+		                                clkintr, 
+	    INTR_TYPE_CLK | INTR_FAST, &time_irq);
+	if (error)
+		panic("failed to register clock interrupt\n");
+	/* NOTE: time_irq is a local here and shadows the file-scope one. */
+	return (0);
+}
+
+void
+cpu_startprofclock(void)
+{
+	/* No profiling clock here: only a diagnostic message is printed. */
+    	printf("cpu_startprofclock: profiling clock is not supported\n");
+}
+
+void
+cpu_stopprofclock(void)
+{
+	/* No profiling clock here: only a diagnostic message is printed. */
+    	printf("cpu_stopprofclock: profiling clock is not supported\n");
+}
+#define NSEC_PER_USEC 1000
+
+/* Timecounter read: Xen system time rounded down to a whole tick. */
+static uint32_t
+xen_get_timecount(struct timecounter *tc)
+{
+	struct shadow_time_info *st = PCPU_PTR(shadow_time);
+	uint64_t now, whole_ticks;
+
+	/* "tc" is not consulted. */
+	__get_time_values_from_xen();
+	now = st->system_timestamp + get_nsec_offset(st);
+	whole_ticks = now / NS_PER_TICK;
+	return (uint32_t)(whole_ticks * NS_PER_TICK);
+}
+
+/* Return processed_system_time advanced by "ticks" periods of NS_PER_TICK. */
+uint64_t
+get_system_time(int ticks)
+{
+    return processed_system_time + (ticks * NS_PER_TICK);
+}
+
+/*
+ * Track behavior of cur_timer->get_offset() functionality in timer_tsc.c
+ */
+
+#if 0
+static uint32_t
+xen_get_offset(void)
+{
+	register unsigned long eax, edx;
+
+	/* Read the Time Stamp Counter */
+
+	rdtsc(eax,edx);
+
+	/* .. relative to previous jiffy (32 bits is enough) */
+	eax -= shadow_tsc_stamp;
+
+	/*
+	 * Time offset = (tsc_low delta) * cached_gtm
+	 *             = (tsc_low delta) * (usecs_per_clock)
+	 *             = (tsc_low delta) * (usecs_per_jiffy / clocks_per_jiffy)
+	 *
+	 * Using a mull instead of a divl saves up to 31 clock cycles
+	 * in the critical path.
+	 */
+
+	__asm__("mull %2"
+		:"=a" (eax), "=d" (edx)
+		:"rm" (cached_gtm),
+		"0" (eax));
+
+	/* our adjusted time offset in microseconds */
+	return edx;
+}
+#endif
+void
+idle_block(void)
+{
+	int err;
+	/* Arm a one-shot timer one tick past the processed time, then block. */
+	__get_time_values_from_xen();
+	err = HYPERVISOR_set_timer_op(processed_system_time + NS_PER_TICK);
+	KASSERT(err == 0, ("set_timer_op failed"));
+	HYPERVISOR_sched_op(SCHEDOP_block, 0);
+}

Property changes on: i386/xen/clock.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/xen/xen_machdep.c
===================================================================
--- i386/xen/xen_machdep.c	(.../stable/6/sys)	(revision 0)
+++ i386/xen/xen_machdep.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,1280 @@
+/*
+ *
+ * Copyright (c) 2004 Christian Limpach.
+ * Copyright (c) 2004-2006,2008 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Christian Limpach.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/lock.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/kernel.h>
+#include <sys/reboot.h>
+#include <sys/sysproto.h>
+
+#include <machine/xen/xen-os.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+#include <machine/segments.h>
+#include <machine/pcb.h>
+#include <machine/stdarg.h>
+#include <machine/vmparam.h>
+#include <machine/cpu.h>
+#include <machine/intr_machdep.h>
+#include <machine/md_var.h>
+#include <machine/asmacros.h>
+
+
+
+#include <xen/hypervisor.h>
+#include <machine/xen/xenvar.h>
+#include <machine/xen/xenfunc.h>
+#include <machine/xen/xenpmap.h>
+#include <machine/xen/xenfunc.h>
+#include <xen/interface/memory.h>
+#include <xen/features.h>
+#ifdef SMP
+#include <machine/privatespace.h>
+#endif
+
+
+#include <vm/vm_page.h>
+
+#define	IDTVEC(name)	__CONCAT(X,name)
+
+extern inthand_t
+IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl),
+	IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm),
+	IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot),
+	IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align),
+	IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
+
+
+int xendebug_flags; 
+start_info_t *xen_start_info;
+shared_info_t *HYPERVISOR_shared_info;
+xen_pfn_t *xen_machine_phys = machine_to_phys_mapping;
+xen_pfn_t *xen_phys_machine;
+xen_pfn_t *xen_pfn_to_mfn_frame_list[16];
+xen_pfn_t *xen_pfn_to_mfn_frame_list_list;
+int preemptable, init_first;
+extern unsigned int avail_space;
+
+static void printk(const char *fmt, ...);
+
+void ni_cli(void);
+void ni_sti(void);
+
+
+void
+ni_cli(void)
+{
+	__asm__("pushl %edx;"	/* save %edx and %eax around __cli() */
+		"pushl %eax;"
+		);
+	__cli();
+	__asm__("popl %eax;"	/* restore in reverse order */
+		"popl %edx;"
+		);
+}
+
+
+void
+ni_sti(void)
+{
+	__asm__("pushl %edx;"	/* save %edx, %esi and %eax around __sti() */
+		"pushl %esi;"
+		"pushl %eax;"
+		);
+	__sti();
+	__asm__("popl %eax;"	/* restore in reverse order */
+		"popl %esi;"
+		"popl %edx;"
+		);
+}
+
+/*
+ * Modify the cmd_line in place by converting each ',' to a NUL byte so that
+ * it is in a format suitable for the static env vars.
+ */
+char *
+xen_setbootenv(char *cmd_line)
+{
+	char *cursor;
+
+	while (*cmd_line == ' ')	/* step over leading blanks */
+		cmd_line++;
+	printk("xen_setbootenv(): cmd_line='%s'\n", cmd_line);
+	cursor = cmd_line;
+	while (strsep(&cursor, ",") != NULL)	/* each ',' becomes a NUL */
+		continue;
+	return (cmd_line);
+}
+
+static struct 
+{
+	const char	*ev;	/* environment variable name */
+	int		mask;	/* boothowto flag set when that variable exists */
+} howto_names[] = {
+	{"boot_askname",	RB_ASKNAME},
+	{"boot_single",	RB_SINGLE},
+	{"boot_nosync",	RB_NOSYNC},
+	{"boot_halt",	RB_HALT},	/* was RB_ASKNAME (copy-paste error) */
+	{"boot_serial",	RB_SERIAL},
+	{"boot_cdrom",	RB_CDROM},
+	{"boot_gdb",	RB_GDB},
+	{"boot_gdb_pause",	RB_RESERVED1},
+	{"boot_verbose",	RB_VERBOSE},
+	{"boot_multicons",	RB_MULTIPLE},
+	{NULL,	0}
+};
+
+int 
+xen_boothowto(char *envp)
+{
+	int i, howto = 0;	/* "envp" is not used; lookups go through getenv() */
+
+	/* get equivalents from the environment */
+	for (i = 0; howto_names[i].ev != NULL; i++)
+		if (getenv(howto_names[i].ev) != NULL)
+			howto |= howto_names[i].mask;
+	return howto;	/* OR of the masks whose variables are present */
+}
+
+#define PRINTK_BUFSIZE 1024
+static void
+printk(const char *fmt, ...)
+{
+        __va_list ap;
+        int retval;
+        static char buf[PRINTK_BUFSIZE];
+
+	return;		/* output disabled; everything below is unreachable */
+        va_start(ap, fmt);
+        retval = vsnprintf(buf, sizeof(buf), fmt, ap);
+        va_end(ap);
+	if (retval >= (int)sizeof(buf))	/* length is untruncated: clamp it */
+		retval = sizeof(buf) - 1;
+        (void)HYPERVISOR_console_write(buf, retval);
+}
+
+
+#define XPQUEUE_SIZE 128
+/* Source location recorded for a queued update (filled in under INVARIANTS). */
+struct mmu_log {
+	char *file;
+	int line;
+};
+
+#ifdef SMP
+/* per-cpu queues and indices */
+#ifdef INVARIANTS
+static struct mmu_log xpq_queue_log[MAX_VIRT_CPUS][XPQUEUE_SIZE];
+#endif
+
+static int xpq_idx[MAX_VIRT_CPUS];  
+static mmu_update_t xpq_queue[MAX_VIRT_CPUS][XPQUEUE_SIZE];
+/* The macros below expect a local "vcpu", which SET_VCPU() declares. */
+#define XPQ_QUEUE xpq_queue[vcpu]
+#define XPQ_IDX xpq_idx[vcpu]
+#define SET_VCPU() int vcpu = gdtset ? PCPU_GET(cpuid) : 0
+
+#define XPQ_QUEUE_LOG xpq_queue_log[vcpu]
+#else
+	/* Uniprocessor: one queue, and SET_VCPU() expands to nothing. */
+static mmu_update_t xpq_queue[XPQUEUE_SIZE];
+static struct mmu_log xpq_queue_log[XPQUEUE_SIZE];
+static int xpq_idx = 0;
+
+#define XPQ_QUEUE_LOG xpq_queue_log
+#define XPQ_QUEUE xpq_queue
+#define XPQ_IDX xpq_idx
+#define SET_VCPU()
+#endif /* !SMP */
+/* NOTE(review): the expansion below already ends in ';'. */
+#define XPQ_IDX_INC atomic_add_int(&XPQ_IDX, 1);
+
+#if 0
+static void
+xen_dump_queue(void)
+{
+	int _xpq_idx = XPQ_IDX;
+	int i;
+
+	if (_xpq_idx <= 1)
+		return;
+
+	printk("xen_dump_queue(): %u entries\n", _xpq_idx);
+	for (i = 0; i < _xpq_idx; i++) {
+		printk(" val: %llx ptr: %llx\n", XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
+	}
+}
+#endif
+
+
+static __inline void
+_xen_flush_queue(void)
+{
+	SET_VCPU();
+	int _xpq_idx = XPQ_IDX;
+	int error, i;
+	/* NOTE(review): XPQ_IDX is sampled above, before critical_enter(). */
+
+	if (__predict_true(gdtset))
+		critical_enter();
+	XPQ_IDX = 0;
+	/* Make sure index is cleared first to avoid double updates. */
+	error = HYPERVISOR_mmu_update((mmu_update_t *)&XPQ_QUEUE,
+				      _xpq_idx, NULL, DOMID_SELF);
+    
+#if 0
+	if (__predict_true(gdtset))
+	for (i = _xpq_idx; i > 0;) {
+		if (i >= 3) {
+			CTR6(KTR_PMAP, "mmu:val: %lx ptr: %lx val: %lx "
+			    "ptr: %lx val: %lx ptr: %lx",
+			    (XPQ_QUEUE[i-1].val & 0xffffffff),
+			    (XPQ_QUEUE[i-1].ptr & 0xffffffff),
+			    (XPQ_QUEUE[i-2].val & 0xffffffff),
+			    (XPQ_QUEUE[i-2].ptr & 0xffffffff),
+			    (XPQ_QUEUE[i-3].val & 0xffffffff),
+			    (XPQ_QUEUE[i-3].ptr & 0xffffffff));
+			    i -= 3;
+		} else if (i == 2) {
+			CTR4(KTR_PMAP, "mmu: val: %lx ptr: %lx val: %lx ptr: %lx",
+			    (XPQ_QUEUE[i-1].val & 0xffffffff),
+			    (XPQ_QUEUE[i-1].ptr & 0xffffffff),
+			    (XPQ_QUEUE[i-2].val & 0xffffffff),
+			    (XPQ_QUEUE[i-2].ptr & 0xffffffff));
+			i = 0;
+		} else {
+			CTR2(KTR_PMAP, "mmu: val: %lx ptr: %lx", 
+			    (XPQ_QUEUE[i-1].val & 0xffffffff),
+			    (XPQ_QUEUE[i-1].ptr & 0xffffffff));
+			i = 0;
+		}
+	}
+#endif	
+	if (__predict_true(gdtset))
+		critical_exit();
+	if (__predict_false(error < 0)) {	/* dump the submitted batch, then panic */
+		for (i = 0; i < _xpq_idx; i++)
+			printf("val: %llx ptr: %llx\n",
+			    XPQ_QUEUE[i].val, XPQ_QUEUE[i].ptr);
+		panic("Failed to execute MMU updates: %d", error);
+	}
+
+}
+
+void
+xen_flush_queue(void)
+{
+	SET_VCPU();
+	if (XPQ_IDX != 0) _xen_flush_queue();	/* nothing to do for an empty queue */
+}
+
+static __inline void
+xen_increment_idx(void)
+{
+	SET_VCPU();
+	/* Advance the queue index; flush as soon as the queue becomes full. */
+	XPQ_IDX++;
+	if (__predict_false(XPQ_IDX == XPQUEUE_SIZE))
+		xen_flush_queue();
+}
+
+void
+xen_check_queue(void)
+{
+#ifdef INVARIANTS
+	SET_VCPU();
+	/* Assert that no updates are pending; a no-op without INVARIANTS. */
+	KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
+#endif
+}
+
+void
+xen_invlpg(vm_offset_t va)
+{
+	struct mmuext_op op;
+	int err;
+	op.cmd = MMUEXT_INVLPG_ALL;
+	op.arg1.linear_addr = va & ~PAGE_MASK;	/* page-aligned address */
+	err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+	KASSERT(err >= 0, ("mmuext_op failed"));	/* failure is only asserted */
+}
+
+void
+xen_load_cr3(u_int val)
+{
+	struct mmuext_op op;
+	int err;
+#ifdef INVARIANTS
+	SET_VCPU();
+	/* The update queue must already be empty when the base pointer changes. */
+	KASSERT(XPQ_IDX == 0, ("pending operations XPQ_IDX=%d", XPQ_IDX));
+#endif
+	op.cmd = MMUEXT_NEW_BASEPTR;
+	op.arg1.mfn = xpmap_ptom(val) >> PAGE_SHIFT;	/* xpmap_ptom(val) as a frame number */
+	err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+	KASSERT(err >= 0, ("mmuext_op failed"));
+}
+
+void
+xen_restore_flags(u_int eflags)
+{
+	/* Values above 1 are reduced to 1 if PSL_I is clear, else to 0. */
+	if (eflags > 1)
+		eflags = ((eflags & PSL_I) == 0);
+
+	__restore_flags(eflags);
+}
+
+int
+xen_save_and_cli(void)
+{
+	int eflags;
+	/* __save_and_cli() is a macro that stores into "eflags". */
+	__save_and_cli(eflags);
+	return (eflags);
+}
+
+void
+xen_cli(void)
+{
+	__cli();	/* function wrapper around the __cli() macro */
+}
+
+void
+xen_sti(void)
+{
+	__sti();	/* function wrapper around the __sti() macro */
+}
+
+u_int
+xen_rcr2(void)
+{
+	/* The cr2 value is read from this CPU's slot in the shared info page. */
+	return (HYPERVISOR_shared_info->vcpu_info[curcpu].arch.cr2);
+}
+
+void
+_xen_machphys_update(vm_paddr_t mfn, vm_paddr_t pfn, char *file, int line)
+{
+	SET_VCPU();
+	/* Queue an MMU_MACHPHYS_UPDATE entry: frame "mfn" now maps to "pfn". */
+	if (__predict_true(gdtset))
+		critical_enter();
+	XPQ_QUEUE[XPQ_IDX].ptr = (mfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
+	XPQ_QUEUE[XPQ_IDX].val = pfn;
+#ifdef INVARIANTS
+	XPQ_QUEUE_LOG[XPQ_IDX].file = file;
+	XPQ_QUEUE_LOG[XPQ_IDX].line = line;	
+#endif		
+	xen_increment_idx();	/* may flush if the queue just filled up */
+	if (__predict_true(gdtset))
+		critical_exit();
+}
+
+void
+_xen_queue_pt_update(vm_paddr_t ptr, vm_paddr_t val, char *file, int line)
+{
+	SET_VCPU();
+#if 0
+	if (__predict_true(gdtset))	
+		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+#endif
+	/* "ptr" must be 8-byte aligned; its low bits carry the update type. */
+	KASSERT((ptr & 7) == 0, ("misaligned update"));
+	
+	if (__predict_true(gdtset))
+		critical_enter();
+	/* Queue an MMU_NORMAL_PT_UPDATE entry writing "val" at "ptr". */
+	XPQ_QUEUE[XPQ_IDX].ptr = ((uint64_t)ptr) | MMU_NORMAL_PT_UPDATE;
+	XPQ_QUEUE[XPQ_IDX].val = (uint64_t)val;
+#ifdef INVARIANTS
+	XPQ_QUEUE_LOG[XPQ_IDX].file = file;
+	XPQ_QUEUE_LOG[XPQ_IDX].line = line;	
+#endif	
+	xen_increment_idx();	/* may flush if the queue just filled up */
+	if (__predict_true(gdtset))
+		critical_exit();
+}
+
+void 
+xen_pgdpt_pin(vm_paddr_t ma)
+{
+	struct mmuext_op op;
+	int err;
+	op.cmd = MMUEXT_PIN_L3_TABLE;
+	op.arg1.mfn = ma >> PAGE_SHIFT;	/* "ma" converted to a frame number */
+	xen_flush_queue();	/* submit queued updates before pinning */
+	err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+	KASSERT(err >= 0, ("mmuext_op failed"));
+}
+
+void 
+xen_pgd_pin(vm_paddr_t ma)
+{
+	struct mmuext_op op;
+	int err;
+	op.cmd = MMUEXT_PIN_L2_TABLE;
+	op.arg1.mfn = ma >> PAGE_SHIFT;	/* "ma" converted to a frame number */
+	xen_flush_queue();	/* submit queued updates before pinning */
+	err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+	KASSERT(err >= 0, ("mmuext_op failed"));
+}
+
+void 
+xen_pgd_unpin(vm_paddr_t ma)
+{
+	struct mmuext_op op;
+	int err;
+	op.cmd = MMUEXT_UNPIN_TABLE;
+	op.arg1.mfn = ma >> PAGE_SHIFT;	/* "ma" converted to a frame number */
+	xen_flush_queue();	/* submit queued updates before unpinning */
+	err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+	KASSERT(err >= 0, ("mmuext_op failed"));
+}
+
+void 
+xen_pt_pin(vm_paddr_t ma)
+{
+	struct mmuext_op op;
+	int err;
+	op.cmd = MMUEXT_PIN_L1_TABLE;
+	op.arg1.mfn = ma >> PAGE_SHIFT;	/* "ma" converted to a frame number */
+	printk("xen_pt_pin(): mfn=%x\n", op.arg1.mfn);
+	xen_flush_queue();	/* submit queued updates before pinning */
+	err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+	KASSERT(err >= 0, ("mmuext_op failed"));
+}
+
+void 
+xen_pt_unpin(vm_paddr_t ma)
+{
+	struct mmuext_op op;
+	int err;
+	op.cmd = MMUEXT_UNPIN_TABLE;
+	op.arg1.mfn = ma >> PAGE_SHIFT;	/* "ma" converted to a frame number */
+	xen_flush_queue();	/* submit queued updates before unpinning */
+	err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+	KASSERT(err >= 0, ("mmuext_op failed"));
+}
+
+void 
+xen_set_ldt(vm_paddr_t ptr, unsigned long len)
+{
+	struct mmuext_op op;
+	int err;
+	op.cmd = MMUEXT_SET_LDT;
+	op.arg1.linear_addr = ptr;	/* NOTE(review): typed vm_paddr_t, used as linear */
+	op.arg2.nr_ents = len;		/* "len" is an entry count */
+	xen_flush_queue();
+	err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+	KASSERT(err >= 0, ("mmuext_op failed"));
+}
+
+void xen_tlb_flush(void)
+{
+	struct mmuext_op op;
+	int err;
+	op.cmd = MMUEXT_TLB_FLUSH_LOCAL;
+	xen_flush_queue();	/* submit queued updates before the flush */
+	err = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
+	KASSERT(err >= 0, ("mmuext_op failed"));
+}
+
+void
+xen_update_descriptor(union descriptor *table, union descriptor *entry)
+{
+	vm_paddr_t pa;
+	pt_entry_t *ptp;
+	/* Build the slot's address from its PTE frame plus the page offset. */
+	ptp = vtopte((vm_offset_t)table);
+	pa = (*ptp & PG_FRAME) | ((vm_offset_t)table & PAGE_MASK);
+	if (HYPERVISOR_update_descriptor(pa, *(uint64_t *)entry))
+		panic("HYPERVISOR_update_descriptor failed\n");
+}
+
+
+#if 0
+/*
+ * Bitmap is indexed by page number. If bit is set, the page is part of a
+ * xen_create_contiguous_region() area of memory.
+ */
+unsigned long *contiguous_bitmap;
+
+static void 
+contiguous_bitmap_set(unsigned long first_page, unsigned long nr_pages)
+{
+	unsigned long start_off, end_off, curr_idx, end_idx;
+
+	curr_idx  = first_page / BITS_PER_LONG;
+	start_off = first_page & (BITS_PER_LONG-1);
+	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
+	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
+
+	if (curr_idx == end_idx) {
+		contiguous_bitmap[curr_idx] |=
+			((1UL<<end_off)-1) & -(1UL<<start_off);
+	} else {
+		contiguous_bitmap[curr_idx] |= -(1UL<<start_off);
+		while ( ++curr_idx < end_idx )
+			contiguous_bitmap[curr_idx] = ~0UL;
+		contiguous_bitmap[curr_idx] |= (1UL<<end_off)-1;
+	}
+}
+
+static void 
+contiguous_bitmap_clear(unsigned long first_page, unsigned long nr_pages)
+{
+	unsigned long start_off, end_off, curr_idx, end_idx;
+
+	curr_idx  = first_page / BITS_PER_LONG;
+	start_off = first_page & (BITS_PER_LONG-1);
+	end_idx   = (first_page + nr_pages) / BITS_PER_LONG;
+	end_off   = (first_page + nr_pages) & (BITS_PER_LONG-1);
+
+	if (curr_idx == end_idx) {
+		contiguous_bitmap[curr_idx] &=
+			-(1UL<<end_off) | ((1UL<<start_off)-1);
+	} else {
+		contiguous_bitmap[curr_idx] &= (1UL<<start_off)-1;
+		while ( ++curr_idx != end_idx )
+			contiguous_bitmap[curr_idx] = 0;
+		contiguous_bitmap[curr_idx] &= -(1UL<<end_off);
+	}
+}
+#endif
+
+/* Ensure multi-page extents are contiguous in machine memory. */
+int 
+xen_create_contiguous_region(vm_page_t pages, int npages)
+{
+	unsigned long  mfn, i, flags;
+	int order, err;
+	struct xen_memory_reservation reservation = {
+		.nr_extents   = 1,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+	set_xen_guest_handle(reservation.extent_start, &mfn);
+	/* Every memory_op below passes or returns a frame number via "mfn". */
+	balloon_lock(flags);
+
+	/* can currently only handle power of two allocation */
+	KASSERT(ffs(npages) == fls(npages), ("unexpected page count"));
+
+	/* 0. determine order */
+	order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
+	
+	/* 1. give away machine pages. */
+	for (i = 0; i < (1 << order); i++) {
+		int pfn;
+		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
+		mfn = PFNTOMFN(pfn);
+		PFNTOMFN(pfn) = INVALID_P2M_ENTRY;
+		err = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
+		KASSERT(err == 1, ("memory_op failed"));
+	}
+
+
+	/* 2. Get a new contiguous memory extent. */
+	reservation.extent_order = order;
+	/* xenlinux hardcodes this because of aacraid - maybe set to 0 if we're not 
+	 * running with a broken driver XXXEN
+	 */
+	reservation.address_bits = 31; 
+	if (HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation) != 1)
+		goto fail;
+
+	/* 3. Map the new extent in place of old pages. */
+	for (i = 0; i < (1 << order); i++) {
+		int pfn;
+		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
+		xen_machphys_update(mfn+i, pfn);
+		PFNTOMFN(pfn) = mfn+i;
+	}
+
+	xen_tlb_flush();
+
+#if 0
+	contiguous_bitmap_set(VM_PAGE_TO_PHYS(&pages[0]) >> PAGE_SHIFT, 1UL << order);
+#endif
+
+	balloon_unlock(flags);
+
+	return 0;
+
+ fail:
+	reservation.extent_order = 0;
+	reservation.address_bits = 0;
+	/* Fall back: repopulate each page with its own order-0 frame. */
+	for (i = 0; i < (1 << order); i++) {
+		int pfn;
+		pfn = VM_PAGE_TO_PHYS(&pages[i]) >> PAGE_SHIFT;
+		err = HYPERVISOR_memory_op(
+			XENMEM_increase_reservation, &reservation);
+		KASSERT(err == 1, ("memory_op failed"));
+		xen_machphys_update(mfn, pfn);
+		PFNTOMFN(pfn) = mfn;
+	}
+
+	xen_tlb_flush();
+
+	balloon_unlock(flags);
+
+	return ENOMEM;
+}
+
+/*
+ * Give the machine frames behind [addr, addr + npages pages) back to Xen
+ * and map freshly allocated frames in their place.  npages: power of two.
+ */
+void 
+xen_destroy_contiguous_region(void *addr, int npages)
+{
+	unsigned long  mfn, i, flags, order, pfn0;
+	int err;
+	struct xen_memory_reservation reservation = {
+		.nr_extents   = 1,
+		.extent_order = 0,
+		.domid        = DOMID_SELF
+	};
+	set_xen_guest_handle(reservation.extent_start, &mfn);
+	pfn0 = vtophys(addr) >> PAGE_SHIFT;
+#if 0
+	scrub_pages(vstart, 1 << order);
+#endif
+	/* can currently only handle power of two allocation */
+	KASSERT(ffs(npages) == fls(npages), ("non-power of 2 page count"));
+
+	/* 0. determine order */
+	order = (ffs(npages) == fls(npages)) ? fls(npages) - 1 : fls(npages);
+	balloon_lock(flags);
+#if 0
+	contiguous_bitmap_clear(vtophys(addr) >> PAGE_SHIFT, 1UL << order);
+#endif
+
+	/* 1. Zap current PTEs, giving away the underlying pages. */
+	for (i = 0; i < (1 << order); i++) {
+		uint64_t new_val = 0;
+		/* decrease_reservation frees the frame stored in "mfn". */
+		mfn = vtomach((char *)addr + i*PAGE_SIZE) >> PAGE_SHIFT;
+
+		err = HYPERVISOR_update_va_mapping((vm_offset_t)((char *)addr + (i * PAGE_SIZE)), new_val, 0);
+		KASSERT(err == 0, ("update_va_mapping failed")); 
+		PFNTOMFN(pfn0 + i) = INVALID_P2M_ENTRY;	/* p2m is indexed by pfn */
+		err = HYPERVISOR_memory_op(
+			XENMEM_decrease_reservation, &reservation);
+		KASSERT(err == 1, ("memory_op failed"));
+	}
+
+	/* 2. Map new pages in place of old pages. */
+	for (i = 0; i < (1 << order); i++) {
+		int pfn;
+		uint64_t new_val;
+		pfn = pfn0 + i;
+		err = HYPERVISOR_memory_op(XENMEM_increase_reservation, &reservation);
+		KASSERT(err == 1, ("memory_op failed"));
+		/* Widen before shifting so frames above 4GB are not truncated. */
+		new_val = (uint64_t)mfn << PAGE_SHIFT;
+		err = HYPERVISOR_update_va_mapping(
+			(vm_offset_t)addr + (i * PAGE_SIZE), 
+			new_val, PG_KERNEL);
+		KASSERT(err == 0, ("update_va_mapping failed"));
+		xen_machphys_update(mfn, pfn);
+		PFNTOMFN(pfn) = mfn;
+	}
+
+	xen_tlb_flush();
+	balloon_unlock(flags);
+}
+
+extern unsigned long cpu0prvpage;
+extern unsigned long *SMPpt;
+extern  struct user	*proc0uarea;
+extern  vm_offset_t	proc0kstack;
+extern int vm86paddr, vm86phystk;
+char *bootmem_start, *bootmem_current, *bootmem_end;
+
+pteinfo_t *pteinfo_list;
+void initvalues(start_info_t *startinfo);
+
+struct ringbuf_head *xen_store; /* XXX move me */
+char *console_page;
+
+/*
+ * Boot-time bump allocator: returns the current allocation pointer and
+ * advances it by size bytes.  Asserts that the request still fits below
+ * bootmem_end.
+ */
+void *
+bootmem_alloc(unsigned int size)
+{
+	char *const chunk = bootmem_current;
+
+	KASSERT(chunk + size <= bootmem_end, ("bootmem_alloc failed"));
+	bootmem_current = chunk + size;
+	return (chunk);
+}
+
+/*
+ * Give back the most recent boot-time allocation.  Asserts that ptr is
+ * exactly the last size bytes handed out and that rewinding stays at or
+ * above bootmem_start, then moves the allocation pointer back.
+ */
+void
+bootmem_free(void *ptr, unsigned int size)
+{
+	char *const new_top = bootmem_current - size;
+
+	KASSERT((char *)ptr == new_top && new_top >= bootmem_start,
+	    ("bootmem_free failed"));
+	bootmem_current = new_top;
+}
+
+#if 0
+/*
+ * Translate machine address mpa: the frame is looked up in
+ * machine_to_phys_mapping[] and the bits of mpa outside PG_FRAME are
+ * OR-ed back in.  (Currently compiled out by the enclosing #if 0.)
+ */
+static vm_paddr_t
+xpmap_mtop2(vm_paddr_t mpa)
+{
+        return ((machine_to_phys_mapping[mpa >> PAGE_SHIFT] << PAGE_SHIFT)
+            ) | (mpa & ~PG_FRAME);
+}
+
+/*
+ * Return the entry for va from the table at xen_start_info->pt_base,
+ * indexed by va >> 22.
+ */
+static pd_entry_t 
+xpmap_get_bootpde(vm_paddr_t va)
+{
+
+        return ((pd_entry_t *)xen_start_info->pt_base)[va >> 22];
+}
+
+/*
+ * Variant of xpmap_get_bootpde() whose frame bits are replaced by
+ * xpmap_mtop2(frame) + KERNBASE.  An entry without PG_V is returned
+ * with its frame bits cleared.
+ */
+static pd_entry_t
+xpmap_get_vbootpde(vm_paddr_t va)
+{
+        pd_entry_t pde;
+
+        pde = xpmap_get_bootpde(va);
+        if ((pde & PG_V) == 0)
+                return (pde & ~PG_FRAME);
+        return (pde & ~PG_FRAME) |
+                (xpmap_mtop2(pde & PG_FRAME) + KERNBASE);
+}
+
+/*
+ * Return a pointer to the page-table entry for va, located through
+ * xpmap_get_vbootpde().  Returns (void *)-1 when the covering directory
+ * entry does not have PG_V set.
+ */
+static pt_entry_t *
+xpmap_get_bootptep(vm_paddr_t va)
+{
+        pd_entry_t pde;
+
+        pde = xpmap_get_vbootpde(va);
+        if ((pde & PG_V) == 0)
+                return (void *)-1;
+#define PT_MASK         0x003ff000      /* page table address bits */
+        return &(((pt_entry_t *)(pde & PG_FRAME))[(va & PT_MASK) >> PAGE_SHIFT]);
+}
+
+/*
+ * Return the page-table entry for va.
+ * NOTE(review): xpmap_get_bootptep() returns (void *)-1 for an invalid
+ * directory entry and that value is dereferenced here unchecked.
+ */
+static pt_entry_t
+xpmap_get_bootpte(vm_paddr_t va)
+{
+
+        return xpmap_get_bootptep(va)[0];
+}
+#endif
+
+
+#ifdef ADD_ISA_HOLE
+/*
+ * Shift the contents of the phys_machine table up by one page worth of
+ * entries, update Xen's view for every entry and fill the first page of
+ * the table with INVALID_P2M_ENTRY.
+ *
+ * phys_machine - table of nr_pages unsigned long entries
+ * nr_pages     - number of entries in the table
+ *
+ * The shift is done by swapping adjacent page-sized chunks from the top
+ * of the table downwards, using one bootmem page as scratch space.
+ */
+static void
+shift_phys_machine(unsigned long *phys_machine, int nr_pages)
+{
+
+        unsigned long *tmp_page, *current_page, *next_page;
+	int i;
+
+	tmp_page = bootmem_alloc(PAGE_SIZE);
+	current_page = phys_machine + nr_pages - (PAGE_SIZE/sizeof(unsigned long));  
+	next_page = current_page - (PAGE_SIZE/sizeof(unsigned long));  
+	/* NOTE(review): this copy is overwritten on the first loop pass. */
+	bcopy(phys_machine, tmp_page, PAGE_SIZE);
+
+	while (current_page > phys_machine) { 
+	        /*  save next page */
+	        bcopy(next_page, tmp_page, PAGE_SIZE);
+	        /* shift down page */
+		bcopy(current_page, next_page, PAGE_SIZE);
+	        /*  finish swap */
+	        bcopy(tmp_page, current_page, PAGE_SIZE);
+	  
+		current_page -= (PAGE_SIZE/sizeof(unsigned long));
+		next_page -= (PAGE_SIZE/sizeof(unsigned long));
+	}
+	bootmem_free(tmp_page, PAGE_SIZE);	
+	
+	/*
+	 * NOTE(review): this also walks the first page, which is only
+	 * invalidated by the memset() below - confirm that is intended.
+	 */
+	for (i = 0; i < nr_pages; i++) {
+	        xen_machphys_update(phys_machine[i], i);
+	}
+	/*
+	 * memset() stores a byte value, so this presumably relies on
+	 * INVALID_P2M_ENTRY being all-ones - TODO confirm.
+	 */
+	memset(phys_machine, INVALID_P2M_ENTRY, PAGE_SIZE);
+
+}
+#endif /* ADD_ISA_HOLE */
+
+/*
+ * Publish the layout of the physical-to-machine table to Xen.
+ *
+ * A two-level directory is built: each page of xen_phys_machine[] gets
+ * its machine frame recorded in a frame-list page, and each frame-list
+ * page gets its machine frame recorded in the single frame-list-list
+ * page.  The result is advertised through HYPERVISOR_shared_info->arch;
+ * the Xen suspend/restore code uses it to locate the mapping table.
+ * The arg parameter is unused.
+ */
+static void
+init_frame_list_list(void *arg)
+{
+	unsigned long nr_pages = xen_start_info->nr_pages;
+#define FPP	(PAGE_SIZE/sizeof(xen_pfn_t))
+	int pg, slot, dir;
+
+	xen_pfn_to_mfn_frame_list_list = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
+
+	dir = -1;
+	slot = 0;
+	for (pg = 0; pg < nr_pages; pg += FPP) {
+		/* Start a new frame-list page whenever the current one is full. */
+		if ((slot & (FPP - 1)) == 0) {
+			dir++;
+			xen_pfn_to_mfn_frame_list[dir] =
+			    malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
+			xen_pfn_to_mfn_frame_list_list[dir] =
+			    VTOMFN(xen_pfn_to_mfn_frame_list[dir]);
+			slot = 0;
+		}
+		xen_pfn_to_mfn_frame_list[dir][slot] =
+		    VTOMFN(&xen_phys_machine[pg]);
+		slot++;
+	}
+
+	HYPERVISOR_shared_info->arch.max_pfn = nr_pages;
+	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+	    VTOMFN(xen_pfn_to_mfn_frame_list_list);
+}
+SYSINIT(init_fll, SI_SUB_DEVFS, SI_ORDER_ANY, init_frame_list_list, NULL);
+
+extern unsigned long physfree;
+
+int pdir, curoffset;
+extern int nkpt;
+
+/*
+ * Early bootstrap for a paravirtualized Xen guest.
+ *
+ * startinfo - start-of-day information supplied by Xen; it is saved in
+ *             xen_start_info and supplies pt_base, nr_pt_frames, nr_pages,
+ *             mfn_list and the shared_info/store/console frames used below.
+ *
+ * Carves the boot-time allocator, GDT, LDT, shared-info, xenstore and
+ * console pages, proc0's kernel stack and the vm86 area out of the virtual
+ * space that follows the initial page tables (tracked in cur_space),
+ * builds a new PDPT and page directories and switches to them, populates
+ * the remaining kernel page-table pages, maps the Xen-provided frames,
+ * remaps the kernel text without PG_RW and finally records the first free
+ * physical address in physfree/init_first.
+ *
+ * NOTE(review): the code after the PAE #endif uses IdlePTDnew,
+ * IdlePTDnewma and IdlePDPTnew, which are only declared under PAE, so a
+ * non-PAE build of this function does not look possible - confirm.
+ */
+void
+initvalues(start_info_t *startinfo)
+{ 
+	int l3_pages, l2_pages, l1_pages, offset;
+	vm_offset_t cur_space, cur_space_pt;
+	struct physdev_set_iopl set_iopl;
+	
+	vm_paddr_t KPTphys, IdlePTDma;
+	vm_paddr_t console_page_ma, xen_store_ma;
+	vm_offset_t KPTphysoff, tmpva;
+	vm_paddr_t shinfo;
+#ifdef PAE
+	vm_paddr_t IdlePDPTma, IdlePDPTnewma;
+	vm_paddr_t IdlePTDnewma[4];
+	pd_entry_t *IdlePDPTnew, *IdlePTDnew;
+#else
+	vm_paddr_t pdir_shadow_ma;
+#endif
+	unsigned long i;
+	int ncpus, err;
+
+	/*
+	 * Size nkpt from the amount of memory, then clamp it to the number
+	 * of directory slots available above KPTDI and below
+	 * HYPERVISOR_VIRT_START.
+	 */
+	nkpt = min(
+		min(
+			max((startinfo->nr_pages >> NPGPTD_SHIFT), nkpt),
+		    NPGPTD*NPDEPG - KPTDI),
+		    (HYPERVISOR_VIRT_START - KERNBASE) >> PDRSHIFT);
+	
+#ifdef SMP
+	ncpus = MAXCPU;
+#else
+	ncpus = 1;
+#endif	
+
+	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);	
+#ifdef notyet
+	/*
+	 * need to install handler
+	 */
+	HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments_notify);	
+#endif	
+	xen_start_info = startinfo;
+	xen_phys_machine = (xen_pfn_t *)startinfo->mfn_list;
+
+	IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
+	l1_pages = 0;
+	
+#ifdef PAE
+	l3_pages = 1;
+	l2_pages = 0;
+	IdlePDPT = (pd_entry_t *)startinfo->pt_base;
+	IdlePDPTma = xpmap_ptom(VTOP(startinfo->pt_base));
+	for (i = (KERNBASE >> 30);
+	     (i < 4) && (IdlePDPT[i] != 0); i++)
+			l2_pages++;
+	/*
+	 * Note that only one page directory has been allocated at this point.
+	 * Thus, if KERNBASE
+	 */
+#if 0
+	for (i = 0; i < l2_pages; i++)
+		IdlePTDma[i] = xpmap_ptom(VTOP(IdlePTD + i*PAGE_SIZE));
+#endif
+	
+	l2_pages = (l2_pages == 0) ? 1 : l2_pages;
+#else	
+	l3_pages = 0;
+	l2_pages = 1;
+#endif
+	/* Count the populated page-directory entries starting at KERNBASE. */
+	for (i = (((KERNBASE>>18) & PAGE_MASK)>>PAGE_SHIFT);
+	     (i<l2_pages*NPDEPG) && (i<(VM_MAX_KERNEL_ADDRESS>>PDRSHIFT)); i++) {
+		
+		if (IdlePTD[i] == 0)
+			break;
+		l1_pages++;
+	}
+	
+	/* number of pages allocated after the pts + 1*/;
+	cur_space = xen_start_info->pt_base +
+	    ((xen_start_info->nr_pt_frames) + 3 )*PAGE_SIZE;
+	printk("initvalues(): wooh - availmem=%x,%x\n", avail_space, cur_space);
+
+	printk("KERNBASE=%x,pt_base=%x, VTOPFN(base)=%x, nr_pt_frames=%x\n",
+	    KERNBASE,xen_start_info->pt_base, VTOPFN(xen_start_info->pt_base),
+	    xen_start_info->nr_pt_frames);
+	xendebug_flags = 0; /* 0xffffffff; */
+
+	/* allocate 4 pages for bootmem allocator */
+	bootmem_start = bootmem_current = (char *)cur_space;
+	cur_space += (4 * PAGE_SIZE);
+	bootmem_end = (char *)cur_space;
+	
+	/* allocate page for gdt */
+	gdt = (union descriptor *)cur_space;
+	cur_space += PAGE_SIZE*ncpus;
+
+        /* allocate page for ldt */
+	/*
+	 * NOTE(review): cur_space is advanced twice here, so two pages are
+	 * reserved although the comment mentions one - confirm intent.
+	 */
+	ldt = (union descriptor *)cur_space; cur_space += PAGE_SIZE;
+	cur_space += PAGE_SIZE;
+	
+	HYPERVISOR_shared_info = (shared_info_t *)cur_space;
+	cur_space += PAGE_SIZE;
+
+	xen_store = (struct ringbuf_head *)cur_space;
+	cur_space += PAGE_SIZE;
+
+	console_page = (char *)cur_space;
+	cur_space += PAGE_SIZE;
+
+#ifdef ADD_ISA_HOLE
+	shift_phys_machine(xen_phys_machine, xen_start_info->nr_pages);
+#endif
+	/* 
+	 * pre-zero unused mapped pages - mapped on 4MB boundary
+	 */
+#ifdef PAE
+	IdlePDPT = (pd_entry_t *)startinfo->pt_base;
+	IdlePDPTma = xpmap_ptom(VTOP(startinfo->pt_base));
+	/*
+	 * Note that only one page directory has been allocated at this point.
+	 * Thus, if KERNBASE
+	 */
+	IdlePTD = (pd_entry_t *)((uint8_t *)startinfo->pt_base + PAGE_SIZE);
+	IdlePTDma = xpmap_ptom(VTOP(IdlePTD));
+	l3_pages = 1;
+#else	
+	IdlePTD = (pd_entry_t *)startinfo->pt_base;
+	IdlePTDma = xpmap_ptom(VTOP(startinfo->pt_base));
+	l3_pages = 0;
+#endif
+	/* These assignments replace the counts derived by the scan above. */
+	l2_pages = 1;
+	l1_pages = xen_start_info->nr_pt_frames - l2_pages - l3_pages;
+
+	KPTphysoff = (l2_pages + l3_pages)*PAGE_SIZE;
+
+	KPTphys = xpmap_ptom(VTOP(startinfo->pt_base + KPTphysoff));
+	XENPRINTF("IdlePTD %p\n", IdlePTD);
+	XENPRINTF("nr_pages: %ld shared_info: 0x%lx flags: 0x%lx pt_base: 0x%lx "
+		  "mod_start: 0x%lx mod_len: 0x%lx\n",
+		  xen_start_info->nr_pages, xen_start_info->shared_info, 
+		  xen_start_info->flags, xen_start_info->pt_base, 
+		  xen_start_info->mod_start, xen_start_info->mod_len);
+	/* Map proc0's KSTACK */
+
+	proc0kstack = cur_space; cur_space += (KSTACK_PAGES * PAGE_SIZE);
+	printk("proc0kstack=%u\n", proc0kstack);
+
+	/* vm86/bios stack */
+	cur_space += PAGE_SIZE;
+
+	/* Map space for the vm86 region */
+	vm86paddr = (vm_offset_t)cur_space;
+	cur_space += (PAGE_SIZE * 3);
+
+#ifdef PAE
+	IdlePDPTnew = (pd_entry_t *)cur_space; cur_space += PAGE_SIZE;
+	bzero(IdlePDPTnew, PAGE_SIZE);
+
+	IdlePDPTnewma =  xpmap_ptom(VTOP(IdlePDPTnew));
+	IdlePTDnew = (pd_entry_t *)cur_space; cur_space += 4*PAGE_SIZE;
+	bzero(IdlePTDnew, 4*PAGE_SIZE);
+
+	for (i = 0; i < 4; i++)
+		IdlePTDnewma[i] =
+		    xpmap_ptom(VTOP((uint8_t *)IdlePTDnew + i*PAGE_SIZE));
+	/*
+	 * L3
+	 *
+	 * Copy the 4 machine addresses of the new PTDs in to the PDPT
+	 * 
+	 */
+	for (i = 0; i < 4; i++)
+		IdlePDPTnew[i] = IdlePTDnewma[i] | PG_V;
+
+	__asm__("nop;");
+	/*
+	 *
+	 * re-map the new PDPT read-only
+	 */
+	PT_SET_MA(IdlePDPTnew, IdlePDPTnewma | PG_V);
+	/*
+	 * 
+	 * Unpin the current PDPT
+	 */
+	xen_pt_unpin(IdlePDPTma);
+	
+	/* Recount l1_pages: stop at the first empty entry from KERNBASE. */
+	for (i = 0; i < 20; i++) {
+		int startidx = ((KERNBASE >> 18) & PAGE_MASK) >> 3;
+
+		if (IdlePTD[startidx + i] == 0) {
+			l1_pages = i;
+			break;
+		}	
+	}
+		    
+#endif  /* PAE */
+	
+	/* unmap remaining pages from initial 4MB chunk
+	 *
+	 */
+	for (tmpva = cur_space; (tmpva & ((1<<22)-1)) != 0; tmpva += PAGE_SIZE) {
+		bzero((char *)tmpva, PAGE_SIZE);
+		PT_SET_MA(tmpva, (vm_paddr_t)0);
+	}
+	
+	PT_UPDATES_FLUSH();
+
+	/* Carry the existing kernel directory entries over to the new PTDs. */
+	memcpy(((uint8_t *)IdlePTDnew) + ((unsigned int)(KERNBASE >> 18)),
+	    ((uint8_t *)IdlePTD) + ((KERNBASE >> 18) & PAGE_MASK),
+	    l1_pages*sizeof(pt_entry_t));
+
+	/* Drop PG_RW from the new PTD pages before they go live. */
+	for (i = 0; i < 4; i++) {
+		PT_SET_MA((uint8_t *)IdlePTDnew + i*PAGE_SIZE,
+		    IdlePTDnewma[i] | PG_V);
+	}
+	xen_load_cr3(VTOP(IdlePDPTnew));
+	xen_pgdpt_pin(xpmap_ptom(VTOP(IdlePDPTnew)));
+
+	/* allocate remainder of nkpt pages */
+	cur_space_pt = cur_space;
+	for (offset = (KERNBASE >> PDRSHIFT), i = l1_pages; i < nkpt;
+	     i++, cur_space += PAGE_SIZE) {
+		pdir = (offset + i) / NPDEPG;
+		curoffset = ((offset + i) % NPDEPG);
+		if (((offset + i) << PDRSHIFT) == VM_MAX_KERNEL_ADDRESS)
+			break;
+		
+		/*
+		 * make sure that all the initial page table pages
+		 * have been zeroed
+		 */
+		PT_SET_MA(cur_space_pt,
+		    xpmap_ptom(VTOP(cur_space)) | PG_V | PG_RW);
+		bzero((char *)cur_space_pt, PAGE_SIZE);
+		PT_SET_MA(cur_space_pt, (vm_paddr_t)0);
+		xen_pt_pin(xpmap_ptom(VTOP(cur_space)));
+		xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
+			curoffset*sizeof(vm_paddr_t)), 
+		    xpmap_ptom(VTOP(cur_space)) | PG_KERNEL);
+		PT_UPDATES_FLUSH();
+	}
+	
+	/* Enter the four PTD pages at directory slots PTDPTDI..PTDPTDI+3. */
+	for (i = 0; i < 4; i++) {
+		pdir = (PTDPTDI + i) / NPDEPG;
+		curoffset = (PTDPTDI + i) % NPDEPG;
+
+		xen_queue_pt_update((vm_paddr_t)(IdlePTDnewma[pdir] +
+			curoffset*sizeof(vm_paddr_t)), 
+		    IdlePTDnewma[i] | PG_V);
+	}
+
+	PT_UPDATES_FLUSH();
+	
+	IdlePTD = IdlePTDnew;
+	IdlePDPT = IdlePDPTnew;
+	IdlePDPTma = IdlePDPTnewma;
+	
+	/*
+	 * shared_info is an unsigned long so this will randomly break if
+	 * it is allocated above 4GB - I guess people are used to that
+	 * sort of thing with Xen ... sigh
+	 */
+	shinfo = xen_start_info->shared_info;
+	PT_SET_MA(HYPERVISOR_shared_info, shinfo | PG_KERNEL);
+	
+	printk("#4\n");
+
+	xen_store_ma = (((vm_paddr_t)xen_start_info->store_mfn) << PAGE_SHIFT);
+	PT_SET_MA(xen_store, xen_store_ma | PG_KERNEL);
+	console_page_ma = (((vm_paddr_t)xen_start_info->console.domU.mfn) << PAGE_SHIFT);
+	PT_SET_MA(console_page, console_page_ma | PG_KERNEL);
+
+	printk("#5\n");
+
+	set_iopl.iopl = 1;
+	err = HYPERVISOR_physdev_op(PHYSDEVOP_SET_IOPL, &set_iopl);
+	KASSERT(err == 0, ("physdev_op failed"));
+	printk("#6\n");
+#if 0
+	/* add page table for KERNBASE */
+	xen_queue_pt_update(IdlePTDma + KPTDI*sizeof(vm_paddr_t), 
+			    xpmap_ptom(VTOP(cur_space) | PG_KERNEL));
+	xen_flush_queue();
+#ifdef PAE	
+	xen_queue_pt_update(pdir_shadow_ma[3] + KPTDI*sizeof(vm_paddr_t), 
+			    xpmap_ptom(VTOP(cur_space) | PG_V | PG_A));
+#else
+	xen_queue_pt_update(pdir_shadow_ma + KPTDI*sizeof(vm_paddr_t), 
+			    xpmap_ptom(VTOP(cur_space) | PG_V | PG_A));
+#endif	
+	xen_flush_queue();
+	cur_space += PAGE_SIZE;
+	printk("#6\n");
+#endif /* 0 */	
+#ifdef notyet
+	if (xen_start_info->flags & SIF_INITDOMAIN) {
+		/* Map first megabyte */
+		for (i = 0; i < (256 << PAGE_SHIFT); i += PAGE_SIZE) 
+			PT_SET_MA(KERNBASE + i, i | PG_KERNEL | PG_NC_PCD);
+		xen_flush_queue();
+	}
+#endif
+	/*
+	 * re-map kernel text read-only
+	 *
+	 */
+	for (i = (((vm_offset_t)&btext) & ~PAGE_MASK);
+	     i < (((vm_offset_t)&etext) & ~PAGE_MASK); i += PAGE_SIZE)
+		PT_SET_MA(i, xpmap_ptom(VTOP(i)) | PG_V | PG_A);
+	
+	printk("#7\n");
+	/* Everything below cur_space is in use; the rest is free memory. */
+	physfree = VTOP(cur_space);
+	init_first = physfree >> PAGE_SHIFT;
+	IdlePTD = (pd_entry_t *)VTOP(IdlePTD);
+	IdlePDPT = (pd_entry_t *)VTOP(IdlePDPT);
+	setup_xen_features();
+	printk("#8, proc0kstack=%u\n", proc0kstack);
+}
+
+
+/*
+ * Exception and system-call entry points, one trap_info_t per vector.
+ * Each initializer lists, in order: the vector number, a second value
+ * that is 0 or 3 in this table (presumably the privilege level allowed
+ * to raise the vector - confirm against trap_info_t), the kernel code
+ * selector and the address of the IDTVEC() entry stub.  Vector 8 has no
+ * entry; an all-zero initializer ends the table.
+ */
+trap_info_t trap_table[] = {
+	{ 0,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(div)},
+	{ 1,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dbg)},
+	{ 3,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bpt)},
+	{ 4,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ofl)},
+	/* This is UPL on Linux and KPL on BSD */
+	{ 5,   3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(bnd)},
+	{ 6,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(ill)},
+	{ 7,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(dna)},
+	/*
+	 * { 8,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(XXX)},
+	 *   no handler for double fault
+	 */
+	{ 9,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpusegm)},
+	{10,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(tss)},
+	{11,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(missing)},
+	{12,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(stk)},
+	{13,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(prot)},
+	{14,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(page)},
+	{15,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(rsvd)},
+	{16,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(fpu)},
+	{17,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(align)},
+	{18,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(mchk)},
+	{19,   0, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(xmm)},
+	{0x80, 3, GSEL(GCODE_SEL, SEL_KPL), (unsigned long) &IDTVEC(int0x80_syscall)},
+	{  0, 0,           0, 0 }
+};
+
+/********** CODE WORTH KEEPING ABOVE HERE *****************/ 
+
+void xen_failsafe_handler(void);
+
+/*
+ * No recovery is attempted when this handler is entered: it panics
+ * unconditionally.
+ */
+void
+xen_failsafe_handler(void)
+{
+	panic("xen_failsafe_handler called!\n");
+}
+
+void xen_handle_thread_switch(struct pcb *pcb);
+
+/*
+ * Called by cpu_switch() when switching threads.
+ *
+ * pcb - process control block of the thread that is about to run
+ *
+ * Batches into one multicall the stack switch notification plus an
+ * update_descriptor call for each of the fs and gs descriptors whose
+ * contents in the per-CPU GDT differ from the copy stored in pcb.
+ */
+void
+xen_handle_thread_switch(struct pcb *pcb)
+{
+	uint32_t *cur = (uint32_t *)&PCPU_GET(fsgs_gdt)[0];
+	uint32_t *want = (uint32_t *)&pcb->pcb_fsd;
+	multicall_entry_t mcl[3];
+	int ncalls = 0;
+	int d;
+
+	/* Notify Xen of task switch */
+	mcl[ncalls].op = __HYPERVISOR_stack_switch;
+	mcl[ncalls].args[0] = GSEL(GDATA_SEL, SEL_KPL);
+	mcl[ncalls].args[1] = (unsigned long)pcb;
+	ncalls++;
+
+	/* First pass checks fsd, second pass gsd; each is two 32-bit words. */
+	for (d = 0; d < 2; d++, cur += 2, want += 2) {
+		if (cur[0] == want[0] && cur[1] == want[1])
+			continue;
+		mcl[ncalls].op = __HYPERVISOR_update_descriptor;
+		*(uint64_t *)&mcl[ncalls].args[0] = vtomach((vm_offset_t)cur);
+		*(uint64_t *)&mcl[ncalls].args[2] = *(uint64_t *)want;
+		ncalls++;
+	}
+
+	(void)HYPERVISOR_multicall(mcl, ncalls);
+}

Property changes on: i386/xen/xen_machdep.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/xen/mp_machdep.c
===================================================================
--- i386/xen/mp_machdep.c	(.../stable/6/sys)	(revision 0)
+++ i386/xen/mp_machdep.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,1407 @@
+/*-
+ * Copyright (c) 1996, by Steve Passe
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. The name of the developer may NOT be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_apic.h"
+#include "opt_cpu.h"
+#include "opt_kdb.h"
+#include "opt_kstack_pages.h"
+#include "opt_mp_watchdog.h"
+#include "opt_sched.h"
+
+#if !defined(lint)
+#if !defined(SMP)
+#error How did you get here?
+#endif
+
+#ifndef DEV_APIC
+#error The apic device is required for SMP, add "device apic" to your config file.
+#endif
+#if defined(CPU_DISABLE_CMPXCHG) && !defined(COMPILING_LINT)
+#error SMP not supported with CPU_DISABLE_CMPXCHG
+#endif
+#endif /* not lint */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/cons.h>	/* cngetc() */
+#ifdef GPROF 
+#include <sys/gmon.h>
+#endif
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/memrange.h>
+#include <sys/mutex.h>
+#include <sys/pcpu.h>
+#include <sys/proc.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_page.h>
+
+#include <machine/apicreg.h>
+#include <machine/clock.h>
+#include <machine/md_var.h>
+#include <machine/mp_watchdog.h>
+#include <machine/pcb.h>
+#include <machine/pcpu.h>
+#include <machine/psl.h>
+#include <machine/smp.h>
+#include <machine/smptests.h>	/** COUNT_XINVLTLB_HITS */
+#include <machine/specialreg.h>
+
+#include <xen/hypervisor.h>
+#include <xen/xen_intr.h>
+#include <xen/evtchn.h>
+#include <xen/xen_intr.h>
+#include <xen/hypervisor.h>
+#include <xen/interface/vcpu.h>
+
+#define WARMBOOT_TARGET		0
+#define WARMBOOT_OFF		(KERNBASE + 0x0467)
+#define WARMBOOT_SEG		(KERNBASE + 0x0469)
+
+#define CMOS_REG		(0x70)
+#define CMOS_DATA		(0x71)
+#define BIOS_RESET		(0x0f)
+#define BIOS_WARM		(0x0a)
+
+/*
+ * this code MUST be enabled here and in mpboot.s.
+ * it follows the very early stages of AP boot by placing values in CMOS ram.
+ * it NORMALLY will never be needed and thus the primitive method for enabling.
+ *
+#define CHECK_POINTS
+ */
+
+/* lock region used by kernel profiling */
+int	mcount_lock;
+
+/** XXX FIXME: where does this really belong, isa.h/isa.c perhaps? */
+int	current_postcode;
+
+int	mp_naps;		/* # of Application Processors */
+int	boot_cpu_id = -1;	/* designated BSP */
+extern	int nkpt;
+
+extern	struct pcpu __pcpu[];
+
+/*
+ * CPU topology map datastructures for HTT.
+ */
+static struct cpu_group mp_groups[MAXCPU];
+static struct cpu_top mp_top;
+
+/* AP uses this during bootstrap.  Do not staticize.  */
+char *bootSTK;
+static int bootAP;
+static union descriptor *bootAPgdt;
+
+static char resched_name[MAX_VIRT_CPUS][15];
+static char callfunc_name[MAX_VIRT_CPUS][15];
+
+/* Free these after use */
+void *bootstacks[MAXCPU];
+
+/* Hotwire a 0->4MB V==P mapping */
+extern pt_entry_t *KPTphys;
+
+struct pcb stoppcbs[MAXCPU];
+
+/* Variables needed for SMP tlb shootdown. */
+vm_offset_t smp_tlb_addr1;
+vm_offset_t smp_tlb_addr2;
+volatile int smp_tlb_wait;
+
+typedef void call_data_func_t(uintptr_t , uintptr_t);
+
+
+#ifdef COUNT_IPIS
+/* Interrupt counts. */
+#ifdef IPI_PREEMPTION
+static u_long *ipi_preempt_counts[MAXCPU];
+#endif
+static u_long *ipi_ast_counts[MAXCPU];
+u_long *ipi_invltlb_counts[MAXCPU];
+u_long *ipi_invlrng_counts[MAXCPU];
+u_long *ipi_invlpg_counts[MAXCPU];
+u_long *ipi_invlcache_counts[MAXCPU];
+u_long *ipi_rendezvous_counts[MAXCPU];
+u_long *ipi_lazypmap_counts[MAXCPU];
+#endif
+
+/*
+ * Local data and functions.
+ */
+
+static u_int logical_cpus;
+
+/* used to hold the AP's until we are ready to release them */
+static struct mtx ap_boot_mtx;
+
+/* Set to 1 once we're ready to let the APs out of the pen. */
+static volatile int aps_ready = 0;
+
+/*
+ * Store data from cpu_add() until later in the boot when we actually setup
+ * the APs.  The table is indexed by APIC ID.
+ *
+ * The flags are unsigned: a one-bit signed bit-field can only hold 0 and
+ * -1, so storing 1 into it would not read back as 1.
+ */
+static struct cpu_info {
+	u_int	cpu_present:1;	/* set by cpu_add() for this APIC ID */
+	u_int	cpu_bsp:1;	/* this CPU is the boot processor */
+	u_int	cpu_disabled:1;	/* listed as disabled by cpu_mp_announce() */
+} cpu_info[MAX_APIC_ID + 1];
+static int cpu_apic_ids[MAXCPU];
+
+/* Holds pending bitmap based IPIs per CPU */
+static volatile u_int cpu_ipi_pending[MAXCPU];
+
+static u_int boot_address;
+
+static void	assign_cpu_ids(void);
+static void	set_interrupt_apic_ids(void);
+static int	start_all_aps(void);
+static int	start_ap(int apic_id);
+static void	release_aps(void *dummy);
+
+static u_int	hyperthreading_cpus;
+static cpumask_t	hyperthreading_cpus_mask;
+extern void Xhypervisor_callback(void);
+extern void failsafe_callback(void);
+extern void pmap_lazyfix_action(void);
+
+/*
+ * Build the smp_topology map.
+ *
+ * Does nothing unless the CPU advertises HTT with more than one logical
+ * CPU.  Otherwise every present APIC ID is assigned, in ascending order,
+ * to a cpu_group; a new group is opened whenever the current one already
+ * has members and the APIC ID is a multiple of the logical CPU count.
+ */
+void
+mp_topology(void)
+{
+	struct cpu_group *cg;
+	int siblings, id, ngroups, cpu;
+
+	/* Nothing to do if there is no HTT support. */
+	if ((cpu_feature & CPUID_HTT) == 0)
+		return;
+	siblings = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
+	if (siblings <= 1)
+		return;
+
+	cg = &mp_groups[0];
+	ngroups = 1;
+	cpu = 0;
+	for (id = 0; id <= MAX_APIC_ID; id++) {
+		if (cpu_info[id].cpu_present) {
+			/*
+			 * A populated group is closed as soon as we reach
+			 * an APIC ID that is not a logical cpu.
+			 */
+			if (cg->cg_count != 0 && (id % siblings) == 0) {
+				cg++;
+				ngroups++;
+			}
+			cg->cg_count++;
+			cg->cg_mask |= 1 << cpu;
+			cpu++;
+		}
+	}
+
+	mp_top.ct_count = ngroups;
+	mp_top.ct_group = mp_groups;
+	smp_topology = &mp_top;
+}
+
+
+/*
+ * Calculate usable address in base memory for AP trampoline code.
+ * This implementation reserves nothing: basemem is returned unchanged.
+ */
+u_int
+mp_bootaddress(u_int basemem)
+{
+	return (basemem);
+}
+
+/*
+ * Record a CPU for later start-up.
+ *
+ * apic_id  - APIC ID of the CPU; must not exceed MAX_APIC_ID
+ * boot_cpu - non-zero if this CPU is the boot processor
+ *
+ * Marks the CPU present, remembers the BSP's APIC ID and counts the CPU
+ * in mp_ncpus, which is capped at MAXCPU.
+ */
+void
+cpu_add(u_int apic_id, char boot_cpu)
+{
+	const char *role = "AP";
+
+	if (apic_id > MAX_APIC_ID) {
+		panic("SMP: APIC ID %d too high", apic_id);
+		return;
+	}
+
+	KASSERT(cpu_info[apic_id].cpu_present == 0, ("CPU %d added twice",
+	    apic_id));
+	cpu_info[apic_id].cpu_present = 1;
+
+	if (boot_cpu) {
+		KASSERT(boot_cpu_id == -1,
+		    ("CPU %d claims to be BSP, but CPU %d already is", apic_id,
+		    boot_cpu_id));
+		boot_cpu_id = apic_id;
+		cpu_info[apic_id].cpu_bsp = 1;
+		role = "BSP";
+	}
+
+	if (mp_ncpus < MAXCPU)
+		mp_ncpus++;
+
+	if (bootverbose)
+		printf("SMP: Added CPU %d (%s)\n", apic_id, role);
+}
+
+/* Set mp_maxid to the highest CPU id the kernel is built for. */
+void
+cpu_mp_setmaxid(void)
+{
+	mp_maxid = MAXCPU - 1;
+}
+
+/*
+ * Decide whether the system should run as a multiprocessor.
+ *
+ * Returns 0 when zero or one CPU was found and 1 otherwise.  As a side
+ * effect all_cpus is set to 1 and mp_ncpus is raised from 0 to 1.
+ */
+int
+cpu_mp_probe(void)
+{
+
+	/*
+	 * Always record BSP in CPU map so that the mbuf init code works
+	 * correctly.
+	 */
+	all_cpus = 1;
+
+	/*
+	 * No CPUs were found, so this must be a UP system: represent it
+	 * as a single CPU with an id of 0.
+	 */
+	if (mp_ncpus == 0)
+		mp_ncpus = 1;
+
+	/*
+	 * Exactly one CPU is a UP system (possibly with an I/O APIC);
+	 * anything else means at least two CPUs were found.
+	 */
+	return (mp_ncpus != 1);
+}
+
+/*
+ * Bring up the application processors.
+ *
+ * Resets the logical-ID to APIC-ID table and the pending-IPI bitmaps,
+ * settles the BSP's APIC ID, assigns CPU ids, starts every AP and then
+ * initialises the logical-CPU bookkeeping and interrupt APIC ids.
+ */
+void
+cpu_mp_start(void)
+{
+	int cpu;
+
+	/* Initialize the logical ID to APIC ID table. */
+	cpu = 0;
+	while (cpu < MAXCPU) {
+		cpu_apic_ids[cpu] = -1;
+		cpu_ipi_pending[cpu] = 0;
+		cpu++;
+	}
+
+#if 0
+	/*
+	 * IPI list that has to be converted to Xen
+	 *
+	 */
+	/* Install an inter-CPU IPI for TLB invalidation */
+	setidt(IPI_INVLTLB, IDTVEC(invltlb),
+	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+	setidt(IPI_INVLPG, IDTVEC(invlpg),
+	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+	setidt(IPI_INVLRNG, IDTVEC(invlrng),
+	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+	/* Install an inter-CPU IPI for cache invalidation. */
+	setidt(IPI_INVLCACHE, IDTVEC(invlcache),
+	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+	/* Install an inter-CPU IPI for lazy pmap release */
+	setidt(IPI_LAZYPMAP, IDTVEC(lazypmap),
+	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+	/* Install an inter-CPU IPI for all-CPU rendezvous */
+	setidt(IPI_RENDEZVOUS, IDTVEC(rendezvous),
+	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+	/* Install generic inter-CPU IPI handler */
+	setidt(IPI_BITMAP_VECTOR, IDTVEC(ipi_intr_bitmap_handler),
+	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+
+	/* Install an inter-CPU IPI for CPU stop/restart */
+	setidt(IPI_STOP, IDTVEC(cpustop),
+	       SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
+#endif
+
+	/* Adopt our own APIC ID as the BSP's unless one was already set. */
+	if (boot_cpu_id != -1) {
+		KASSERT(boot_cpu_id == PCPU_GET(apic_id),
+		    ("BSP's APIC ID doesn't match boot_cpu_id"));
+	} else {
+		boot_cpu_id = PCPU_GET(apic_id);
+		cpu_info[boot_cpu_id].cpu_bsp = 1;
+	}
+	cpu_apic_ids[0] = boot_cpu_id;
+
+	assign_cpu_ids();
+
+	/* Start each Application Processor */
+	start_all_aps();
+
+	/* Setup the initial logical CPUs info. */
+	logical_cpus_mask = 0;
+	logical_cpus = 0;
+	if (cpu_feature & CPUID_HTT)
+		logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
+
+	set_interrupt_apic_ids();
+}
+
+
+/* ipi_vectors[] entry: runs smp_rendezvous_action(); a and b are unused. */
+static void
+iv_rendezvous(uintptr_t a, uintptr_t b)
+{
+	smp_rendezvous_action();
+}
+
+/* ipi_vectors[] entry: calls xen_tlb_flush(); a and b are unused. */
+static void
+iv_invltlb(uintptr_t a, uintptr_t b)
+{
+	xen_tlb_flush();
+}
+
+/* ipi_vectors[] entry: calls xen_invlpg() on address a; b is unused. */
+static void
+iv_invlpg(uintptr_t a, uintptr_t b)
+{
+	xen_invlpg(a);
+}
+
+/*
+ * ipi_vectors[] entry: calls xen_invlpg() for every PAGE_SIZE step from
+ * address a up to, but not including, address b.
+ */
+static void
+iv_invlrng(uintptr_t a, uintptr_t b)
+{
+	vm_offset_t va;
+	vm_offset_t limit = (vm_offset_t)b;
+
+	for (va = (vm_offset_t)a; va < limit; va += PAGE_SIZE)
+		xen_invlpg(va);
+}
+
+
+/* ipi_vectors[] entry: executes wbinvd(); a and b are unused. */
+static void
+iv_invlcache(uintptr_t a, uintptr_t b)
+{
+
+	wbinvd();
+}
+
+/* ipi_vectors[] entry: runs pmap_lazyfix_action(); a and b are unused. */
+static void
+iv_lazypmap(uintptr_t a, uintptr_t b)
+{
+	pmap_lazyfix_action();
+}
+
+
+/* ipi_vectors[] placeholder: deliberately does nothing. */
+static void
+iv_noop(uintptr_t a, uintptr_t b)
+{
+}
+
+/*
+ * Dispatch table used by smp_call_function_interrupt(): the entry at
+ * index call_data->func_id is called with call_data's arg1 and arg2.
+ * Only the first eight slots are initialized here; if IPI_BITMAP_VECTOR
+ * is larger than that, the remaining slots are NULL.
+ */
+static call_data_func_t *ipi_vectors[IPI_BITMAP_VECTOR] = 
+{
+  iv_noop,
+  iv_noop,
+  iv_rendezvous,
+  iv_invltlb,
+  iv_invlpg,
+  iv_invlrng,
+  iv_invlcache,
+  iv_lazypmap,
+};
+
+/*
+ * Handler for the per-CPU reschedule IPI.
+ *
+ * Atomically fetches and clears this CPU's pending-IPI bitmap and services
+ * the bits found in it.  IPI_AST needs no work beyond optional counting.
+ * With IPI_PREEMPTION, IPI_PREEMPT either switches away from the current
+ * thread or, when td_critnest is greater than one, only marks the thread
+ * as owing a preemption; the idle thread is never preempted.
+ * The unused argument is ignored.
+ */
+static void
+smp_reschedule_interrupt(void *unused)
+{
+	int cpu = PCPU_GET(cpuid);
+	u_int ipi_bitmap;
+
+	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
+
+#ifdef IPI_PREEMPTION
+	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
+#ifdef COUNT_IPIS
+		/* Parenthesized so the counter, not the pointer, is bumped. */
+		(*ipi_preempt_counts[cpu])++;
+#endif
+		mtx_lock_spin(&sched_lock);
+		/* Don't preempt the idle thread */
+		if (curthread != PCPU_GET(idlethread)) {
+			struct thread *running_thread = curthread;
+			if (running_thread->td_critnest > 1) 
+				running_thread->td_owepreempt = 1;
+			else 		
+				mi_switch(SW_INVOL | SW_PREEMPT, NULL);
+		}
+		mtx_unlock_spin(&sched_lock);
+	}
+#endif
+
+	if (ipi_bitmap & (1 << IPI_AST)) {
+#ifdef COUNT_IPIS
+		/* Parenthesized so the counter, not the pointer, is bumped. */
+		(*ipi_ast_counts[cpu])++;
+#endif
+		/* Nothing to do for AST */
+	}
+}
+
+/*
+ * Request block read by smp_call_function_interrupt() on the CPUs that
+ * receive a call-function IPI.
+ */
+struct _call_data {
+	uint16_t func_id;	/* index into ipi_vectors[] */
+	uint16_t wait;		/* non-zero: handler also bumps 'finished' */
+	uintptr_t arg1;		/* first argument passed to the function */
+	uintptr_t arg2;		/* second argument passed to the function */
+	atomic_t started;	/* incremented before the function is called */
+	atomic_t finished;	/* incremented after it returns, if 'wait' */
+};
+
+/* Request currently being serviced; dereferenced by the IPI handler. */
+static struct _call_data *call_data;
+
+/*
+ * Handler for the call-function IPI.
+ *
+ * Copies the request out of *call_data, looks the target up in
+ * ipi_vectors[] by func_id, signals 'started', runs the function with
+ * arg1/arg2 and, if the request asked to wait, signals 'finished'.
+ * smp_tlb_wait is incremented in every case.  Panics on a func_id that
+ * is outside the table or has no handler.  The arg parameter is unused.
+ */
+static void
+smp_call_function_interrupt(void *arg)
+{	
+	call_data_func_t *func;
+	uintptr_t arg1 = call_data->arg1;
+	uintptr_t arg2 = call_data->arg2;
+	int wait = call_data->wait;
+	atomic_t *started = &call_data->started;
+	atomic_t *finished = &call_data->finished;
+
+	/* ipi_vectors[] has IPI_BITMAP_VECTOR entries: valid ids are below it. */
+	if (call_data->func_id >= IPI_BITMAP_VECTOR)
+		panic("invalid function id %u", call_data->func_id);
+
+	func = ipi_vectors[call_data->func_id];
+	/* Slots without an explicit initializer are NULL. */
+	if (func == NULL)
+		panic("no handler for function id %u", call_data->func_id);
+	/*
+	 * Notify initiating CPU that I've grabbed the data and am
+	 * about to execute the function
+	 */
+	mb();
+	atomic_inc(started);
+	/*
+	 * At this point the info structure may be out of scope unless wait==1
+	 */
+	(*func)(arg1, arg2);
+
+	if (wait) {
+		mb();
+		atomic_inc(finished);
+	}
+	atomic_add_int(&smp_tlb_wait, 1);
+}
+
+/*
+ * Print the list of CPUs: the BSP first as cpu0, then every other present
+ * APIC ID in ascending order.  Enabled APs are numbered from cpu1 upwards;
+ * disabled ones are listed without a number.
+ */
+void
+cpu_mp_announce(void)
+{
+	int apic, logical;
+
+	/* List CPUs */
+	printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
+
+	logical = 1;
+	for (apic = 0; apic <= MAX_APIC_ID; apic++) {
+		/* The BSP was printed above; absent ids are skipped. */
+		if (cpu_info[apic].cpu_bsp || !cpu_info[apic].cpu_present)
+			continue;
+		if (!cpu_info[apic].cpu_disabled) {
+			KASSERT(logical < mp_ncpus,
+			    ("mp_ncpus and actual cpus are out of whack"));
+			printf(" cpu%d (AP): APIC ID: %2d\n", logical, apic);
+			logical++;
+			continue;
+		}
+		printf("  cpu (AP): APIC ID: %2d (disabled)\n", apic);
+	}
+}
+
+/*
+ * Bind the reschedule and call-function IPIs for one CPU.
+ *
+ * cpu - logical CPU number; also indexes the static name buffers
+ *
+ * Returns 0 on success.  If a bind fails, or ap_cpu_initclocks() fails
+ * for a CPU other than 0, every handler bound so far is unbound again
+ * and the failing call's return value is returned.
+ */
+static int
+xen_smp_intr_init(unsigned int cpu)
+{
+	int rc;
+	unsigned int irq;
+
+	/* Mark both irqs as unbound so the fail path knows what to undo. */
+	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = ~0;
+
+	snprintf(resched_name[cpu], sizeof(resched_name[cpu]),
+	    "resched%u", cpu);
+	rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
+				    cpu,
+				    resched_name[cpu],
+				    smp_reschedule_interrupt,
+	    INTR_FAST|INTR_TYPE_TTY|INTR_MPSAFE, &irq);
+	/* irq is only meaningful if the bind succeeded. */
+	if (rc < 0)
+		goto fail;
+	per_cpu(resched_irq, cpu) = irq;
+
+	printf("cpu=%d irq=%d vector=%d\n",
+	    cpu, irq, RESCHEDULE_VECTOR);
+
+	snprintf(callfunc_name[cpu], sizeof(callfunc_name[cpu]),
+	    "callfunc%u", cpu);
+	rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR,
+				    cpu,
+				    callfunc_name[cpu],
+				    smp_call_function_interrupt,
+	    INTR_FAST|INTR_TYPE_TTY|INTR_MPSAFE, &irq);
+	if (rc < 0)
+		goto fail;
+	per_cpu(callfunc_irq, cpu) = irq;
+
+	printf("cpu=%d irq=%d vector=%d\n",
+	    cpu, irq, CALL_FUNCTION_VECTOR);
+
+	if ((cpu != 0) && ((rc = ap_cpu_initclocks(cpu)) != 0))
+		goto fail;
+
+	return 0;
+
+ fail:
+	if (per_cpu(resched_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(resched_irq, cpu));
+	if (per_cpu(callfunc_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu));
+	return rc;
+}
+
+/*
+ * Run xen_smp_intr_init() for every CPU below mp_ncpus.  Return values
+ * are ignored; the unused argument is ignored too.
+ */
+static void
+xen_smp_intr_init_cpus(void *unused)
+{
+	int cpu = 0;
+
+	while (cpu < mp_ncpus) {
+		xen_smp_intr_init(cpu);
+		cpu++;
+	}
+}
+
+#define MTOPSIZE (1<<(14 + PAGE_SHIFT))
+/*
+ * AP entry point: per-CPU setup, wait for aps_ready, then enter the scheduler.
+ */
+void
+init_secondary(void)
+{
+	vm_offset_t addr;
+	int	gsel_tss;
+
+	/* bootAP was set to our ID by start_all_aps() before we were started. */
+	PCPU_SET(currentldt, _default_ldt);
+
+	gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
+#if 0	
+	gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
+#endif	
+	PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
+	PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
+	PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
+#if 0
+	PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
+	PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
+	ltr(gsel_tss);
+#endif
+	PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
+
+	/* signal our startup to the BSP; start_ap() polls mp_naps. */
+	mp_naps++;
+
+	/* Spin until the BSP releases the AP's (release_aps() sets aps_ready). */
+	while (!aps_ready)
+		ia32_pause();
+
+	/* BSP may have changed PTD while we were waiting */
+	invltlb();
+	for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
+		invlpg(addr);
+
+	/* set up FPU state on the AP */
+	npxinit(__INITIAL_NPXCW__);
+
+#if 0	
+	/* set up SSE registers */
+	enable_sse();
+
+	/* A quick check from sanity claus */
+	if (PCPU_GET(apic_id) != lapic_id()) {
+		printf("SMP: cpuid = %d\n", PCPU_GET(cpuid));
+		printf("SMP: actual apic_id = %d\n", lapic_id());
+		printf("SMP: correct apic_id = %d\n", PCPU_GET(apic_id));
+		panic("cpuid mismatch! boom!!");
+	}
+#endif
+	/* Initialize curthread. */
+	KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread"));
+	PCPU_SET(curthread, PCPU_GET(idlethread));
+
+	mtx_lock_spin(&ap_boot_mtx);	/* held across the bookkeeping below */
+#if 0
+	/* Init local apic for irq's */
+	lapic_setup(1);
+
+	/* Set memory range attributes for this CPU to match the BSP */
+	mem_range_AP_init();
+#endif
+	smp_cpus++;
+
+	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
+	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
+
+	/* Determine if we are a logical CPU. */
+	if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
+		logical_cpus_mask |= PCPU_GET(cpumask);
+	
+	/* Determine if we are a hyperthread. */
+	if (hyperthreading_cpus > 1 &&
+	    PCPU_GET(apic_id) % hyperthreading_cpus != 0)
+		hyperthreading_cpus_mask |= PCPU_GET(cpumask);
+
+	/* Build our map of 'other' CPUs. */
+	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+#if 0
+	if (bootverbose)
+		lapic_dump("AP");
+#endif
+	if (smp_cpus == mp_ncpus) {
+		/* enable IPI's, tlb shootdown, freezes etc */
+		atomic_store_rel_int(&smp_started, 1);
+		smp_active = 1;	 /* historic */
+	}
+
+	mtx_unlock_spin(&ap_boot_mtx);
+
+	/* wait until all the AP's are up */
+	while (smp_started == 0)
+		ia32_pause();
+
+	/* ok, now grab sched_lock and enter the scheduler */
+	mtx_lock_spin(&sched_lock);
+
+	/*
+	 * Correct spinlock nesting.  The idle thread context that we are
+	 * borrowing was created so that it would start out with a single
+	 * spin lock (sched_lock) held in fork_trampoline().  Since we've
+	 * explicitly acquired locks in this function, the nesting count
+	 * is now 2 rather than 1.  Since we are nested, calling
+	 * spinlock_exit() will simply adjust the counts without allowing
+	 * spin lock using code to interrupt us.
+	 */
+	spinlock_exit();
+	KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
+
+	binuptime(PCPU_PTR(switchtime));
+	PCPU_SET(switchticks, ticks);
+
+	cpu_throw(NULL, choosethread());	/* doesn't return */
+
+	panic("scheduler returned us to %s", __func__);
+	/* NOTREACHED */
+}
+
+/*******************************************************************
+ * local functions and data
+ */
+
+/*
+ * We tell the I/O APIC code about all the CPUs we want to receive
+ * interrupts.  If we don't want certain CPUs to receive IRQs we
+ * can simply not tell the I/O APIC code about them in this function.
+ * We also do not tell it about the BSP since it tells itself about
+ * the BSP internally to work with UP kernels and on UP machines.
+ */
+static void
+set_interrupt_apic_ids(void)
+{
+	u_int id;
+
+	for (id = 0; id < MAXCPU; id++) {
+		/*
+		 * Skip CPUs that are absent or disabled, and the BSP.
+		 */
+		if (!cpu_info[id].cpu_present || cpu_info[id].cpu_bsp ||
+		    cpu_info[id].cpu_disabled)
+			continue;
+
+		/* Don't let hyperthreads service interrupts. */
+		if (hyperthreading_cpus > 1 &&
+		    id % hyperthreading_cpus != 0)
+			continue;
+
+		intr_add_cpu(id);
+	}
+}
+
+/*
+ * Assign logical CPU IDs to local APICs.
+ */
+static void
+assign_cpu_ids(void)
+{
+	u_int id;
+
+	/*
+	 * Pass 1: mark every present AP whose "lapic" unit has been
+	 * disabled by a tunable.
+	 */
+	for (id = 0; id <= MAX_APIC_ID; id++) {
+		if (!cpu_info[id].cpu_present || cpu_info[id].cpu_bsp)
+			continue;
+
+		if (resource_disabled("lapic", id))
+			cpu_info[id].cpu_disabled = 1;
+	}
+
+	/*
+	 * Pass 2: hand out logical CPU IDs to the remaining APs in APIC ID
+	 * order.  CPU 0 has already been assigned to the BSP, so counting
+	 * starts at 1; any AP that does not fit below MAXCPU is disabled.
+	 */
+	mp_ncpus = 1;
+	for (id = 0; id <= MAX_APIC_ID; id++) {
+		if (!cpu_info[id].cpu_present || cpu_info[id].cpu_bsp ||
+		    cpu_info[id].cpu_disabled)
+			continue;
+
+		if (mp_ncpus >= MAXCPU) {
+			cpu_info[id].cpu_disabled = 1;
+			continue;
+		}
+		cpu_apic_ids[mp_ncpus++] = id;
+	}
+	KASSERT(mp_maxid >= mp_ncpus - 1,
+	    ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
+	    mp_ncpus));
+}
+
+/*
+ * Start every AP listed in cpu_apic_ids[1 .. mp_ncpus - 1]; returns mp_naps.
+ */
+static int
+start_all_aps(void)
+{
+	int apic_id, cpu, i;
+	struct pcpu *pc;
+	
+	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
+
+	/* start each AP, one at a time */
+	for (cpu = 1; cpu < mp_ncpus; cpu++) {
+		apic_id = cpu_apic_ids[cpu];
+
+		bootstacks[cpu] = (char *)kmem_alloc(kernel_map, KSTACK_PAGES * PAGE_SIZE);
+
+		/* setup a vector to our boot code -- NOTE(review): needed under Xen? */
+		*((volatile u_short *) WARMBOOT_OFF) = WARMBOOT_TARGET;
+		*((volatile u_short *) WARMBOOT_SEG) = (boot_address >> 4);
+
+		bootSTK = (char *)bootstacks[cpu] + KSTACK_PAGES * PAGE_SIZE - 4;
+		bootAP = cpu;
+		bootAPgdt = gdt + (512*cpu);
+
+		/* Set up this AP's per-cpu data area */
+		pc = &__pcpu[bootAP];
+		pcpu_init(pc, bootAP, sizeof(struct pcpu));
+		pc->pc_apic_id = cpu_apic_ids[bootAP];
+		pc->pc_prvspace = pc;
+		pc->pc_curthread = 0;
+
+		gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
+		gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
+		/* Map the GDT page with PG_RW, fill it, then remap without PG_RW. */
+		PT_SET_MA(bootAPgdt, xpmap_ptom(VTOP(bootAPgdt)) | PG_V | PG_RW);
+		bzero(bootAPgdt, PAGE_SIZE);
+		for (i = 0; i < NGDT; i++)
+			ssdtosd(&gdt_segs[i], &bootAPgdt[i].sd);
+		PT_SET_MA(bootAPgdt, vtomach(bootAPgdt) | PG_V);
+#ifdef notyet
+		
+                if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) { 
+                        apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); 
+                        acpiid = xen_vcpu_physid_to_x86_acpiid(cpu_id.phys_id); 
+#ifdef CONFIG_ACPI 
+                        if (acpiid != 0xff) 
+                                x86_acpiid_to_apicid[acpiid] = apicid; 
+#endif 
+                } 
+#endif
+
+		/* attempt to start the Application Processor */
+		if (!start_ap(apic_id)) {
+			printf("AP #%d (PHY# %d) failed!\n", cpu, apic_id);
+			/* better panic as the AP may be running loose */
+			printf("panic y/n? [y] ");
+			if (cngetc() != 'n')
+				panic("bye-bye");
+		}
+
+		all_cpus |= (1 << cpu);		/* record AP in CPU map */
+	}
+
+	/* build our map of 'other' CPUs */
+	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+
+	pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
+
+	/* number of APs actually started (mp_naps is bumped by each AP) */
+	return mp_naps;
+}
+
+extern uint8_t *pcpu_boot_stack;
+extern trap_info_t trap_table[];
+
+static void
+smp_trap_init(trap_info_t *trap_ctxt)
+{
+	const trap_info_t *entry;
+
+	for (entry = trap_table; entry->address; entry++) {	/* ends at address 0 */
+		trap_ctxt[entry->vector].flags = entry->flags;
+		trap_ctxt[entry->vector].cs = entry->cs;
+		trap_ctxt[entry->vector].address = entry->address;
+	}
+}
+
+void cpu_initialize_context(unsigned int cpu);
+
+void
+cpu_initialize_context(unsigned int cpu)
+{
+	/* vcpu_guest_context_t is too large to allocate on the stack, so it is
+	 * static.  No lock is taken here: calls must not overlap. */
+	vm_page_t m[NPGPTD + 2];
+	static vcpu_guest_context_t ctxt;
+	vm_offset_t boot_stack;
+	vm_offset_t newPTD;
+	vm_paddr_t ma[NPGPTD];
+	static int color;
+	int i, err;
+
+	/*
+	 * Pages allocated below, by index into m[]:
+	 *   [0 .. NPGPTD - 1]	page directory (PTD) pages
+	 *   [NPGPTD]		boot stack
+	 *   [NPGPTD + 1]	PDPT
+	 * so m[] needs NPGPTD + 2 slots.
+	 */
+	for (i = 0; i < NPGPTD + 2; i++) {
+		m[i] = vm_page_alloc(NULL, color++,
+		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+		    VM_ALLOC_ZERO);
+		if (m[i] == NULL)
+			panic("%s: no free page for cpu %u", __func__, cpu);
+		pmap_zero_page(m[i]);
+	}
+	/* kmem_alloc_nofault() and kmem_free() take their sizes in bytes. */
+	boot_stack = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
+	newPTD = kmem_alloc_nofault(kernel_map, NPGPTD * PAGE_SIZE);
+	ma[0] = xpmap_ptom(VM_PAGE_TO_PHYS(m[0]))|PG_V;
+
+#ifdef PAE
+	pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
+	for (i = 0; i < NPGPTD; i++) {
+		((vm_paddr_t *)boot_stack)[i] =
+		ma[i] =
+		    xpmap_ptom(VM_PAGE_TO_PHYS(m[i]))|PG_V;
+	}
+#endif
+
+	/*
+	 * Copy cpu0 IdlePTD to new IdlePTD - copying only
+	 * kernel mappings
+	 */
+	pmap_qenter(newPTD, m, NPGPTD);
+	memcpy((uint8_t *)newPTD + KPTDI*sizeof(vm_paddr_t),
+	    (uint8_t *)PTOV(IdlePTD) + KPTDI*sizeof(vm_paddr_t),
+	    nkpt*sizeof(vm_paddr_t));
+
+	pmap_qremove(newPTD, NPGPTD);
+	kmem_free(kernel_map, newPTD, NPGPTD * PAGE_SIZE);
+	/*
+	 * map actual idle stack to boot_stack
+	 */
+	pmap_kenter(boot_stack, VM_PAGE_TO_PHYS(m[NPGPTD]));
+
+	/* Pin the PDPT page, then point the PTDPTDI slots at the PTD pages. */
+	xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1])));
+	vm_page_lock_queues();
+	for (i = 0; i < NPGPTD; i++) {
+		int pdir = (PTDPTDI + i) / NPDEPG;
+		int curoffset = (PTDPTDI + i) % NPDEPG;
+
+		xen_queue_pt_update((vm_paddr_t)
+		    ((ma[pdir] & ~PG_V) + (curoffset*sizeof(vm_paddr_t))),
+		    ma[i]);
+	}
+	PT_UPDATES_FLUSH();
+	vm_page_unlock_queues();
+
+	memset(&ctxt, 0, sizeof(ctxt));
+	ctxt.flags = VGCF_IN_KERNEL;
+	ctxt.user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
+	ctxt.user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
+	ctxt.user_regs.fs = GSEL(GPRIV_SEL, SEL_KPL);
+	ctxt.user_regs.gs = GSEL(GDATA_SEL, SEL_KPL);
+	ctxt.user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
+	ctxt.user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
+	ctxt.user_regs.eip = (unsigned long)init_secondary;
+	ctxt.user_regs.eflags = PSL_KERNEL | 0x1000; /* IOPL_RING1 */
+
+	memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
+
+	smp_trap_init(ctxt.trap_ctxt);
+
+	ctxt.ldt_ents = 0;
+	ctxt.gdt_frames[0] = (uint32_t)((uint64_t)vtomach(bootAPgdt) >> PAGE_SHIFT);
+	ctxt.gdt_ents      = 512;
+
+#ifdef __i386__
+	ctxt.user_regs.esp = boot_stack + PAGE_SIZE;
+
+	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
+	ctxt.kernel_sp = boot_stack + PAGE_SIZE;
+
+	ctxt.event_callback_cs     = GSEL(GCODE_SEL, SEL_KPL);
+	ctxt.event_callback_eip    = (unsigned long)Xhypervisor_callback;
+	ctxt.failsafe_callback_cs  = GSEL(GCODE_SEL, SEL_KPL);
+	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+
+	ctxt.ctrlreg[3] = xpmap_ptom(VM_PAGE_TO_PHYS(m[NPGPTD + 1]));
+#else /* __x86_64__ */
+	ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
+	ctxt.kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
+	ctxt.kernel_sp = idle->thread.rsp0;
+
+	ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
+	ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
+	ctxt.syscall_callback_eip  = (unsigned long)system_call;
+
+	ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
+
+	ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
+#endif
+
+	printf("gdtpfn=%lx pdptpfn=%lx\n",
+	    ctxt.gdt_frames[0],
+	    ctxt.ctrlreg[3] >> PAGE_SHIFT);
+
+	err = HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt);
+	KASSERT(err == 0, ("VCPUOP_initialise failed"));
+	DELAY(3000);
+	err = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
+	KASSERT(err == 0, ("VCPUOP_up failed"));
+}
+
+/*
+ * Start the AP given by 'apic_id' and wait for it to check in.
+ *
+ * The ID is passed straight to cpu_initialize_context(), which sets up and
+ * starts the vCPU.  Success is detected by watching mp_naps increase.
+ * Returns 1 if that happens before the polling loop gives up, 0 otherwise.
+ */
+static int
+start_ap(int apic_id)
+{
+	int elapsed, naps_before;
+
+	/* Snapshot the AP counter; init_secondary() bumps it on startup. */
+	naps_before = mp_naps;
+
+	cpu_initialize_context(apic_id);
+
+	/* Wait up to 5 seconds for it to start. */
+	for (elapsed = 0; elapsed < 5000; elapsed++) {
+		if (mp_naps > naps_before)
+			return 1;	/* return SUCCESS */
+		DELAY(1000);
+	}
+	return 0;		/* return FAILURE */
+}
+
+#ifdef COUNT_XINVLTLB_HITS
+u_int xhits_gbl[MAXCPU];	/* NOTE(review): not updated in the code visible here */
+u_int xhits_pg[MAXCPU];		/* NOTE(review): likewise */
+u_int xhits_rng[MAXCPU];	/* NOTE(review): likewise */
+SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
+    sizeof(xhits_gbl), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
+    sizeof(xhits_pg), "IU", "");
+SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
+    sizeof(xhits_rng), "IU", "");
+
+u_int ipi_global;		/* bumped by smp_invltlb() */
+u_int ipi_page;			/* bumped by smp_invlpg() */
+u_int ipi_range;		/* bumped by smp_invlpg_range() */
+u_int ipi_range_size;		/* pages covered by those ranges */
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
+    0, "");
+
+u_int ipi_masked_global;	/* bumped by smp_masked_invltlb() */
+u_int ipi_masked_page;		/* bumped by smp_masked_invlpg() */
+u_int ipi_masked_range;		/* bumped by smp_masked_invlpg_range() */
+u_int ipi_masked_range_size;	/* pages covered by those ranges */
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
+    &ipi_masked_global, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
+    &ipi_masked_page, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
+    &ipi_masked_range, 0, "");
+SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
+    &ipi_masked_range_size, 0, "");
+#endif /* COUNT_XINVLTLB_HITS */
+
+/*
+ * Send 'vector' to all other CPU's and spin until smp_tlb_wait reaches their count
+ */
+static void
+smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+{
+	u_int ncpu;
+	struct _call_data data;
+
+	ncpu = mp_ncpus - 1;	/* does not shootdown self */
+	if (ncpu < 1)
+		return;		/* no other cpus */
+	if (!(read_eflags() & PSL_I))
+		panic("%s: interrupts disabled", __func__);
+	mtx_lock_spin(&smp_ipi_mtx);
+	/* Publish the on-stack request only while smp_ipi_mtx is held. */
+	call_data = &data;
+	call_data->func_id = vector;
+	call_data->arg1 = addr1;
+	call_data->arg2 = addr2;
+	atomic_store_rel_int(&smp_tlb_wait, 0);
+	ipi_all_but_self(vector);
+	while (smp_tlb_wait < ncpu)
+		ia32_pause();
+	call_data = NULL;
+	mtx_unlock_spin(&smp_ipi_mtx);
+}
+
+static void
+smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+{
+	int ncpu, othercpus;
+	struct _call_data data;
+
+	othercpus = mp_ncpus - 1;	/* every CPU except this one */
+	if (mask == (u_int)-1) {	/* all-ones mask: target all others */
+		ncpu = othercpus;
+		if (ncpu < 1)
+			return;
+	} else {
+		mask &= ~PCPU_GET(cpumask);	/* never target ourselves */
+		if (mask == 0)
+			return;
+		ncpu = bitcount32(mask);	/* number of CPUs to wait for */
+		if (ncpu > othercpus) {
+			/* XXX this should be a panic offence */
+			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
+			    ncpu, othercpus);
+			ncpu = othercpus;
+		}
+		/* XXX should be a panic, implied by mask == 0 above */
+		if (ncpu < 1)
+			return;
+	}
+	if (!(read_eflags() & PSL_I))
+		panic("%s: interrupts disabled", __func__);
+	mtx_lock_spin(&smp_ipi_mtx);
+	call_data = &data;		/* on-stack request, cleared before unlock */
+	call_data->func_id = vector;
+	call_data->arg1 = addr1;
+	call_data->arg2 = addr2;
+	atomic_store_rel_int(&smp_tlb_wait, 0);
+	if (mask == (u_int)-1)
+		ipi_all_but_self(vector);
+	else
+		ipi_selected(mask, vector);
+	while (smp_tlb_wait < ncpu)	/* spin until smp_tlb_wait reaches ncpu */
+		ia32_pause();
+	call_data = NULL;
+	mtx_unlock_spin(&smp_ipi_mtx);
+}
+
+void
+smp_cache_flush(void)
+{
+	if (!smp_started)
+		return;		/* nothing to do before smp_started is set */
+	smp_tlb_shootdown(IPI_INVLCACHE, 0, 0);
+}
+
+void
+smp_invltlb(void)
+{
+	/* Send IPI_INVLTLB to all other CPUs; no-op until smp_started is set. */
+	if (!smp_started)
+		return;
+	smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+	ipi_global++;
+#endif
+}
+
+void
+smp_invlpg(vm_offset_t addr)
+{
+	/* Send IPI_INVLPG for 'addr' to all other CPUs once smp_started is set. */
+	if (!smp_started)
+		return;
+	smp_tlb_shootdown(IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+	ipi_page++;
+#endif
+}
+
+void
+smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
+{
+	/* Send IPI_INVLRNG for [addr1, addr2] to all other CPUs once started. */
+	if (!smp_started)
+		return;
+	smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+	ipi_range++;
+	ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+}
+
+void
+smp_masked_invltlb(u_int mask)
+{
+	/* Send IPI_INVLTLB to the CPUs in 'mask'; no-op until smp_started. */
+	if (!smp_started)
+		return;
+	smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
+#ifdef COUNT_XINVLTLB_HITS
+	ipi_masked_global++;
+#endif
+}
+
+void
+smp_masked_invlpg(u_int mask, vm_offset_t addr)
+{
+	/* Send IPI_INVLPG for 'addr' to the CPUs in 'mask' once started. */
+	if (!smp_started)
+		return;
+	smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
+#ifdef COUNT_XINVLTLB_HITS
+	ipi_masked_page++;
+#endif
+}
+
+void
+smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
+{
+	/* Send IPI_INVLRNG for [addr1, addr2] to the CPUs in 'mask' once started. */
+	if (!smp_started)
+		return;
+	smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
+#ifdef COUNT_XINVLTLB_HITS
+	ipi_masked_range++;
+	ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
+#endif
+}
+
+void ipi_bitmap_handler(struct clockframe frame);
+
+/* Service the IPI bits latched for this CPU in cpu_ipi_pending[]. */
+void
+ipi_bitmap_handler(struct clockframe frame)
+{
+	int cpu = PCPU_GET(cpuid);
+	u_int ipi_bitmap;
+
+	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
+
+#ifdef IPI_PREEMPTION
+	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
+#ifdef COUNT_IPIS
+		(*ipi_preempt_counts[cpu])++;	/* bump the counter, not the pointer */
+#endif
+		mtx_lock_spin(&sched_lock);
+		/* Don't preempt the idle thread */
+		if (curthread != PCPU_GET(idlethread)) {
+			struct thread *running_thread = curthread;
+			if (running_thread->td_critnest > 1)
+				running_thread->td_owepreempt = 1;
+			else
+				mi_switch(SW_INVOL | SW_PREEMPT, NULL);
+		}
+		mtx_unlock_spin(&sched_lock);
+	}
+#endif
+
+	if (ipi_bitmap & (1 << IPI_AST)) {
+#ifdef COUNT_IPIS
+		(*ipi_ast_counts[cpu])++;	/* bump the counter, not the pointer */
+#endif
+		/* Nothing to do for AST */
+	}
+}
+
+/*
+ * send an IPI to a set of cpus.
+ */
+void
+ipi_selected(uint32_t cpus, u_int ipi)
+{
+	int cpu;
+	u_int bitmap = 0;
+	u_int old_pending;
+	u_int new_pending;
+	
+	if (IPI_IS_BITMAPED(ipi)) { 
+		bitmap = 1 << ipi;
+		ipi = IPI_BITMAP_VECTOR;
+	} 
+
+	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
+	while ((cpu = ffs(cpus)) != 0) {
+		cpu--;	/* convert ffs()'s result to a CPU index */
+		cpus &= ~(1 << cpu);
+
+		KASSERT(cpu_apic_ids[cpu] != -1,
+		    ("IPI to non-existent CPU %d", cpu));
+
+		if (bitmap) {
+			/* OR our bit into the target's pending word (cmpset retry loop). */
+			do {
+				old_pending = cpu_ipi_pending[cpu];
+				new_pending = old_pending | bitmap;
+			} while  (!atomic_cmpset_int(&cpu_ipi_pending[cpu],old_pending, new_pending));	
+
+			if (!old_pending)	/* notify only if no bits were pending before */
+				ipi_pcpu(cpu, RESCHEDULE_VECTOR);
+			continue;
+		}
+
+		KASSERT(call_data != NULL, ("call_data not set"));
+
+		ipi_pcpu(cpu, CALL_FUNCTION_VECTOR);
+	}
+
+}
+
+/*
+ * send an IPI to every CPU in all_cpus, including the one we are running on
+ */
+void
+ipi_all(u_int ipi)
+{
+
+	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+	ipi_selected(all_cpus, ipi);	/* other_cpus would leave ourselves out */
+}
+
+/*
+ * send an IPI to all CPUs in all_cpus EXCEPT the one we are running on
+ */
+void
+ipi_all_but_self(u_int ipi)
+{
+
+	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+	ipi_selected(all_cpus & ~(1<<curcpu), ipi);	/* mask out our own bit */
+}
+
+/*
+ * send an IPI to myself
+ */
+void
+ipi_self(u_int ipi)
+{
+	/* NOTE(review): uses the lapic, not ipi_pcpu() like ipi_selected(); confirm. */
+	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
+	lapic_ipi_vectored(ipi, APIC_IPI_DEST_SELF);
+}
+
+/*
+ * Called once the rest of the system is up: set aps_ready to let the APs out
+ * of their spin in init_secondary(), then wait until smp_started is set.
+ */
+static void
+release_aps(void *dummy __unused)
+{
+
+	if (mp_ncpus == 1) 
+		return;
+	mtx_lock_spin(&sched_lock);	/* held while the APs finish starting */
+	atomic_store_rel_int(&aps_ready, 1);
+	while (smp_started == 0)
+		ia32_pause();
+	mtx_unlock_spin(&sched_lock);
+}
+SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
+SYSINIT(start_ipis, SI_SUB_INTR, SI_ORDER_ANY, xen_smp_intr_init_cpus, NULL);
+
+#ifdef COUNT_IPIS
+/*
+ * Register per-CPU interrupt counters for each IPI type, CPUs 0..mp_maxid.
+ */
+static void
+mp_ipi_intrcnt(void *dummy)
+{
+	char buf[64];
+	int i;
+
+	for (i = 0; i <= mp_maxid; i++) {	/* mp_maxid is an inclusive bound */
+		if (CPU_ABSENT(i))
+			continue;
+		snprintf(buf, sizeof(buf), "cpu%d: invltlb", i);
+		intrcnt_add(buf, &ipi_invltlb_counts[i]);
+		snprintf(buf, sizeof(buf), "cpu%d: invlrng", i);
+		intrcnt_add(buf, &ipi_invlrng_counts[i]);
+		snprintf(buf, sizeof(buf), "cpu%d: invlpg", i);
+		intrcnt_add(buf, &ipi_invlpg_counts[i]);
+#ifdef IPI_PREEMPTION
+		snprintf(buf, sizeof(buf), "cpu%d: preempt", i);
+		intrcnt_add(buf, &ipi_preempt_counts[i]);
+#endif
+		snprintf(buf, sizeof(buf), "cpu%d: ast", i);
+		intrcnt_add(buf, &ipi_ast_counts[i]);
+		snprintf(buf, sizeof(buf), "cpu%d: rendezvous", i);
+		intrcnt_add(buf, &ipi_rendezvous_counts[i]);
+		snprintf(buf, sizeof(buf), "cpu%d: lazypmap", i);
+		intrcnt_add(buf, &ipi_lazypmap_counts[i]);
+	}
+}
+SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
+#endif

Property changes on: i386/xen/mp_machdep.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/xen/pmap.c
===================================================================
--- i386/xen/pmap.c	(.../stable/6/sys)	(revision 0)
+++ i386/xen/pmap.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,3885 @@
+/*-
+ * Copyright (c) 1991 Regents of the University of California.
+ * All rights reserved.
+ * Copyright (c) 1994 John S. Dyson
+ * All rights reserved.
+ * Copyright (c) 1994 David Greenman
+ * All rights reserved.
+ * Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu>
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * the Systems Programming Group of the University of Utah Computer
+ * Science Department and William Jolitz of UUNET Technologies Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
+ */
+/*-
+ * Copyright (c) 2003 Networks Associates Technology, Inc.
+ * All rights reserved.
+ *
+ * This software was developed for the FreeBSD Project by Jake Burkholder,
+ * Safeport Network Services, and Network Associates Laboratories, the
+ * Security Research Division of Network Associates, Inc. under
+ * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
+ * CHATS research program.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+/*
+ *	Manages physical address maps.
+ *
+ *	In addition to hardware address maps, this
+ *	module is called upon to provide software-use-only
+ *	maps which may or may not be stored in the same
+ *	form as hardware maps.  These pseudo-maps are
+ *	used to store intermediate results from copy
+ *	operations to and from address spaces.
+ *
+ *	Since the information managed by this module is
+ *	also stored by the logical address mapping module,
+ *	this module may throw away valid virtual-to-physical
+ *	mappings at almost any time.  However, invalidations
+ *	of virtual-to-physical mappings must be done as
+ *	requested.
+ *
+ *	In order to cope with hardware architectures which
+ *	make virtual-to-physical map invalidates expensive,
+ *	this module may delay invalidate or reduced protection
+ *	operations until such time as they are actually
+ *	necessary.  This module is given full information as
+ *	to which processors are currently using which maps,
+ *	and to when physical maps must be made correct.
+ */
+
+#include "opt_cpu.h"
+#include "opt_pmap.h"
+#include "opt_msgbuf.h"
+#include "opt_xbox.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/ktr.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+#include <sys/msgbuf.h>
+#include <sys/mutex.h>
+#include <sys/proc.h>
+#include <sys/sx.h>
+#include <sys/vmmeter.h>
+#include <sys/sched.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#ifdef SMP
+#include <sys/smp.h>
+#endif
+
+#ifdef XBOX
+#include <machine/xbox.h>
+#endif
+
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+#include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
+#include <vm/uma.h>
+
+#ifdef XEN
+#include <xen/interface/xen.h>
+#include <xen/hypervisor.h>
+#include <machine/xen/hypercall.h>
+#include <machine/xen/xenvar.h>
+#include <machine/xen/xenfunc.h>
+#endif
+
+#include <machine/cpu.h>
+#include <machine/cputypes.h>
+#include <machine/md_var.h>
+#include <machine/pcb.h>
+#include <machine/specialreg.h>
+#ifdef SMP
+#include <machine/smp.h>
+#endif
+
+#ifdef XBOX
+#include <machine/xbox.h>
+#endif
+
+#include <xen/interface/xen.h>
+#include <xen/hypervisor.h>
+#include <machine/xen/hypercall.h>
+#include <machine/xen/xenvar.h>
+#include <machine/xen/xenfunc.h>
+
+#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
+#define CPU_ENABLE_SSE
+#endif
+
+#ifndef PMAP_SHPGPERPROC
+#define PMAP_SHPGPERPROC 200
+#endif
+
+#define PMAP_DIAGNOSTIC
+
+#if defined(DIAGNOSTIC)
+#define PMAP_DIAGNOSTIC
+#endif
+
+#if !defined(PMAP_DIAGNOSTIC)
+#define PMAP_INLINE __inline
+#else
+#define PMAP_INLINE
+#endif
+
+/*
+ * Get PDEs and PTEs for user/kernel address space, and test PDE/PTE flag bits
+ */
+#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
+#define pdir_pde(m, v) ((m)[(vm_offset_t)(v) >> PDRSHIFT])
+
+#define pmap_pde_v(pte)		((*(int *)(pte) & PG_V) != 0)
+#define pmap_pte_w(pte)		((*(int *)(pte) & PG_W) != 0)
+#define pmap_pte_m(pte)		((*(int *)(pte) & PG_M) != 0)
+#define pmap_pte_u(pte)		((*(int *)(pte) & PG_A) != 0)
+#define pmap_pte_v(pte)		((*(int *)(pte) & PG_V) != 0)
+
+#ifndef XEN
+#define pmap_pte_set_w(pte, v)	((v) ? atomic_set_int((u_int *)(pte), PG_W) : \
+    atomic_clear_int((u_int *)(pte), PG_W))
+#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
+#endif
+
+struct pmap kernel_pmap_store;
+LIST_HEAD(pmaplist, pmap);
+static struct pmaplist allpmaps;
+static struct mtx allpmaps_lock;
+
+vm_paddr_t avail_end;	/* PA of last available physical page */
+vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
+vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
+int pgeflag = 0;		/* PG_G or-in */
+int pseflag = 0;		/* PG_PS or-in */
+
+int nkpt;
+vm_offset_t kernel_vm_end;
+extern u_int32_t KERNend;
+
+#if defined(PAE) && !defined(XEN)
+static uma_zone_t pdptzone;
+#endif
+
+/*
+ * Data for the pv entry allocation mechanism
+ */
+static uma_zone_t pvzone;
+static struct vm_object pvzone_obj;
+static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
+int pmap_pagedaemon_waken;
+
+/*
+ * All those kernel PT submaps that BSD is so fond of
+ */
+struct sysmaps {
+	struct	mtx lock;
+	pt_entry_t *CMAP1;
+	pt_entry_t *CMAP2;
+	caddr_t	CADDR1;
+	caddr_t	CADDR2;
+};
+static struct sysmaps sysmaps_pcpu[MAXCPU];
+pt_entry_t *CMAP1 = 0;
+static pt_entry_t *CMAP3;
+caddr_t CADDR1 = 0, ptvmmap = 0;
+static caddr_t CADDR3;
+struct msgbuf *msgbufp = 0;
+
+/*
+ * Crashdump maps.
+ */
+static caddr_t crashdumpmap;
+
+#ifdef SMP
+extern pt_entry_t *SMPpt;
+#endif
+static pt_entry_t *PMAP1 = 0, *PMAP2;
+static pt_entry_t *PADDR1 = 0, *PADDR2;
+#ifdef SMP
+static int PMAP1cpu;
+static int PMAP1changedcpu;
+SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, 
+	   &PMAP1changedcpu, 0,
+	   "Number of times pmap_pte_quick changed CPU with same PMAP1");
+#endif
+static int PMAP1changed;
+SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, 
+	   &PMAP1changed, 0,
+	   "Number of times pmap_pte_quick changed PMAP1");
+static int PMAP1unchanged;
+SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, 
+	   &PMAP1unchanged, 0,
+	   "Number of times pmap_pte_quick didn't change PMAP1");
+static struct mtx PMAP2mutex;
+
+static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
+static pv_entry_t get_pv_entry(void);
+static void	pmap_clear_ptes(vm_page_t m, int bit);
+
+static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
+    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
+static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
+    vm_page_t *free);
+static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
+static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
+					vm_offset_t va);
+static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
+static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
+    vm_page_t m);
+
+static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);
+
+static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
+static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
+static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
+static void pmap_pte_release(pt_entry_t *pte);
+static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
+static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
+#if defined(PAE) && !defined(XEN)
+static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
+#endif
+
+CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
+CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
+
+/*
+ * If you get an error here, then you set KVA_PAGES wrong! See the
+ * description of KVA_PAGES in sys/i386/include/pmap.h. It must be a
+ * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE kernel.
+ */
+CTASSERT(KERNBASE % (1 << 24) == 0);
+
+static __inline void
+pagezero(void *page)
+{
+#if defined(I686_CPU)
+	if (cpu_class == CPUCLASS_686) {
+#if defined(CPU_ENABLE_SSE)
+		if (cpu_feature & CPUID_SSE2)
+			sse2_pagezero(page);
+		else
+#endif /* CPU_ENABLE_SSE */
+			i686_pagezero(page);
+	} else
+#endif /* I686_CPU */
+		bzero(page, PAGE_SIZE);	/* used when cpu_class is not CPUCLASS_686 */
+}
+
+/*
+ * Queue an update of the page-directory entry 'ptepindex' of 'pmap'.
+ *
+ * 'type' selects how 'val' is interpreted:
+ *   SH_PD_SET_VA       - 'val' is passed through xpmap_ptom() first;
+ *   SH_PD_SET_VA_MA    - 'val' is queued unchanged;
+ *   SH_PD_SET_VA_CLEAR - 'val' is ignored and 0 is queued.
+ * Any other 'type' queues nothing.  The update is only queued here;
+ * this function does not flush the queue itself.
+ */
+void
+pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
+{
+	vm_paddr_t pde_ma = vtomach(&pmap->pm_pdir[ptepindex]);
+
+	switch (type) {
+	case SH_PD_SET_VA:
+		xen_queue_pt_update(pde_ma, xpmap_ptom(val));
+		break;
+	case SH_PD_SET_VA_MA:
+		xen_queue_pt_update(pde_ma, val);
+		break;
+	case SH_PD_SET_VA_CLEAR:
+		xen_queue_pt_update(pde_ma, 0);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Round the kernel virtual free pointer up to the next large-page
+ * (4MB) boundary when the CPU advertises PSE and DISABLE_PSE is not
+ * defined, so that much of the kernel (.text, .data, .bss) can be
+ * covered by a large page.  Otherwise 'addr' is returned unchanged.
+ */
+static vm_offset_t
+pmap_kmem_choose(vm_offset_t addr)
+{
+#ifndef DISABLE_PSE
+	/* Add the offset mask, then strip the offset bits. */
+	if (cpu_feature & CPUID_PSE)
+		return ((addr + PDRMASK) & ~PDRMASK);
+#endif
+
+	return (addr);
+}
+
+/*
+ *	Bootstrap the system enough to run with virtual memory.
+ *
+ *	On the i386 this is called after mapping has already been enabled
+ *	and just syncs the pmap module with what has already been done.
+ *	[We can't call it easily with mapping off since the kernel is not
+ *	mapped with PA == VA, hence we would have to relocate every address
+ *	from the linked base (virtual) address "KERNBASE" to the actual
+ *	(physical) address starting relative to 0]
+ *
+ *	'firstaddr' is the start of unused physical memory; 'loadaddr' is unused.
+ */
+void
+pmap_bootstrap(vm_paddr_t firstaddr, vm_paddr_t loadaddr)
+{
+	vm_offset_t va;
+	pt_entry_t *pte, *unused;
+	struct sysmaps *sysmaps;
+	int i;
+
+	/*
+	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
+	 * large. It should instead be correctly calculated in locore.s and
+	 * not based on 'first' (which is a physical address, not a virtual
+	 * address, for the start of unused physical memory). The kernel
+	 * page tables are NOT double mapped and thus should not be included
+	 * in this calculation.
+	 */
+	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
+	virtual_avail = pmap_kmem_choose(virtual_avail);
+
+	virtual_end = VM_MAX_KERNEL_ADDRESS;
+
+	/*
+	 * Initialize the kernel pmap (which is statically allocated).
+	 */
+	PMAP_LOCK_INIT(kernel_pmap);
+	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
+#ifdef PAE
+	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
+#endif
+	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
+	TAILQ_INIT(&kernel_pmap->pm_pvlist);
+	LIST_INIT(&allpmaps);
+	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
+	mtx_lock_spin(&allpmaps_lock);
+	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
+	mtx_unlock_spin(&allpmaps_lock);
+	nkpt = NKPT;
+
+	/*
+	 * Reserve some special page table entries/VA space for temporary
+	 * mapping of pages.
+	 */
+#define	SYSMAP(c, p, v, n)	\
+	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);
+
+	va = virtual_avail;
+	pte = vtopte(va);
+
+	/*
+	 * CMAP1/CMAP2 are used for zeroing and copying pages.
+	 * CMAP3 is used for the idle process page zeroing.
+	 */
+	for (i = 0; i < MAXCPU; i++) {
+		sysmaps = &sysmaps_pcpu[i];
+		mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
+		SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
+		SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
+	}
+	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
+	SYSMAP(caddr_t, CMAP3, CADDR3, 1)
+#ifdef XEN
+	PT_SET_MA(CADDR3, 0);
+#else
+	*CMAP3 = 0;
+#endif
+	/*
+	 * Crashdump maps.
+	 */
+	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)
+
+	/*
+	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
+	 */
+	SYSMAP(caddr_t, unused, ptvmmap, 1)
+
+	/*
+	 * msgbufp is used to map the system message buffer.
+	 */
+	SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))
+
+	/*
+	 * ptemap is used for pmap_pte_quick
+	 */
+	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
+	SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);
+
+	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);
+
+	virtual_avail = va;
+#ifdef XEN
+	PT_SET_MA(CADDR1, 0);
+#else
+	*CMAP1 = 0;
+#endif
+
+#if !defined(XEN)
+#ifdef XBOX
+	/* FIXME: This is gross, but needed for the XBOX. Since we are at such
+	 * an early stage, we cannot yet neatly map video memory ... :-(
+	 * Better fixes are very welcome!
+	 */
+	if (!arch_i386_is_xbox)
+#endif
+	for (i = 0; i < NKPT; i++)
+		PTD[i] = 0;
+
+	/* Initialize the PAT MSR if present. */
+	pmap_init_pat();
+
+	/* Turn on PG_G on kernel page(s) */
+	pmap_set_pg();
+#endif /* !XEN */
+}
+
+/*
+ * Program the Page Attribute Table MSR.
+ */
+void
+pmap_init_pat(void)
+{
+	uint64_t pat;
+
+	/* Bail out when the CPU does not advertise PAT. */
+	if ((cpu_feature & CPUID_PAT) == 0)
+		return;
+
+	/* Start from the current MSR contents and patch selected entries. */
+	pat = rdmsr(MSR_PAT);
+#ifdef PAT_WORKS
+	/*
+	 * Leave the indices 0-3 at the default of WB, WT, UC, and UC-.
+	 * Program 4 and 5 as WP and WC.
+	 * Leave 6 and 7 as UC and UC-.
+	 */
+	pat = (pat & ~(PAT_MASK(4) | PAT_MASK(5))) |
+	    PAT_VALUE(4, PAT_WRITE_PROTECTED) |
+	    PAT_VALUE(5, PAT_WRITE_COMBINING);
+#else
+	/*
+	 * Due to some Intel errata, we can only safely use the lower 4
+	 * PAT entries.  Thus, just replace PAT Index 2 with WC instead
+	 * of UC-.
+	 *
+	 *   Intel Pentium III Processor Specification Update
+	 * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
+	 * or Mode C Paging)
+	 *
+	 *   Intel Pentium IV  Processor Specification Update
+	 * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
+	 */
+	pat &= ~PAT_MASK(2);
+	pat |= PAT_VALUE(2, PAT_WRITE_COMBINING);
+#endif
+	wrmsr(MSR_PAT, pat);
+}
+
+/*
+ * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
+ */
+void
+pmap_set_pg(void)
+{
+	pd_entry_t pdir;
+	pt_entry_t *pte;
+	vm_offset_t va, endva;
+	int i; 
+
+	if (pgeflag == 0)	/* pgeflag is what gets OR-ed in; 0 means no-op */
+		return;
+
+	i = KERNLOAD/NBPDR;	/* offset from KPTDI of the first PDE touched */
+	endva = KERNBASE + KERNend;
+
+	if (pseflag) {		/* walk page directory entries, NBPDR at a time */
+		va = KERNBASE + KERNLOAD;
+		while (va  < endva) {
+			pdir = kernel_pmap->pm_pdir[KPTDI+i];
+			pdir |= pgeflag;
+			kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir;
+			invltlb();	/* Play it safe, invltlb() every time */
+			i++;
+			va += NBPDR;
+		}
+	} else {		/* walk individual PTEs from btext upward */
+		va = (vm_offset_t)btext;
+		while (va < endva) {
+			pte = vtopte(va);
+			if (*pte) {	/* empty PTEs are left alone */
+#ifdef XEN
+				PT_SET_MA(va, *pte | pgeflag);
+#else	
+				*pte |= pgeflag;
+#endif
+			}
+			invltlb();	/* Play it safe, invltlb() every time */
+			va += PAGE_SIZE;
+		}
+	}
+}
+
+/*
+ * Set up the machine-dependent part of a vm_page: an empty pv list.
+ */
+void
+pmap_page_init(vm_page_t m)
+{
+
+	m->md.pv_list_count = 0;
+	TAILQ_INIT(&m->md.pv_list);
+}
+
+#if defined(PAE) && !defined(XEN)
+
+static MALLOC_DEFINE(M_PMAPPDPT, "pmap", "pmap pdpt");
+
+static void *
+pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+{
+	*flags = UMA_SLAB_PRIV;	/* zone, bytes and wait are not consulted */
+	return (contigmalloc(PAGE_SIZE, M_PMAPPDPT, 0, 0x0ULL, 0xffffffffULL,
+	    1, 0));	/* one page, physical address limited to 0..0xffffffff */
+}
+#endif
+
+/*
+ *	Initialize the pmap module.
+ *	Called by vm_init, to initialize any structures that the pmap
+ *	system needs to map virtual memory.
+ */
+void
+pmap_init(void)
+{
+	int shpgperproc = PMAP_SHPGPERPROC;	/* default; tunable below */
+
+	/*
+	 * Initialize the address space (zone) for the pv entries.  Set a
+	 * high water mark so that the system can recover from excessive
+	 * numbers of pv entries.
+	 */
+	pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL, 
+	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
+	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
+	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
+	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);	/* may override */
+	pv_entry_high_water = 9 * (pv_entry_max / 10);	/* 90% of the maximum */
+	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
+
+#if defined(PAE) && !defined(XEN)
+	pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
+	    NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
+	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
+	uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
+#endif
+}
+
+void
+pmap_init2(void)
+{
+	/* Intentionally empty: this pmap needs no second-stage setup. */
+}
+
+/***************************************************
+ * Low level helper routines.....
+ ***************************************************/
+
+/*
+ * Determine the appropriate bits to set in a PTE or PDE for a specified
+ * caching mode.
+ */
+static int
+pmap_cache_bits(int mode, boolean_t is_pde)
+{
+	int pat_flag, pat_index, cache_bits;
+
+	/* The PAT bit is different for PTE's and PDE's. */
+	pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;
+
+	/* If we don't support PAT, map extended modes to older ones. */
+	if (!(cpu_feature & CPUID_PAT)) {
+		switch (mode) {
+		case PAT_UNCACHEABLE:
+		case PAT_WRITE_THROUGH:
+		case PAT_WRITE_BACK:
+			break;		/* kept as requested */
+		case PAT_UNCACHED:
+		case PAT_WRITE_COMBINING:
+		case PAT_WRITE_PROTECTED:
+			mode = PAT_UNCACHEABLE;
+			break;
+		}
+	}
+	
+	/* Map the caching mode to a PAT index. */
+	switch (mode) {
+#ifdef PAT_WORKS
+	case PAT_UNCACHEABLE:
+		pat_index = 3;
+		break;
+	case PAT_WRITE_THROUGH:
+		pat_index = 1;
+		break;
+	case PAT_WRITE_BACK:
+		pat_index = 0;
+		break;
+	case PAT_UNCACHED:
+		pat_index = 2;
+		break;
+	case PAT_WRITE_COMBINING:
+		pat_index = 5;	/* matches entry 5 set up in pmap_init_pat() */
+		break;
+	case PAT_WRITE_PROTECTED:
+		pat_index = 4;	/* matches entry 4 set up in pmap_init_pat() */
+		break;
+#else
+	case PAT_UNCACHED:
+	case PAT_UNCACHEABLE:
+	case PAT_WRITE_PROTECTED:
+		pat_index = 3;
+		break;
+	case PAT_WRITE_THROUGH:
+		pat_index = 1;
+		break;
+	case PAT_WRITE_BACK:
+		pat_index = 0;
+		break;
+	case PAT_WRITE_COMBINING:
+		pat_index = 2;	/* matches entry 2 set up in pmap_init_pat() */
+		break;
+#endif
+	default:
+		panic("Unknown caching mode %d\n", mode);
+	}	
+
+	/* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
+	cache_bits = 0;
+	if (pat_index & 0x4)
+		cache_bits |= pat_flag;
+	if (pat_index & 0x2)
+		cache_bits |= PG_NC_PCD;
+	if (pat_index & 0x1)
+		cache_bits |= PG_NC_PWT;
+	return (cache_bits);
+}
+#ifdef SMP
+/*
+ * For SMP, these functions have to use the IPI mechanism for coherence.
+ *
+ * N.B.: Before calling any of the following TLB invalidation functions,
+ * the calling processor must ensure that all stores updating a non-
+ * kernel page table are globally performed.  Otherwise, another
+ * processor could cache an old, pre-update entry without being
+ * invalidated.  This can happen one of two ways: (1) The pmap becomes
+ * active on another processor after its pm_active field is checked by
+ * one of the following functions but before a store updating the page
+ * table is globally performed. (2) The pmap becomes active on another
+ * processor before its pm_active field is checked but due to
+ * speculative loads one of the following functions still reads the
+ * pmap as inactive on the other processor.
+ * 
+ * The kernel page table is exempt because its pm_active field is
+ * immutable.  The kernel page table is always active on every
+ * processor.
+ */
+void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+	u_int cpumask;
+	u_int other_cpus;
+
+	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
+	    pmap, va);
+
+	sched_pin();	/* paired with sched_unpin() at the end */
+	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+		invlpg(va);		/* local invalidation */
+		smp_invlpg(va);
+	} else {
+		cpumask = PCPU_GET(cpumask);
+		other_cpus = PCPU_GET(other_cpus);
+		if (pmap->pm_active & cpumask)
+			invlpg(va);
+		if (pmap->pm_active & other_cpus)	/* only CPUs in pm_active */
+			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+	}
+	PT_UPDATES_FLUSH();
+	sched_unpin();
+}
+
+/* Invalidate [sva, eva) page by page on each CPU where 'pmap' is active. */
+void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	u_int cpumask, other_cpus;
+	vm_offset_t addr;
+
+	CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
+	    pmap, sva, eva);
+
+	sched_pin();
+	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+		for (addr = sva; addr < eva; addr += PAGE_SIZE)
+			invlpg(addr);
+		smp_invlpg_range(sva, eva);
+	} else {
+		cpumask = PCPU_GET(cpumask);
+		other_cpus = PCPU_GET(other_cpus);
+		if (pmap->pm_active & cpumask)
+			for (addr = sva; addr < eva; addr += PAGE_SIZE)
+				invlpg(addr);
+		if (pmap->pm_active & other_cpus)
+			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
+			    sva, eva);
+	}
+	PT_UPDATES_FLUSH();
+	sched_unpin();
+}
+
+/* Flush the whole TLB on each CPU where 'pmap' is active. */
+void
+pmap_invalidate_all(pmap_t pmap)
+{
+	u_int cpumask, other_cpus;
+
+	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
+	sched_pin();
+	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+		invltlb();
+		smp_invltlb();
+	} else {
+		cpumask = PCPU_GET(cpumask);
+		other_cpus = PCPU_GET(other_cpus);
+		if (pmap->pm_active & cpumask)
+			invltlb();
+		if (pmap->pm_active & other_cpus)
+			smp_masked_invltlb(pmap->pm_active & other_cpus);
+	}
+	sched_unpin();
+}
+
+void
+pmap_invalidate_cache(void)
+{
+
+	sched_pin();
+	wbinvd();		/* local CPU */
+	smp_cache_flush();	/* presumably the other CPUs -- TODO confirm */
+	sched_unpin();
+}
+#else /* !SMP */
+/*
+ * Normal, non-SMP, 486+ invalidation functions.
+ * We inline these within pmap.c for speed.
+ */
+PMAP_INLINE void
+pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
+{
+	if (pmap == kernel_pmap || pmap->pm_active) {	/* else: nothing to do */
+		CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
+		    pmap, va);
+		invlpg(va);
+		PT_UPDATES_FLUSH();
+	}
+}
+
+PMAP_INLINE void
+pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	vm_offset_t addr;
+
+	if (pmap == kernel_pmap || pmap->pm_active) {
+		if (eva - sva > PAGE_SIZE)	/* single-page ranges are not traced */
+			CTR3(KTR_PMAP,
+			    "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
+			    pmap, sva, eva);
+		for (addr = sva; addr < eva; addr += PAGE_SIZE)
+			invlpg(addr);	/* one invlpg per page in [sva, eva) */
+		PT_UPDATES_FLUSH();
+	}
+}
+
+PMAP_INLINE void
+pmap_invalidate_all(pmap_t pmap)
+{
+
+	
+	if (pmap == kernel_pmap || pmap->pm_active) {	/* else: nothing to do */
+		CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
+		invltlb();	/* unlike page/range, no PT_UPDATES_FLUSH() here */
+	}
+}
+
+PMAP_INLINE void
+pmap_invalidate_cache(void)
+{
+
+	wbinvd();	/* uniprocessor: only the local CPU to deal with */
+}
+#endif /* !SMP */
+
+/*
+ * Return non-zero for the kernel pmap, or for curthread's own pmap when
+ * its recursive page directory entry matches the live one.  A page table
+ * merely borrowed by a kernel thread counts as not current (racy).
+ */
+static __inline int
+pmap_is_current(pmap_t pmap)
+{
+	if (pmap == kernel_pmap)
+		return (1);
+	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))
+		return (0);
+	return ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME));
+}
+
+/*
+ * If the given pmap is not the current or kernel pmap, the returned pte must
+ * be released by passing it to pmap_pte_release().
+ */
+pt_entry_t *
+pmap_pte(pmap_t pmap, vm_offset_t va)
+{
+	pd_entry_t newpf;
+	pd_entry_t *pde;
+
+	pde = pmap_pde(pmap, va);
+	if (*pde & PG_PS)
+		return (pde);	/* large page: the PDE itself is returned */
+	if (*pde != 0) {
+		/* are we current address space or kernel? */
+		if (pmap_is_current(pmap))
+			return (vtopte(va));
+		mtx_lock(&PMAP2mutex);	/* dropped by pmap_pte_release() */
+		newpf = *pde & PG_FRAME;
+		if ((*PMAP2 & PG_FRAME) != newpf) {	/* PADDR2 window must be retargeted */
+#ifdef XEN
+			PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M);
+			CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
+			    pmap, va, (*PMAP2 & 0xffffffff));
+#else		
+			*PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M;
+			pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
+#endif
+		}
+		return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
+	}
+	return (0);	/* empty PDE: no page table page covers va */
+}
+
+/*
+ * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
+ * being NULL.
+ */
+static __inline void
+pmap_pte_release(pt_entry_t *pte)
+{
+
+	if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) {	/* pte lies in the PADDR2 window */
+		CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx",
+		    *PMAP2);
+		PT_SET_VA_MA(PMAP2, 0, TRUE);	/* clear the window mapping */
+		mtx_unlock(&PMAP2mutex);	/* taken in pmap_pte() */
+	}
+}
+
+static __inline void
+invlcaddr(void *caddr)
+{
+
+	invlpg((u_int)caddr);	/* local CPU only; no SMP shootdown is issued */
+	PT_UPDATES_FLUSH();
+}
+
+/*
+ * Super fast pmap_pte routine best used when scanning
+ * the pv lists.  This eliminates many coarse-grained
+ * invltlb calls.  Note that many of the pv list
+ * scans are across different pmaps.  It is very wasteful
+ * to do an entire invltlb for checking a single mapping.
+ *
+ * If the given pmap is not the current pmap, vm_page_queue_mtx
+ * must be held and curthread pinned to a CPU.
+ */
+static pt_entry_t *
+pmap_pte_quick(pmap_t pmap, vm_offset_t va)
+{
+	pd_entry_t newpf;
+	pd_entry_t *pde;
+
+	pde = pmap_pde(pmap, va);
+	if (*pde & PG_PS)
+		return (pde);	/* large page: the PDE itself is returned */
+
+	/*
+	 *
+	 * XXX hitting this indicates that things are AFU
+	 */
+	if (*pde != 0) {
+		/* are we current address space or kernel? */
+		if (pmap_is_current(pmap))
+			return (vtopte(va));
+		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+		KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
+		newpf = *pde & PG_FRAME;
+		if ((*PMAP1 & PG_FRAME) != newpf) {	/* PADDR1 window must be retargeted */
+#ifdef XEN	
+			PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M);
+			CTR3(KTR_PMAP,
+			    "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x",
+			    pmap, va, (u_long)*PMAP1);
+#else	
+			*PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M;
+#endif
+#ifdef SMP
+			PMAP1cpu = PCPU_GET(cpuid);	/* remember which CPU's TLB is fresh */
+#endif
+			invlcaddr(PADDR1);
+			PMAP1changed++;
+		} else
+#ifdef SMP
+		if (PMAP1cpu != PCPU_GET(cpuid)) {	/* same frame, different CPU */
+			PMAP1cpu = PCPU_GET(cpuid);
+			invlcaddr(PADDR1);
+			PMAP1changedcpu++;
+		} else
+#endif
+			PMAP1unchanged++;
+		return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
+	}
+	return (0);	/* empty PDE: no page table page covers va */
+}
+
+/*
+ *	Routine:	pmap_extract
+ *	Function:
+ *		Extract the physical page address associated
+ *		with the given map/virtual_address pair.
+ */
+
+vm_paddr_t 
+pmap_extract(pmap_t pmap, vm_offset_t va)
+{
+	vm_paddr_t rtval;
+	pt_entry_t *pte;
+	pd_entry_t pde;
+
+	rtval = 0;	/* returned when the PDE for va is empty */
+	PMAP_LOCK(pmap);
+	pde = pmap->pm_pdir[va >> PDRSHIFT];
+	if (pde != 0) {
+		if ((pde & PG_PS) != 0) {
+#ifdef XEN
+			rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK);
+#else			
+			rtval = (pde & ~PDRMASK) | (va & PDRMASK);
+#endif
+			PMAP_UNLOCK(pmap);
+			return (rtval);
+		}
+		pte = pmap_pte(pmap, va);
+#ifdef XEN
+		rtval = ((*pte ? xpmap_mtop(*pte) : 0) & PG_FRAME) | (va & PAGE_MASK);	/* empty PTE is not translated */
+		
+#else		
+		rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
+#endif
+		pmap_pte_release(pte);
+	}
+	PMAP_UNLOCK(pmap);
+	return (rtval);
+}
+
+/*
+ * Like pmap_extract(), but the frame bits are taken from the page table
+ * entry as stored, without the xpmap_mtop() conversion.  Returns 0 when
+ * the page directory entry for 'va' is empty.
+ */
+vm_paddr_t
+pmap_extract_ma(pmap_t pmap, vm_offset_t va)
+{
+	pt_entry_t *ptep;
+	pd_entry_t pde;
+	vm_paddr_t ma;
+
+	ma = 0;
+	PMAP_LOCK(pmap);
+	pde = pmap->pm_pdir[va >> PDRSHIFT];
+	if ((pde & PG_PS) != 0) {
+		/* Large page: the frame comes straight from the PDE. */
+		ma = (pde & ~PDRMASK) | (va & PDRMASK);
+	} else if (pde != 0) {
+		ptep = pmap_pte(pmap, va);
+		ma = (*ptep & PG_FRAME) | (va & PAGE_MASK);
+		pmap_pte_release(ptep);
+	}
+	PMAP_UNLOCK(pmap);
+	return (ma);
+}
+
+
+/*
+ *	Routine:	pmap_extract_and_hold
+ *	Function:
+ *		Atomically extract and hold the physical page
+ *		with the given pmap and virtual address pair
+ *		if that mapping permits the given protection.
+ */
+vm_page_t
+pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
+{
+	pd_entry_t pde;
+	pt_entry_t pte;
+	vm_page_t m;
+
+	m = NULL;	/* returned when unmapped or when protection is insufficient */
+	vm_page_lock_queues();
+	PMAP_LOCK(pmap);
+	pde = PT_GET(pmap_pde(pmap, va));
+	if (pde != 0) {
+		if (pde & PG_PS) {
+			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {	/* write needs PG_RW */
+				m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) |
+				    (va & PDRMASK));
+				vm_page_hold(m);
+			}
+		} else {
+			sched_pin();	/* pmap_pte_quick() asserts the thread is pinned */
+			pte = PT_GET(pmap_pte_quick(pmap, va));
+			if (pte != 0 &&
+			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
+				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
+				vm_page_hold(m);
+			}
+			sched_unpin();
+		}
+	}
+	vm_page_unlock_queues();	/* NOTE(review): released in acquisition order, not reverse */
+	PMAP_UNLOCK(pmap);
+	return (m);
+}
+
+/***************************************************
+ * Low level mapping routines.....
+ ***************************************************/
+
+/*
+ * Add a wired page to the kva.
+ * Note: not SMP coherent.
+ */
+PMAP_INLINE void 
+pmap_kenter(vm_offset_t va, vm_paddr_t pa)
+{
+	PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag);	/* 'pa' goes through xpmap_ptom() first */
+}
+
+PMAP_INLINE void 
+pmap_kenter_ma(vm_offset_t va, vm_paddr_t pa)
+{
+
+	PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag);	/* 'pa' is used as given, no xpmap_ptom() */
+}
+
+PMAP_INLINE void 
+pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
+{
+	PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));	/* NOTE(review): no xpmap_ptom(), unlike pmap_kenter() -- confirm callers pass machine addresses */
+}
+
+/*
+ * Remove a page from the kernel pagetables.
+ * Note: not SMP coherent.
+ */
+PMAP_INLINE void
+pmap_kremove(vm_offset_t va)
+{
+	pt_entry_t *pte;
+
+	pte = vtopte(va);
+	PT_SET_VA_MA(pte, 0, FALSE);	/* zero the PTE; no TLB invalidation is done here */
+}
+
+/*
+ *	Map the physical range ['start', 'end') into kernel virtual
+ *	address space, one page at a time, via pmap_kenter().
+ *
+ *	'*virt' supplies the virtual address at which the mapping
+ *	begins.  On return it has been advanced to the first address
+ *	after the mapped region, and the function's return value is
+ *	the address at which the mapping starts.  (Architectures with
+ *	a direct-mapped region may instead leave '*virt' unchanged;
+ *	this implementation does not.)
+ *
+ *	'prot' is not consulted.
+ */
+vm_offset_t
+pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
+{
+	vm_offset_t base, cur;
+	vm_paddr_t pa;
+
+	base = *virt;
+	cur = base;
+	for (pa = start; pa < end; pa += PAGE_SIZE, cur += PAGE_SIZE)
+		pmap_kenter(cur, pa);
+	pmap_invalidate_range(kernel_pmap, base, cur);
+	*virt = cur;
+	return (base);
+}
+
+
+/*
+ * Add a list of wired pages to the kva
+ * this routine is only used for temporary
+ * kernel mappings that do not need to have
+ * page modification or references recorded.
+ * Note that old mappings are simply written
+ * over.  The page *must* be wired.
+ * Note: SMP coherent.  Uses a ranged shootdown IPI.
+ */
+void
+pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
+{
+	pt_entry_t *endpte, oldpte, *pte;
+
+	oldpte = 0;	/* accumulates every PTE value that gets overwritten */
+	pte = vtopte(sva);
+	endpte = pte + count;
+	vm_page_lock_queues();
+	critical_enter();
+	while (pte < endpte) {
+		oldpte |= *pte;
+#ifdef XEN
+		PT_SET_VA(pte, VM_PAGE_TO_PHYS(*ma) | pgeflag | PG_RW | PG_V, FALSE);
+#else		
+		pte_store(pte, VM_PAGE_TO_PHYS(*ma) | pgeflag | PG_RW | PG_V);
+#endif
+		pte++;
+		ma++;
+	}
+	PT_UPDATES_FLUSH();
+	if ((oldpte & PG_V) != 0)	/* shoot down only if a valid mapping was replaced */
+		pmap_invalidate_range(kernel_pmap, sva, sva + count *
+		    PAGE_SIZE);
+	vm_page_unlock_queues();	/* NOTE(review): pmap_qremove() exits the critical section first */
+	critical_exit();
+}
+
+/*
+ * Tear 'count' consecutive page mappings, starting at 'sva', out of
+ * the kernel page tables.  Intended only for temporary mappings such
+ * as those made by pmap_qenter().
+ *
+ * Note: SMP coherent.  Uses a ranged shootdown IPI.
+ * A 'count' of zero or less removes nothing.
+ */
+void
+pmap_qremove(vm_offset_t sva, int count)
+{
+	vm_offset_t va;
+	int i;
+
+	vm_page_lock_queues();
+	critical_enter();
+	for (i = 0, va = sva; i < count; i++, va += PAGE_SIZE)
+		pmap_kremove(va);
+	pmap_invalidate_range(kernel_pmap, sva, va);
+	critical_exit();
+	vm_page_unlock_queues();
+}
+
+/***************************************************
+ * Page table page management routines.....
+ ***************************************************/
+/* Hand every page on the 'right'-linked list 'free' to vm_page_free_zero(). */
+static PMAP_INLINE void
+pmap_free_zero_pages(vm_page_t free)
+{
+	vm_page_t next;
+
+	for (; free != NULL; free = next) {
+		next = free->right;
+		vm_page_free_zero(free);
+	}
+}
+
+/*
+ * Drop one wiring of page table page 'm'.  When that was the last one,
+ * _pmap_unwire_pte_hold() unmaps the page and chains it onto '*free';
+ * the result is then 1.  Otherwise nothing else happens and the result
+ * is 0.
+ */
+static PMAP_INLINE int
+pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
+{
+
+	if (--m->wire_count != 0)
+		return (0);
+	return (_pmap_unwire_pte_hold(pmap, m, free));
+}
+
+static int 
+_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
+{
+	vm_offset_t pteva;
+
+	PT_UPDATES_FLUSH();
+	/*
+	 * unmap the page table page
+	 */
+#ifdef XEN
+	xen_pt_unpin(pmap->pm_pdir[m->pindex]);
+	PT_SET_VA_MA(&pmap->pm_pdir[m->pindex], 0, TRUE);
+	pmap_zero_page(m);
+#else	
+	pmap->pm_pdir[m->pindex] = 0;
+#endif	
+	--pmap->pm_stats.resident_count;
+
+	/*
+	 * This is a release store so that the ordinary store unmapping
+	 * the page table page is globally performed before TLB shoot-
+	 * down is begun.
+	 */
+	atomic_subtract_rel_int(&cnt.v_wire_count, 1);
+
+	/*
+	 * Invalidate the page table page's own mapping (a single-page
+	 * invalidation, not a full invltlb) so it takes effect immediately.
+	 */
+	pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
+	pmap_invalidate_page(pmap, pteva);
+
+	/* 
+	 * Put page on a list so that it is released after
+	 * *ALL* TLB shootdown is done
+	 */
+	m->right = *free;
+	*free = m;
+
+	return 1;
+}
+
+/*
+ * Called after a page table entry for 'va' has been removed: drop one
+ * wiring of the page table page that held it.  Returns what
+ * pmap_unwire_pte_hold() returns, or 0 for addresses at or above
+ * VM_MAXUSER_ADDRESS, which are not reference counted here.
+ */
+static int
+pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
+{
+	pd_entry_t pde;
+
+	if (va >= VM_MAXUSER_ADDRESS)
+		return (0);
+	pde = PT_GET(pmap_pde(pmap, va));
+	return (pmap_unwire_pte_hold(pmap, PHYS_TO_VM_PAGE(pde & PG_FRAME), free));
+}
+
+/* Set up 'pmap' on top of the boot page directory (IdlePTD) and list it. */
+void
+pmap_pinit0(struct pmap *pmap)
+{
+
+	PMAP_LOCK_INIT(pmap);
+	pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
+#ifdef PAE
+	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
+#endif
+	pmap->pm_active = 0;
+	PCPU_SET(curpmap, pmap);
+	TAILQ_INIT(&pmap->pm_pvlist);
+	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+	mtx_lock_spin(&allpmaps_lock);
+	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
+	mtx_unlock_spin(&allpmaps_lock);
+}
+
+/*
+ * Initialize a preallocated and zeroed pmap structure,
+ * such as one in a vmspace structure.
+ */
+void
+pmap_pinit(struct pmap *pmap)
+{
+#ifdef XEN
+	vm_page_t m, ptdpg[NPGPTD + 1];	/* one page more than the directory needs */
+	int npgptd = NPGPTD + 1;
+#else
+	vm_page_t m, ptdpg[NPGPTD];
+	vm_paddr_t pa;
+	int npgptd = NPGPTD;
+#endif
+	static int color;	/* persists across calls; feeds vm_page_alloc() */
+	int i;
+
+	PMAP_LOCK_INIT(pmap);
+
+	/*
+	 * No need to allocate page table space yet but we do need a valid
+	 * page directory table.
+	 */
+	if (pmap->pm_pdir == NULL) {	/* KVA is reserved only on first use of this pmap */
+		pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
+		    NBPTD);
+
+#if defined(PAE)
+#ifdef XEN
+		pmap->pm_pdpt = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1);
+#else		
+		pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
+		KASSERT(((vm_offset_t)pmap->pm_pdpt &
+		    ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
+		    ("pmap_pinit: pdpt misaligned"));
+		KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
+		    ("pmap_pinit: pdpt above 4g"));
+#endif /* !XEN */
+#endif /* PAE */
+	}
+
+	/*
+	 * allocate the page directory page(s)
+	 */
+	for (i = 0; i < npgptd;) {	/* i advances only on success */
+		m = vm_page_alloc(NULL, color++,
+		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
+		    VM_ALLOC_ZERO);
+		if (m == NULL)
+			VM_WAIT;
+		else {
+			ptdpg[i++] = m;
+		}
+	}
+
+	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
+
+	for (i = 0; i < NPGPTD; i++) {
+		if ((ptdpg[i]->flags & PG_ZERO) == 0)	/* not pre-zeroed by the allocator */
+			pagezero(&pmap->pm_pdir[i*NPDEPG]);
+	}
+
+	mtx_lock_spin(&allpmaps_lock);
+	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
+	mtx_unlock_spin(&allpmaps_lock);
+	/* Wire in kernel global address entries. */
+	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
+
+#ifdef PAE
+#ifdef XEN
+	pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
+	if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
+		bzero(pmap->pm_pdpt, PAGE_SIZE);
+#endif	
+	for (i = 0; i < NPGPTD; i++) {
+		vm_paddr_t ma;
+		
+		ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));
+		pmap->pm_pdpt[i] = ma | PG_V;
+
+	}
+#endif	
+
+#ifdef XEN
+	for (i = 0; i < NPGPTD; i++) {
+		pt_entry_t *pd;
+		vm_paddr_t ma;
+		
+		ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));	/* NOTE(review): 'ma' is not used in this loop */
+		pd = pmap->pm_pdir + (i * NPDEPG);
+		PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW));
+		
+	}
+	
+#ifdef PAE	
+	PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW);
+#endif
+	vm_page_lock_queues();
+	xen_flush_queue();
+	xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD])));	/* NOTE(review): this page is mapped at pm_pdpt only under PAE */
+	for (i = 0; i < NPGPTD; i++) {
+		vm_paddr_t ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));
+		PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE);
+	}
+	xen_flush_queue();
+	vm_page_unlock_queues();
+#else
+	/* install self-referential address mapping entry(s) */
+	for (i = 0; i < NPGPTD; i++) {
+		pa = VM_PAGE_TO_PHYS(ptdpg[i]);
+		pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
+#ifdef PAE
+		pmap->pm_pdpt[i] = pa | PG_V;
+#endif
+	}
+#endif
+	pmap->pm_active = 0;
+	TAILQ_INIT(&pmap->pm_pvlist);
+	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
+}
+
+/*
+ * this routine is called if the page table page is not
+ * mapped correctly.
+ */
+static vm_page_t
+_pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags)
+{
+	vm_paddr_t ptepa;
+	vm_page_t m;
+
+	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
+	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
+	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
+
+	/*
+	 * Allocate a page table page.
+	 */
+	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
+	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
+		if (flags & M_WAITOK) {
+			PMAP_UNLOCK(pmap);	/* both locks are dropped around VM_WAIT */
+			vm_page_unlock_queues();
+			VM_WAIT;
+			vm_page_lock_queues();
+			PMAP_LOCK(pmap);
+		}
+
+		/*
+		 * Indicate the need to retry.  While waiting, the page table
+		 * page may have been allocated.
+		 */
+		return (NULL);	/* also the M_NOWAIT failure result */
+	}
+	if ((m->flags & PG_ZERO) == 0)
+		pmap_zero_page(m);
+
+	/*
+	 * Map the pagetable page into the process address space, if
+	 * it isn't already there.
+	 */
+
+	pmap->pm_stats.resident_count++;
+#ifdef XEN
+	ptepa = xpmap_ptom(VM_PAGE_TO_PHYS(m));
+	xen_pt_pin(ptepa);
+	PT_SET_VA_MA(&pmap->pm_pdir[ptepindex],
+		(ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);
+	
+	KASSERT(pmap->pm_pdir[ptepindex],
+	    ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex));
+#else
+	ptepa = VM_PAGE_TO_PHYS(m);
+	pmap->pm_pdir[ptepindex] =
+		(pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
+#endif
+	return (m);
+}
+
+/*
+ * Return the page table page that maps 'va' in 'pmap', allocating one
+ * through _pmap_allocpte() when the page directory slot is empty.  An
+ * already-mapped page gets its wire count bumped.  With M_WAITOK the
+ * lookup is retried after a failed allocation; with M_NOWAIT a failed
+ * allocation returns NULL.
+ */
+static vm_page_t
+pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
+{
+	unsigned ptepindex;
+	pd_entry_t ptepa;
+	vm_page_t m;
+
+	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
+	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
+	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));
+
+	/* Calculate pagetable page index */
+	ptepindex = va >> PDRSHIFT;
+retry:
+	/* Get the page directory entry */
+	ptepa = pmap->pm_pdir[ptepindex];
+
+	/* XXX track me down and fix me! */
+	if (ptepa != 0 && (ptepa & PG_V) == 0)
+		panic("phys addr set but not valid");
+
+	/*
+	 * This supports switching from a 4MB page to a
+	 * normal 4K page.
+	 */
+	if (ptepa & PG_PS) {
+		pmap->pm_pdir[ptepindex] = 0;
+		ptepa = 0;
+		pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+		pmap_invalidate_all(kernel_pmap);
+	}
+
+	/*
+	 * If the page table page is mapped, we just increment the
+	 * wire count, and activate it.
+	 */
+	if (ptepa) {
+#ifdef XEN
+		m = PHYS_TO_VM_PAGE(xpmap_mtop(ptepa));
+#else
+		m = PHYS_TO_VM_PAGE(ptepa);
+#endif
+		m->wire_count++;
+	} else {
+		/*
+		 * Here if the pte page isn't mapped, or if it has
+		 * been deallocated.
+		 */
+		CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x",
+		    pmap, va, flags);
+
+		m = _pmap_allocpte(pmap, ptepindex, flags);
+		if (m == NULL && (flags & M_WAITOK))
+			goto retry;
+		/* A failed M_NOWAIT allocation legitimately leaves the slot empty. */
+		KASSERT(m == NULL || pmap->pm_pdir[ptepindex] != 0,
+		    ("ptepindex=%u did not get mapped", ptepindex));
+	}
+	return (m);
+}
+
+
+/***************************************************
+* Pmap allocation/deallocation routines.
+ ***************************************************/
+
+#ifdef SMP
+/*
+ * Deal with a SMP shootdown of other users of the pmap that we are
+ * trying to dispose of.  This can be a bit hairy.
+ */
+static u_int *lazymask;
+static u_int lazyptd;
+static volatile u_int lazywait;
+
+void pmap_lazyfix_action(void);
+
+void
+pmap_lazyfix_action(void)
+{
+	u_int mymask = PCPU_GET(cpumask);
+
+	if (rcr3() == lazyptd)	/* this CPU still has the dying page directory loaded */
+		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+	atomic_clear_int(lazymask, mymask);	/* drop this CPU's bit from the mask lazymask points at */
+	atomic_store_rel_int(&lazywait, 1);	/* releases the spin loop in pmap_lazyfix() */
+}
+
+static void
+pmap_lazyfix_self(u_int mymask)
+{
+
+	if (rcr3() == lazyptd)	/* same check as pmap_lazyfix_action() */
+		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+	atomic_clear_int(lazymask, mymask);	/* no lazywait store: nobody is spinning on it */
+}
+
+
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+	u_int mymask;
+	u_int mask;
+	register u_int spins;
+
+	while ((mask = pmap->pm_active) != 0) {	/* one CPU is handled per iteration */
+		spins = 50000000;
+		mask = mask & -mask;	/* Find least significant set bit */
+		mtx_lock_spin(&smp_ipi_mtx);
+#ifdef PAE
+		lazyptd = vtophys(pmap->pm_pdpt);
+#else
+		lazyptd = vtophys(pmap->pm_pdir);
+#endif
+		mymask = PCPU_GET(cpumask);
+		if (mask == mymask) {	/* the chosen CPU is this one: fix up directly */
+			lazymask = &pmap->pm_active;
+			pmap_lazyfix_self(mymask);
+		} else {
+			atomic_store_rel_int((u_int *)&lazymask,
+			    (u_int)&pmap->pm_active);
+			atomic_store_rel_int(&lazywait, 0);
+			ipi_selected(mask, IPI_LAZYPMAP);
+			while (lazywait == 0) {	/* set to 1 by pmap_lazyfix_action() */
+				ia32_pause();
+				if (--spins == 0)
+					break;
+			}
+		}
+		mtx_unlock_spin(&smp_ipi_mtx);
+		if (spins == 0)	/* gave up waiting; the outer loop retries */
+			printf("pmap_lazyfix: spun for 50000000\n");
+	}
+}
+
+#else	/* SMP */
+
+/*
+ * Uniprocessor variant: there are no other CPUs to notify.  This path
+ * is rarely needed in practice, because cleanup is deferred until the
+ * parent does a wait(2), by which time another userland process has
+ * run.
+ */
+static void
+pmap_lazyfix(pmap_t pmap)
+{
+	u_int pdir_pa = vtophys(pmap->pm_pdir);
+
+	if (pdir_pa != rcr3())
+		return;		/* %cr3 does not point at this pmap */
+	/* Switch to the current pcb's page directory, then mark inactive. */
+	load_cr3(PCPU_GET(curpcb)->pcb_cr3);
+	pmap->pm_active &= ~(PCPU_GET(cpumask));
+}
+#endif	/* SMP */
+
+/*
+ * Release the resources held by a pmap that pmap_pinit set up; the map
+ * must contain no valid mappings.  Frees the NPGPTD page directory pages
+ * and, for PAE on Xen only, the PDPT page kept in the extra ptdpg[] slot.
+ */
+void
+pmap_release(pmap_t pmap)
+{
+#if defined(PAE) && defined(XEN)
+	vm_page_t m, ptdpg[NPGPTD+1];	/* last slot: the PDPT page, filled in below */
+	int npgptd = NPGPTD + 1;
+#else	
+	vm_page_t m, ptdpg[NPGPTD];
+	int npgptd = NPGPTD;	/* no extra page here, so never walk past ptdpg[] */
+#endif
+	int i;
+
+	KASSERT(pmap->pm_stats.resident_count == 0,
+	    ("pmap_release: pmap resident count %ld != 0",
+	    pmap->pm_stats.resident_count));
+
+	pmap_lazyfix(pmap);	/* clear any remaining bits in pm_active first */
+	mtx_lock_spin(&allpmaps_lock);
+	LIST_REMOVE(pmap, pm_list);
+	mtx_unlock_spin(&allpmaps_lock);
+
+#ifdef XEN	
+	for (i = 0; i < NPGPTD; i++)
+		ptdpg[i] = PHYS_TO_VM_PAGE(xpmap_mtop(pmap->pm_pdir[PTDPTDI + i]));
+#else
+	for (i = 0; i < NPGPTD; i++)
+		ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i]);
+#endif	
+	
+	bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
+	    sizeof(*pmap->pm_pdir));
+
+	pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
+#if defined(PAE) && defined(XEN)
+	ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt));
+#endif	
+
+	vm_page_lock_queues();
+	for (i = 0; i < npgptd; i++) {
+		vm_paddr_t ma;
+		
+		m = ptdpg[i];
+		ma = xpmap_ptom(VM_PAGE_TO_PHYS(m));
+		/* unpinning L1 and L2 treated the same */
+		xen_pgd_unpin(ma);
+#ifdef PAE
+		KASSERT(i >= NPGPTD ||	/* the PDPT page itself has no pm_pdpt[] entry */
+#ifdef XEN			
+			xpmap_ptom(VM_PAGE_TO_PHYS(m))
+#else
+			VM_PAGE_TO_PHYS(m)
+#endif
+			    == (pmap->pm_pdpt[i] & PG_FRAME),
+		    ("pmap_release: got wrong ptd page"));
+#endif
+		m->wire_count--;
+		atomic_subtract_int(&cnt.v_wire_count, 1);
+		vm_page_free(m);
+	}
+	vm_page_unlock_queues();
+	PMAP_LOCK_DESTROY(pmap);
+}
+
+static int
+kvm_size(SYSCTL_HANDLER_ARGS)
+{
+	unsigned long kva_bytes = VM_MAX_KERNEL_ADDRESS - KERNBASE;
+	/* Reports the span from KERNBASE up to VM_MAX_KERNEL_ADDRESS. */
+	return (sysctl_handle_long(oidp, &kva_bytes, 0, req));
+}
+SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD,
+    0, 0, kvm_size, "IU", "Size of KVM");
+
+static int
+kvm_free(SYSCTL_HANDLER_ARGS)
+{
+	unsigned long kva_left = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
+	/* Reports the span from kernel_vm_end up to VM_MAX_KERNEL_ADDRESS. */
+	return (sysctl_handle_long(oidp, &kva_left, 0, req));
+}
+SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD,
+    0, 0, kvm_free, "IU", "Amount of KVM free");
+
+/*
+ * Grow the kernel page tables, if needed, so that they reach addr.
+ */
+void
+pmap_growkernel(vm_offset_t addr)
+{
+	struct pmap *pmap;
+	vm_paddr_t ptppaddr;
+	vm_page_t nkpg;
+	pd_entry_t newpdir;
+
+	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
+	if (kernel_vm_end == 0) {	/* first call: count the page tables already present */
+		kernel_vm_end = KERNBASE;
+		nkpt = 0;
+		while (pdir_pde(PTD, kernel_vm_end)) {
+			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+			nkpt++;
+			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
+				kernel_vm_end = kernel_map->max_offset;
+				break;
+			}
+		}
+	}
+	addr = roundup2(addr, PAGE_SIZE * NPTEPG);	/* one page table page maps this much */
+	if (addr - 1 >= kernel_map->max_offset)
+		addr = kernel_map->max_offset;
+	while (kernel_vm_end < addr) {
+		if (pdir_pde(PTD, kernel_vm_end)) {	/* already has a page table: just advance */
+			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
+				kernel_vm_end = kernel_map->max_offset;
+				break;
+			}
+			continue;
+		}
+
+		nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT,
+		    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
+		if (nkpg == NULL)
+			panic("pmap_growkernel: no memory to grow kernel");
+
+		nkpt++;
+
+		pmap_zero_page(nkpg);
+		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
+		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
+		vm_page_lock_queues();
+		PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
+		mtx_lock_spin(&allpmaps_lock);
+		LIST_FOREACH(pmap, &allpmaps, pm_list) {	/* install the same entry in every pmap on allpmaps */
+			PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
+		}
+		mtx_unlock_spin(&allpmaps_lock);
+		vm_page_unlock_queues();
+		
+		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
+		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
+			kernel_vm_end = kernel_map->max_offset;
+			break;
+		}
+	}
+}
+
+
+/***************************************************
+ * page management routines.
+ ***************************************************/
+
+/*
+ * Hand a pv_entry back to pvzone and drop it from pv_entry_count.
+ */
+static PMAP_INLINE void
+free_pv_entry(pv_entry_t pv)
+{
+	pv_entry_count -= 1;
+	uma_zfree(pvzone, pv);
+}
+
+/*
+ * Get a new pv_entry from pvzone.  The allocation is M_NOWAIT, so the
+ * result may be NULL; the zone is used instead of the malloc code
+ * because of the possibility of allocations at interrupt time.  The page
+ * daemon is woken when the count passes pv_entry_high_water.
+ */
+static pv_entry_t
+get_pv_entry(void)
+{
+	/* The count is bumped first, whether or not the allocation succeeds. */
+	if (++pv_entry_count > pv_entry_high_water &&
+	    pmap_pagedaemon_waken == 0) {
+		pmap_pagedaemon_waken = 1;
+		wakeup(&vm_pages_needed);
+	}
+	return (uma_zalloc(pvzone, M_NOWAIT));
+}
+
+
+static void
+pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
+{
+	pv_entry_t pv;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {	/* search the page's pv list */
+		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+			if (pmap == pv->pv_pmap && va == pv->pv_va) 
+				break;
+		}
+	} else {	/* otherwise search the pmap's pv list */
+		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
+			if (va == pv->pv_va) 
+				break;
+		}
+	}
+	KASSERT(pv != NULL, ("pmap_remove_entry: pv not found"));
+	TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);	/* unlink from the page's list ... */
+	m->md.pv_list_count--;
+	if (TAILQ_EMPTY(&m->md.pv_list))
+		vm_page_flag_clear(m, PG_WRITEABLE);	/* m has no pv entries left */
+	TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);	/* ... and from the pmap's list */
+	free_pv_entry(pv);
+}
+
+/*
+ * Record the mapping (pmap, va) of page m in a new pv entry.
+ * Panics when no pv entry can be allocated.
+ */
+static void
+pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+	pv_entry_t entry = get_pv_entry();
+
+	if (entry == NULL)
+		panic("no pv entries: increase vm.pmap.shpgperproc");
+	entry->pv_va = va;
+	entry->pv_pmap = pmap;
+
+	/* Both locks are asserted before either list is modified. */
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, entry, pv_plist);
+	TAILQ_INSERT_TAIL(&m->md.pv_list, entry, pv_list);
+	m->md.pv_list_count += 1;
+}
+
+/*
+ * Create a pv entry for (pmap, va, m) only if one is cheaply available.
+ */
+static boolean_t
+pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
+{
+	pv_entry_t entry;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	if (pv_entry_count >= pv_entry_high_water ||
+	    (entry = uma_zalloc(pvzone, M_NOWAIT)) == NULL)
+		return (FALSE);
+	/* Got an entry: account for it and link it onto both lists. */
+	pv_entry_count++;
+	entry->pv_va = va;
+	entry->pv_pmap = pmap;
+	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, entry, pv_plist);
+	TAILQ_INSERT_TAIL(&m->md.pv_list, entry, pv_list);
+	m->md.pv_list_count++;
+	return (TRUE);
+}
+
+/*
+ * pmap_remove_pte: unmap one page; returns what pmap_unuse_pt() returns
+ */
+static int
+pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
+{
+	pt_entry_t oldpte;
+	vm_page_t m;
+
+	CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x",
+	    pmap, (u_long)*ptq, va);
+	
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+#ifdef XEN
+	oldpte = *ptq;	/* read, then clear: two steps, unlike pte_load_clear() below */
+	PT_SET_VA_MA(ptq, 0, TRUE);
+#else
+	oldpte = pte_load_clear(ptq);
+#endif
+	if (oldpte & PG_W)
+		pmap->pm_stats.wired_count -= 1;
+	/*
+	 * Machines that don't support invlpg, also don't support
+	 * PG_G.
+	 */
+	if (oldpte & PG_G)
+		pmap_invalidate_page(kernel_pmap, va);
+	pmap->pm_stats.resident_count -= 1;
+	if (oldpte & PG_MANAGED) {
+		m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte));
+		if (oldpte & PG_M) {	/* carry the modified bit over to the vm_page */
+			KASSERT((oldpte & PG_RW),
+	("pmap_remove_pte: modified page not writable: va: %#x, pte: %#jx",
+			    va, (uintmax_t)oldpte));
+			vm_page_dirty(m);
+		}
+		if (oldpte & PG_A)	/* likewise for the accessed bit */
+			vm_page_flag_set(m, PG_REFERENCED);
+		pmap_remove_entry(pmap, m, va);
+	}
+	return (pmap_unuse_pt(pmap, va, free));
+}
+
+/*
+ * Unmap the single page at va, if one is mapped there.
+ */
+static void
+pmap_remove_page(pmap_t pmap, vm_offset_t va)
+{
+	vm_page_t ptpages = NULL;	/* filled in through pmap_remove_pte() */
+	pt_entry_t *ptep;
+
+	CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x", pmap, va);
+	/* Needs the page queue lock, a pinned thread and the pmap lock. */
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	ptep = pmap_pte_quick(pmap, va);
+	if (ptep == NULL || *ptep == 0)
+		return;
+	pmap_remove_pte(pmap, ptep, va, &ptpages);
+	pmap_invalidate_page(pmap, va);
+	pmap_free_zero_pages(ptpages);
+}
+
+/*
+ *	Remove the given range of addresses from the specified map.
+ *
+ *	It is assumed that the start and end are properly
+ *	rounded to the page size.
+ */
+void
+pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	vm_offset_t pdnxt;
+	pd_entry_t ptpaddr;
+	pt_entry_t *pte;
+	vm_page_t free = NULL;
+	int anyvalid;
+
+	CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
+	    pmap, sva, eva);
+	
+	/*
+	 * Perform an unsynchronized read.  This is, however, safe.
+	 */
+	if (pmap->pm_stats.resident_count == 0)
+		return;
+
+	anyvalid = 0;	/* set when a non-PG_G mapping or a large page is removed */
+
+	vm_page_lock_queues();
+	sched_pin();
+	PMAP_LOCK(pmap);
+
+	/*
+	 * special handling of removing one page.  a very
+	 * common operation and easy to short circuit some
+	 * code.
+	 */
+	if ((sva + PAGE_SIZE == eva) && 
+	    ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
+		pmap_remove_page(pmap, sva);	/* does its own invalidate and page freeing */
+		goto out;
+	}
+
+	for (; sva < eva; sva = pdnxt) {
+		unsigned pdirindex;
+
+		/*
+		 * Calculate index for next page table.
+		 */
+		pdnxt = (sva + NBPDR) & ~PDRMASK;
+		if (pmap->pm_stats.resident_count == 0)
+			break;
+
+		pdirindex = sva >> PDRSHIFT;
+		ptpaddr = pmap->pm_pdir[pdirindex];
+
+		/*
+		 * Weed out invalid mappings. Note: we assume that the page
+		 * directory table is always allocated, and in kernel virtual.
+		 */
+		if (ptpaddr == 0)
+			continue;
+
+		/*
+		 * Check for large page.
+		 */
+		if ((ptpaddr & PG_PS) != 0) {
+#ifdef XEN
+			PT_SET_VA_MA(&pmap->pm_pdir[pdirindex], 0, TRUE);
+#else			
+			pmap->pm_pdir[pdirindex] = 0;
+#endif			
+			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
+			anyvalid = 1;
+			continue;
+		}
+
+		/*
+		 * Limit our scan to either the end of the va represented
+		 * by the current page table page, or to the end of the
+		 * range being removed.
+		 */
+		if (pdnxt > eva)
+			pdnxt = eva;
+
+		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+		    sva += PAGE_SIZE) {
+			if (*pte == 0)
+				continue;
+
+			/*
+			 * The TLB entry for a PG_G mapping is invalidated
+			 * by pmap_remove_pte().
+			 */
+			if ((*pte & PG_G) == 0)
+				anyvalid = 1;
+			if (pmap_remove_pte(pmap, pte, sva, &free))	/* nonzero result: stop scanning this page table */
+				break;
+		}
+	}
+	PT_UPDATES_FLUSH();
+	
+out:
+	if (anyvalid) {
+		pmap_invalidate_all(pmap);
+		pmap_free_zero_pages(free);
+	}
+	sched_unpin();
+	vm_page_unlock_queues();
+	PMAP_UNLOCK(pmap);
+}
+
+/*
+ *	Routine:	pmap_remove_all
+ *	Function:
+ *		Removes this physical page from
+ *		all physical maps in which it resides.
+ *		Reflects back modify bits to the pager.
+ *
+ *	Notes:
+ *		Original versions of this routine were very
+ *		inefficient because they iteratively called
+ *		pmap_remove (slow...)
+ */
+
+void
+pmap_remove_all(vm_page_t m)
+{
+	register pv_entry_t pv;
+	pt_entry_t *pte, tpte;
+	vm_page_t free;
+
+#if defined(PMAP_DIAGNOSTIC)
+	/*
+	 * XXX This makes pmap_remove_all() illegal for non-managed pages!
+	 */
+	if (m->flags & PG_FICTITIOUS) {
+		panic("pmap_remove_all: illegal for unmanaged page, pa: 0x%jx",
+		    (uintmax_t)VM_PAGE_TO_PHYS(m));	/* %jx requires a uintmax_t argument */
+	}
+#endif
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	sched_pin();
+	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {	/* one pv entry per mapping of m */
+		PMAP_LOCK(pv->pv_pmap);
+		pv->pv_pmap->pm_stats.resident_count--;
+		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+#if defined(XEN)
+		tpte = *pte;
+		PT_SET_VA_MA(pte, 0, TRUE);
+#else		
+		tpte = pte_load_clear(pte);
+#endif
+		if (tpte & PG_W)
+			pv->pv_pmap->pm_stats.wired_count--;
+		if (tpte & PG_A)
+			vm_page_flag_set(m, PG_REFERENCED);
+
+		/*
+		 * Update the vm_page_t clean and reference bits.
+		 */
+		if (tpte & PG_M) {
+			KASSERT((tpte & PG_RW),
+	("pmap_remove_all: modified page not writable: va: %#x, pte: %#jx",
+			    pv->pv_va, (uintmax_t)tpte));
+			vm_page_dirty(m);
+		}
+		free = NULL;
+		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, &free);
+		pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+		pmap_free_zero_pages(free);
+		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
+		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+		m->md.pv_list_count--;
+		PMAP_UNLOCK(pv->pv_pmap);
+		free_pv_entry(pv);
+	}
+	vm_page_flag_clear(m, PG_WRITEABLE);	/* the pv list is empty now */
+	PT_UPDATES_FLUSH();
+	sched_unpin();
+}
+
+/*
+ *	Set the physical protection on the
+ *	specified range of this map as requested.
+ */
+void
+pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
+{
+	vm_offset_t pdnxt;
+	pd_entry_t ptpaddr;
+	pt_entry_t *pte;
+	int anychanged;
+
+	CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x",
+	    pmap, sva, eva, prot);
+
+	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {	/* no read access: remove the range instead */
+		pmap_remove(pmap, sva, eva);
+		return;
+	}
+
+	if (prot & VM_PROT_WRITE)	/* this routine only ever revokes write access */
+		return;
+
+	anychanged = 0;
+
+	vm_page_lock_queues();
+	sched_pin();
+	PMAP_LOCK(pmap);
+	for (; sva < eva; sva = pdnxt) {
+		unsigned pdirindex;
+		vm_paddr_t obits, pbits; 
+
+		pdnxt = (sva + NBPDR) & ~PDRMASK;
+
+		pdirindex = sva >> PDRSHIFT;
+		ptpaddr = pmap->pm_pdir[pdirindex];
+
+		/*
+		 * Weed out invalid mappings. Note: we assume that the page
+		 * directory table is always allocated, and in kernel virtual.
+		 */
+		if (ptpaddr == 0)
+			continue;
+
+		/*
+		 * Check for large page.
+		 */
+		if ((ptpaddr & PG_PS) != 0) {
+			pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
+			anychanged = 1;
+			continue;
+		}
+
+		if (pdnxt > eva)
+			pdnxt = eva;
+
+		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
+		    sva += PAGE_SIZE) {
+			vm_page_t m;
+
+retry:
+			/*
+			 * Regardless of whether a pte is 32 or 64 bits in
+			 * size, PG_RW, PG_A, and PG_M are among the least
+			 * significant 32 bits.
+			 */
+			obits = pbits = *pte;
+			if (pbits & PG_MANAGED) {
+#ifdef XEN				
+				pt_entry_t pteval = xpmap_mtop(*pte);
+#else
+				pt_entry_t pteval = *pte;
+#endif
+				
+				m = NULL;
+				if (pbits & PG_A) {
+					m = PHYS_TO_VM_PAGE(pteval);
+
+					vm_page_flag_set(m, PG_REFERENCED);
+					pbits &= ~PG_A;
+				}
+				if ((pbits & PG_M) != 0) {
+					if (m == NULL)
+						m = PHYS_TO_VM_PAGE(pteval);
+					vm_page_dirty(m);
+				}
+			}
+
+			pbits &= ~(PG_RW | PG_M);
+
+			if (pbits != obits) {
+#ifdef XEN
+				obits = *pte;
+				PT_SET_VA_MA(pte, pbits, TRUE);
+				if (*pte != pbits)	/* re-read to check the update took effect */
+					goto retry;
+#else						
+				if (!atomic_cmpset_int((u_int *)pte, obits,
+				    pbits))
+					goto retry;
+#endif
+				if (obits & PG_G)
+					pmap_invalidate_page(pmap, sva);
+				else
+					anychanged = 1;
+			}
+		}
+	}
+	PT_UPDATES_FLUSH();
+	if (anychanged)
+		pmap_invalidate_all(pmap);
+	sched_unpin();
+	vm_page_unlock_queues();
+	PMAP_UNLOCK(pmap);
+}
+
+/*
+ *	Insert the given physical page (p) at
+ *	the specified virtual address (v) in the
+ *	target physical map with the protection requested.
+ *
+ *	If specified, the page will be wired down, meaning
+ *	that the related pte can not be reclaimed.
+ *
+ *	NB:  This is the only routine which MAY NOT lazy-evaluate
+ *	or lose information.  That is, this routine must actually
+ *	insert this page into the given map NOW.
+ */
+void
+pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
+	   boolean_t wired)
+{
+	vm_paddr_t pa;
+	pd_entry_t *pde;
+	register pt_entry_t *pte;
+	vm_paddr_t opa;
+	pt_entry_t origpte, newpte;
+	vm_page_t mpte, om;
+	boolean_t invlva;
+
+	
+	CTR5(KTR_PMAP,
+	    "pmap_enter: pmap=%08p va=0x%08x ma=0x%08x prot=0x%x wired=%d",
+	    pmap, va, xpmap_ptom(VM_PAGE_TO_PHYS(m)), prot, wired);
+	va &= PG_FRAME;
+#ifdef PMAP_DIAGNOSTIC
+	if (va > VM_MAX_KERNEL_ADDRESS)
+		panic("pmap_enter: toobig");
+	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
+		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
+#endif
+
+	mpte = NULL;
+
+	vm_page_lock_queues();
+	PMAP_LOCK(pmap);
+	sched_pin();
+
+	/*
+	 * In the case that a page table page is not
+	 * resident, we are creating it here.
+	 */
+	if (va < VM_MAXUSER_ADDRESS) {
+		mpte = pmap_allocpte(pmap, va, M_WAITOK);
+	}
+#if 0 && defined(PMAP_DIAGNOSTIC)
+	else {
+		pd_entry_t *pdeaddr = pmap_pde(pmap, va);
+		origpte = *pdeaddr;
+		if ((origpte & PG_V) == 0) { 
+			panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n",
+				pmap->pm_pdir[PTDPTDI], origpte, va);
+		}
+	}
+#endif
+
+	pde = pmap_pde(pmap, va);
+	if ((*pde & PG_PS) != 0)
+		panic("pmap_enter: attempted pmap_enter on 4MB page");
+	pte = pmap_pte_quick(pmap, va);
+
+	/*
+	 * Page Directory table entry not valid, we need a new PT page
+	 */
+	if (pte == NULL) {
+		panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n",
+			(uintmax_t)pmap->pm_pdir[PTDPTDI], va);
+	}
+
+	pa = VM_PAGE_TO_PHYS(m);
+	om = NULL;
+	
+	origpte = *pte;
+	if (origpte)
+		origpte = xpmap_mtop(origpte);	/* so opa below can be compared with pa */
+	opa = origpte & PG_FRAME;
+
+	/*
+	 * Mapping has not changed, must be protection or wiring change.
+	 */
+	if (origpte && (opa == pa)) {
+		/*
+		 * Wiring change, just update stats. We don't worry about
+		 * wiring PT pages as they remain resident as long as there
+		 * are valid mappings in them. Hence, if a user page is wired,
+		 * the PT page will be also.
+		 */
+		if (wired && ((origpte & PG_W) == 0))
+			pmap->pm_stats.wired_count++;
+		else if (!wired && (origpte & PG_W))
+			pmap->pm_stats.wired_count--;
+
+		/*
+		 * Remove extra pte reference
+		 */
+		if (mpte)
+			mpte->wire_count--;
+
+		/*
+		 * We might be turning off write access to the page,
+		 * so we go ahead and sense modify status.
+		 */
+		if (origpte & PG_MANAGED) {
+			om = m;
+			pa |= PG_MANAGED;
+		}
+		goto validate;
+	} 
+	/*
+	 * Mapping has changed, invalidate old range and fall through to
+	 * handle validating new mapping.
+	 */
+	if (opa) {
+		if (origpte & PG_W)
+			pmap->pm_stats.wired_count--;
+		if (origpte & PG_MANAGED) {
+			om = PHYS_TO_VM_PAGE(opa);
+			pmap_remove_entry(pmap, om, va);
+		}
+		if (mpte != NULL) {
+			mpte->wire_count--;
+			KASSERT(mpte->wire_count > 0,
+			    ("pmap_enter: missing reference to page table page,"
+			     " va: 0x%x", va));
+		}
+	} else
+		pmap->pm_stats.resident_count++;
+
+	/*
+	 * Enter on the PV list if part of our managed memory.
+	 */
+	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
+		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
+		    ("pmap_enter: managed mapping within the clean submap"));
+		pmap_insert_entry(pmap, va, m);
+		pa |= PG_MANAGED;
+	}
+
+	/*
+	 * Increment counters
+	 */
+	if (wired)
+		pmap->pm_stats.wired_count++;
+
+validate:
+	/*
+	 * Now validate mapping with desired protection/wiring.
+	 */
+	newpte = (pt_entry_t)(pa | PG_V);
+	if ((prot & VM_PROT_WRITE) != 0)
+		newpte |= PG_RW;
+	if (wired)
+		newpte |= PG_W;
+	if (va < VM_MAXUSER_ADDRESS)
+		newpte |= PG_U;
+	if (pmap == kernel_pmap)
+		newpte |= pgeflag;
+
+	critical_enter();
+	/*
+	 * if the mapping or permission bits are different, we need
+	 * to update the pte.
+	 */
+	if ((origpte & ~(PG_M|PG_A)) != newpte) {
+		if (origpte & PG_V) {
+			invlva = FALSE;
+#ifdef XEN
+			origpte = *pte;	/* re-read the live PTE just before replacing it */
+			PT_SET_VA(pte, newpte | PG_A, FALSE);
+#else			
+			origpte = pte_load_store(pte, newpte | PG_A);
+#endif
+			if (origpte & PG_A) {
+				if (origpte & PG_MANAGED)
+					vm_page_flag_set(om, PG_REFERENCED);
+				if (opa != VM_PAGE_TO_PHYS(m))
+					invlva = TRUE;
+			}
+			if (origpte & PG_M) {
+				KASSERT((origpte & PG_RW),
+	("pmap_enter: modified page not writable: va: %#x, pte: %#jx",
+				    va, (uintmax_t)origpte));
+				if ((origpte & PG_MANAGED) != 0)
+					vm_page_dirty(om);
+				if ((prot & VM_PROT_WRITE) == 0)
+					invlva = TRUE;
+			}
+			if (invlva)
+				pmap_invalidate_page(pmap, va);
+		} else {
+#ifdef XEN
+			PT_SET_VA(pte, newpte | PG_A, FALSE);
+#else			
+			pte_store(pte, newpte | PG_A);
+#endif
+		}
+	}
+	PT_UPDATES_FLUSH();	/* follows the PT_SET_VA(..., FALSE) calls above */
+	critical_exit();
+	sched_unpin();
+	vm_page_unlock_queues();
+	PMAP_UNLOCK(pmap);
+}
+
+/*
+ * Maps a sequence of resident pages belonging to the same object.
+ * The sequence begins with the given page m_start.  This page is
+ * mapped at the given virtual address start.  Each subsequent page is
+ * mapped at a virtual address that is offset from start by the same
+ * amount as the page is offset from m_start within the object.  The
+ * last page in the sequence is the page with the largest offset from
+ * m_start that can be mapped at a virtual address less than the given
+ * virtual address end.  Not every virtual page between start and end
+ * is mapped; only those for which a resident page exists with the
+ * corresponding offset from m_start are mapped.
+ */
+void
+pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
+    vm_page_t m_start, vm_prot_t prot)
+{
+	vm_pindex_t diff, psize;
+	vm_page_t m, mpte;
+
+	VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED);
+	mpte = NULL;	/* page table page hint handed from one call to the next */
+	psize = atop(end - start);
+	PMAP_LOCK(pmap);
+	for (m = m_start; m != NULL; m = TAILQ_NEXT(m, listq)) {
+		if ((diff = m->pindex - m_start->pindex) >= psize)
+			break;	/* this page would land at or beyond end */
+		mpte = pmap_enter_quick_locked(pmap, start + ptoa(diff), m,
+		    prot, mpte);
+	}
+	PMAP_UNLOCK(pmap);
+}
+
+/*
+ * this code makes some *MAJOR* assumptions:
+ * 1. Current pmap & pmap exists.
+ * 2. Not wired.
+ * 3. Read access.
+ * 4. No page table pages.
+ * but is *MUCH* faster than pmap_enter...
+ */
+
+void
+pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
+{
+	/* Locking wrapper: a single page, no page table page hint passed in. */
+	PMAP_LOCK(pmap);
+	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL);
+	PMAP_UNLOCK(pmap);
+}
+
+static vm_page_t
+pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
+    vm_prot_t prot, vm_page_t mpte)
+{
+	pt_entry_t *pte;
+	vm_paddr_t pa;
+	vm_page_t free;
+	
+	pa = VM_PAGE_TO_PHYS(m);
+	pa = pa ? xpmap_ptom(pa) >> PAGE_SHIFT : 0;	/* only used by the trace below; pa is reassigned later */
+	
+	CTR4(KTR_PMAP,
+	    "pmap_enter_quick_locked: pmap=%p va=0x%08x mfn=%d prot=0x%x",
+	    pmap, va, pa, prot);
+	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
+	    (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0,
+	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+
+	/*
+	 * In the case that a page table page is not
+	 * resident, we are creating it here.
+	 */
+	if (va < VM_MAXUSER_ADDRESS) {
+		unsigned ptepindex;
+		pd_entry_t ptepa;
+
+		/*
+		 * Calculate pagetable page index
+		 */
+		ptepindex = va >> PDRSHIFT;
+		if (mpte && (mpte->pindex == ptepindex)) {	/* the caller's hint is the right page */
+			mpte->wire_count++;
+		} else {
+			/*
+			 * Get the page directory entry
+			 */
+			ptepa = pmap->pm_pdir[ptepindex];
+
+			/*
+			 * If the page table page is mapped, we just increment
+			 * the hold count, and activate it.
+			 */
+			if (ptepa) {
+				ptepa = xpmap_mtop(ptepa);
+				if (ptepa & PG_PS)
+					panic("pmap_enter_quick: unexpected mapping into 4MB page");
+				mpte = PHYS_TO_VM_PAGE(ptepa);
+				mpte->wire_count++;
+			} else {
+				mpte = _pmap_allocpte(pmap, ptepindex,
+				    M_NOWAIT);
+				if (mpte == NULL)	/* no page table page without sleeping: give up */
+					return (mpte);
+			}
+		}
+	} else {
+		mpte = NULL;
+	}
+
+	/*
+	 * This call to vtopte makes the assumption that we are
+	 * entering the page into the current pmap.  In order to support
+	 * quick entry into any pmap, one would likely use pmap_pte_quick.
+	 * But that isn't as quick as vtopte.
+	 */
+	pte = vtopte(va);
+	if (*pte) {	/* already mapped: undo the wire_count bump and return NULL */
+		if (mpte != NULL) {
+			mpte->wire_count--;
+			mpte = NULL;
+		}
+		return (mpte);
+	}
+
+	/*
+	 * Enter on the PV list if part of our managed memory.
+	 */
+	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 &&
+	    !pmap_try_insert_pv_entry(pmap, va, m)) {
+		if (mpte != NULL) {
+			free = NULL;
+			if (pmap_unwire_pte_hold(pmap, mpte, &free)) {
+				pmap_invalidate_page(pmap, va);
+				pmap_free_zero_pages(free);
+			}
+			
+			mpte = NULL;
+		}
+		return (mpte);
+	}
+
+	/*
+	 * Increment counters
+	 */
+	pmap->pm_stats.resident_count++;
+
+	pa = VM_PAGE_TO_PHYS(m);
+
+	/*
+	 * Now validate mapping with RO protection
+	 */
+#ifdef XEN
+	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
+		PT_SET_VA(pte, pa | PG_V | PG_U, TRUE);
+	else
+		PT_SET_VA(pte, pa | PG_V | PG_U | PG_MANAGED, TRUE);
+#else	
+	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
+		pte_store(pte, pa | PG_V | PG_U);
+	else
+		pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
+#endif
+	return (mpte);
+}
+
+/*
+ * Map physical address pa into slot i of the crashdumpmap window.  This
+ * is only intended to be used for panic dumps.
+ */
+void *
+pmap_kenter_temporary(vm_paddr_t pa, int i)
+{
+	vm_offset_t slot = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
+
+	/* The return value is the base of the window, not the slot itself. */
+	pmap_kenter(slot, pa);
+	invlpg(slot);
+	return ((void *)crashdumpmap);
+}
+
+/*
+ * This code maps large physical mmap regions into the
+ * processor address space.  Note that some shortcuts
+ * are taken, but the code works.
+ */
+void
+pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
+		    vm_object_t object, vm_pindex_t pindex,
+		    vm_size_t size)
+{
+	vm_page_t p;
+
+	CTR5(KTR_PMAP,
+	    "pmap_object_init_pt: pmap=%p addr=0x%08x object=%p pindex=%d size=%d",
+	    pmap, addr, object, pindex, size);
+	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
+	KASSERT(object->type == OBJT_DEVICE,
+	    ("pmap_object_init_pt: non-device object"));
+	if (pseflag && 
+	    ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {	/* addr and size both NBPDR-aligned */
+		int i;
+		vm_page_t m[1];
+		unsigned int ptepindex;
+		int npdes;
+		pd_entry_t ptepa;
+
+		PMAP_LOCK(pmap);
+		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])	/* first directory slot already in use */
+			goto out;
+		PMAP_UNLOCK(pmap);
+retry:
+		p = vm_page_lookup(object, pindex);
+		if (p != NULL) {
+			vm_page_lock_queues();
+			if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
+				goto retry;
+		} else {
+			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
+			if (p == NULL)
+				return;
+			m[0] = p;
+
+			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
+				vm_page_lock_queues();
+				vm_page_free(p);
+				vm_page_unlock_queues();
+				return;
+			}
+
+			p = vm_page_lookup(object, pindex);
+			vm_page_lock_queues();
+			vm_page_wakeup(p);
+		}
+		vm_page_unlock_queues();
+		pmap_zero_page(p);
+		
+		ptepa = VM_PAGE_TO_PHYS(p);
+		if (ptepa & (NBPDR - 1))	/* backing page is not NBPDR-aligned: give up */
+			return;
+
+		p->valid = VM_PAGE_BITS_ALL;
+
+		PMAP_LOCK(pmap);
+		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
+		npdes = size >> PDRSHIFT;
+		critical_enter();
+		for (i = 0; i < npdes; i++) {	/* one PG_PS directory entry per NBPDR of size */
+#ifdef XEN			
+			int flags = PG_U | PG_RW | PG_V | PG_PS;
+#else
+			int flags = PG_U | PG_V | PG_PS;
+#endif
+			pde_store(&pmap->pm_pdir[ptepindex],
+			    ptepa | flags);
+			ptepa += NBPDR;
+			ptepindex += 1;
+		}
+		pmap_invalidate_all(pmap);
+		critical_exit();
+	out:
+		PMAP_UNLOCK(pmap);
+	}
+}
+
+/*
+ *	Routine:	pmap_change_wiring
+ *	Function:	Set or clear the wired attribute of the mapping
+ *			at va and keep pm_stats.wired_count in step.
+ *	In/out conditions:
+ *			The mapping must already exist in the pmap.
+ */
+void
+pmap_change_wiring(pmap, va, wired)
+	register pmap_t pmap;
+	vm_offset_t va;
+	boolean_t wired;
+{
+	register pt_entry_t *ptep;
+
+	vm_page_lock_queues();
+	PMAP_LOCK(pmap);
+	ptep = pmap_pte(pmap, va);
+
+	/* Touch the PTE and the counter only when the state really changes. */
+	if (pmap_pte_w(ptep)) {
+		if (!wired) {
+			PT_SET_VA_MA(ptep, *ptep & ~PG_W, TRUE);
+			pmap->pm_stats.wired_count--;
+		}
+	} else if (wired) {
+		PT_SET_VA_MA(ptep, *ptep | PG_W, TRUE);
+		pmap->pm_stats.wired_count++;
+	}
+
+	/* Wiring is not a hardware characteristic: no TLB invalidation. */
+	pmap_pte_release(ptep);
+	PMAP_UNLOCK(pmap);
+	vm_page_unlock_queues();
+}
+
+
+
+/*
+ *	Copy the range specified by src_addr/len
+ *	from the source map to the range dst_addr/len
+ *	in the destination map.
+ *
+ *	This routine is only advisory and need not do anything.
+ */
+
+void
+pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
+	  vm_offset_t src_addr)
+{
+	vm_page_t   free;
+	vm_offset_t addr;
+	vm_offset_t end_addr = src_addr + len;
+	vm_offset_t pdnxt;
+
+	if (dst_addr != src_addr)	/* only same-address copies are attempted */
+		return;
+
+	if (!pmap_is_current(src_pmap))	/* vtopte() below needs the source to be current */
+		return;
+
+	CTR5(KTR_PMAP,
+	    "pmap_copy:  dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x",
+	    dst_pmap, src_pmap, dst_addr, len, src_addr);
+
+	vm_page_lock_queues();
+	if (dst_pmap < src_pmap) {	/* take the two pmap locks in address order */
+		PMAP_LOCK(dst_pmap);
+		PMAP_LOCK(src_pmap);
+	} else {
+		PMAP_LOCK(src_pmap);
+		PMAP_LOCK(dst_pmap);
+	}
+	sched_pin();
+	for (addr = src_addr; addr < end_addr; addr = pdnxt) {
+		pt_entry_t *src_pte, *dst_pte;
+		vm_page_t dstmpte, srcmpte;
+		pd_entry_t srcptepaddr;
+		unsigned ptepindex;
+
+		if (addr >= UPT_MIN_ADDRESS)
+			panic("pmap_copy: invalid to pmap_copy page tables");
+
+		pdnxt = (addr + NBPDR) & ~PDRMASK;
+		ptepindex = addr >> PDRSHIFT;
+
+		srcptepaddr = src_pmap->pm_pdir[ptepindex];
+		if (srcptepaddr == 0)
+			continue;
+		if (srcptepaddr & PG_PS) {
+			if (dst_pmap->pm_pdir[ptepindex] == 0) {
+				dst_pmap->pm_pdir[ptepindex] = srcptepaddr &
+				    ~PG_W;
+				dst_pmap->pm_stats.resident_count +=
+				    NBPDR / PAGE_SIZE;
+			}
+			continue;
+		}
+
+		srcmpte = MACH_TO_VM_PAGE(srcptepaddr);
+		if (srcmpte->wire_count == 0)
+			panic("pmap_copy: source page table page is unused");
+
+		if (pdnxt > end_addr)
+			pdnxt = end_addr;
+
+		src_pte = vtopte(addr);
+		while (addr < pdnxt) {
+			pt_entry_t ptetemp;
+			ptetemp = *src_pte;
+			/*
+			 * we only virtual copy managed pages
+			 */
+			if ((ptetemp & PG_MANAGED) != 0) {
+				dstmpte = pmap_allocpte(dst_pmap, addr,
+				    M_NOWAIT);
+				if (dstmpte == NULL)
+					break;
+				dst_pte = pmap_pte_quick(dst_pmap, addr);
+				if (*dst_pte == 0 &&
+				    pmap_try_insert_pv_entry(dst_pmap, addr,
+				    MACH_TO_VM_PAGE(ptetemp & PG_FRAME))) {
+					/*
+					 * Clear the wired, modified, and
+					 * accessed (referenced) bits
+					 * during the copy.
+					 */
+					PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M |
+						PG_A), FALSE);
+					dst_pmap->pm_stats.resident_count++;
+				} else {
+					free = NULL;
+					if (pmap_unwire_pte_hold( dst_pmap,
+					    dstmpte, &free)) {
+						pmap_invalidate_page(dst_pmap,
+						    addr);
+						pmap_free_zero_pages(free);
+					}
+				}
+				if (dstmpte->wire_count >= srcmpte->wire_count)
+					break;
+			}
+			addr += PAGE_SIZE;
+			src_pte++;
+		}
+	}
+	PT_UPDATES_FLUSH();	/* as pmap_enter() does after its PT_SET_VA(..., FALSE) updates */
+	sched_unpin();
+	vm_page_unlock_queues();
+	PMAP_UNLOCK(src_pmap);
+	PMAP_UNLOCK(dst_pmap);
+}	
+
+/*
+ *	pmap_zero_page zeros the specified hardware page by mapping 
+ *	the page into KVM and using bzero to clear its contents.
+ */
+void
+pmap_zero_page(vm_page_t m)
+{
+	struct sysmaps *sysmaps;
+
+	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];	/* this CPU's mapping slots */
+	mtx_lock(&sysmaps->lock);
+	if (*sysmaps->CMAP2)
+		panic("pmap_zero_page: CMAP2 busy");
+	sched_pin();
+#ifdef XEN
+	PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M);
+#else	
+	*sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
+#endif
+	invlcaddr(sysmaps->CADDR2);
+	pagezero(sysmaps->CADDR2);	/* zero the page through the temporary mapping */
+#ifdef XEN
+	PT_SET_MA(sysmaps->CADDR2, 0);
+#else	
+	*sysmaps->CMAP2 = 0;	/* the busy check above expects the slot to be clear again */
+#endif
+	sched_unpin();
+	mtx_unlock(&sysmaps->lock);
+}
+
+/*
+ *	pmap_zero_page_area zeros part of the specified hardware page by
+ *	mapping the page at this CPU's CADDR2 window: pagezero() is used
+ *	for a whole page and bzero() for any smaller area.
+ *	off and size may not cover an area beyond a single hardware page.
+ */
+void
+pmap_zero_page_area(vm_page_t m, int off, int size)
+{
+	struct sysmaps *sysmaps;
+
+	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
+	mtx_lock(&sysmaps->lock);
+	if (*sysmaps->CMAP2)
+		panic("pmap_zero_page_area: CMAP2 busy");
+	sched_pin();
+#ifdef XEN
+	PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M);
+#else	
+	*sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
+#endif
+	invlcaddr(sysmaps->CADDR2);
+	if (off == 0 && size == PAGE_SIZE) 
+		pagezero(sysmaps->CADDR2);
+	else
+		bzero((char *)sysmaps->CADDR2 + off, size);
+#ifdef XEN
+	PT_SET_MA(sysmaps->CADDR2, 0);
+#else	
+	*sysmaps->CMAP2 = 0;
+#endif
+	sched_unpin();
+	mtx_unlock(&sysmaps->lock);
+}
+
+/*
+ *	pmap_zero_page_idle zeros the specified hardware page by mapping
+ *	the page at CADDR3 and clearing it with pagezero().  This
+ *	is intended to be called from the vm_pagezero process only and
+ *	outside of Giant.
+ */
+void
+pmap_zero_page_idle(vm_page_t m)
+{
+
+	if (*CMAP3)
+		panic("pmap_zero_page_idle: CMAP3 busy");
+	sched_pin();
+#ifdef XEN
+	PT_SET_MA(CADDR3, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M);
+#else
+	*CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
+#endif
+	invlcaddr(CADDR3);
+	pagezero(CADDR3);
+#ifdef XEN
+	PT_SET_MA(CADDR3, 0);
+#else	
+	*CMAP3 = 0;
+#endif
+	sched_unpin();
+}
+
+/*
+ *	pmap_copy_page copies the specified (machine independent)
+ *	page: src is mapped without PG_RW at this CPU's CADDR1 window,
+ *	dst is mapped writable at CADDR2, and bcopy copies PAGE_SIZE
+ *	bytes between the two windows.
+ */
+void
+pmap_copy_page(vm_page_t src, vm_page_t dst)
+{
+	struct sysmaps *sysmaps;
+
+	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];	/* this CPU's slots */
+	mtx_lock(&sysmaps->lock);
+	if (*sysmaps->CMAP1)		/* both windows must be unused */
+		panic("pmap_copy_page: CMAP1 busy");
+	if (*sysmaps->CMAP2)
+		panic("pmap_copy_page: CMAP2 busy");
+	sched_pin();
+	invlpg((u_int)sysmaps->CADDR1);
+	invlpg((u_int)sysmaps->CADDR2);
+#ifdef XEN
+	PT_SET_MA(sysmaps->CADDR1, PG_V | xpmap_ptom(VM_PAGE_TO_PHYS(src)) | PG_A);
+	PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(dst)) | PG_A | PG_M);
+
+#else	
+	*sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
+	*sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
+#endif
+	bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
+#ifdef XEN
+	PT_SET_MA(sysmaps->CADDR1, 0);	/* clear both temporary mappings */
+	PT_SET_MA(sysmaps->CADDR2, 0);
+#else	
+	*sysmaps->CMAP1 = 0;		/* clear both temporary mappings */
+	*sysmaps->CMAP2 = 0;
+#endif
+	sched_unpin();
+	mtx_unlock(&sysmaps->lock);
+}
+
+/*
+ * Returns true if the pmap's pv is one of the first
+ * 16 pvs linked to from this page.  This count may
+ * be changed upwards or downwards in the future; it
+ * is only necessary that true be returned for a small
+ * subset of pmaps for proper page aging.
+ */
+boolean_t
+pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
+{
+	pv_entry_t entry;
+	int examined;
+
+	/* Fictitious pages are always reported as not mapped. */
+	if ((m->flags & PG_FICTITIOUS) != 0)
+		return (FALSE);
+
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	examined = 0;
+	TAILQ_FOREACH(entry, &m->md.pv_list, pv_list) {
+		if (entry->pv_pmap == pmap)
+			return (TRUE);
+		/* Only the first 16 pv entries are inspected. */
+		examined++;
+		if (examined >= 16)
+			break;
+	}
+	return (FALSE);
+}
+
+#define PMAP_REMOVE_PAGES_CURPROC_ONLY
+/*
+ * Remove all unwired pages mapped in [sva, eva) from the specified
+ * address space; this aids process exit speeds.  Also, this code
+ * is special cased for current process only, but
+ * can have the more generic (and slightly slower)
+ * mode enabled.  This is much faster than pmap_remove
+ * in the case of running down an entire address space.
+ */
+void
+pmap_remove_pages(pmap, sva, eva)
+	pmap_t pmap;
+	vm_offset_t sva, eva;
+{
+	pt_entry_t *pte, tpte;
+	vm_page_t m, free = NULL;	/* free is passed to pmap_free_zero_pages() */
+	pv_entry_t pv, npv;
+
+	CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap);
+#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
+	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
+		printf("warning: pmap_remove_pages called with non-current pmap\n");
+		return;
+	}
+#endif
+	vm_page_lock_queues();
+	KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap"));
+	PMAP_LOCK(pmap);
+	sched_pin();
+	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {
+
+		if (pv->pv_va >= eva || pv->pv_va < sva) {	/* outside [sva, eva) */
+			npv = TAILQ_NEXT(pv, pv_plist);
+			continue;
+		}
+
+#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
+		pte = vtopte(pv->pv_va);
+#else
+		pte = pmap_pte_quick(pmap, pv->pv_va);
+#endif
+		tpte = *pte ? xpmap_mtop(*pte) : 0;
+
+		if (tpte == 0) {	/* a pv entry without a pte is fatal */
+			printf("TPTE at %p  IS ZERO @ VA %08x\n",
+							pte, pv->pv_va);
+			panic("bad pte");
+		}
+
+/*
+ * We cannot remove wired pages from a process' mapping at this time
+ */
+		if (tpte & PG_W) {
+			npv = TAILQ_NEXT(pv, pv_plist);
+			continue;
+		}
+
+		m = PHYS_TO_VM_PAGE(tpte);
+		KASSERT(m->phys_addr == (tpte & PG_FRAME),
+		    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
+		    m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));
+
+		KASSERT(m < &vm_page_array[vm_page_array_size],
+			("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));
+
+		pmap->pm_stats.resident_count--;
+
+#ifdef XEN
+		PT_SET_VA_MA(pte, 0, FALSE);		
+#else
+		pte_clear(pte);
+#endif
+		/*
+		 * Update the vm_page_t clean and reference bits.
+		 */
+		if (tpte & PG_M) {
+			vm_page_dirty(m);
+		}
+
+		npv = TAILQ_NEXT(pv, pv_plist);	/* fetch successor before unlinking pv */
+		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
+
+		m->md.pv_list_count--;
+		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
+		if (TAILQ_EMPTY(&m->md.pv_list))	/* last mapping of m is gone */
+			vm_page_flag_clear(m, PG_WRITEABLE);
+
+		pmap_unuse_pt(pmap, pv->pv_va, &free);
+		free_pv_entry(pv);
+	}
+	PT_UPDATES_FLUSH();
+	sched_unpin();
+	pmap_invalidate_all(pmap);
+	pmap_free_zero_pages(free);
+	vm_page_unlock_queues();
+	PMAP_UNLOCK(pmap);
+}
+
+/*
+ *	pmap_is_modified:
+ *
+ *	Return whether or not the specified physical page was modified
+ *	in any physical maps.
+ */
+boolean_t
+pmap_is_modified(vm_page_t m)
+{
+	pv_entry_t entry;
+	pt_entry_t *ptep;
+	boolean_t dirty = FALSE;
+
+	if ((m->flags & PG_FICTITIOUS) != 0)
+		return (FALSE);
+
+	sched_pin();
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	/* Stop at the first mapping whose pte has PG_M set. */
+	TAILQ_FOREACH(entry, &m->md.pv_list, pv_list) {
+		PMAP_LOCK(entry->pv_pmap);
+		ptep = pmap_pte_quick(entry->pv_pmap, entry->pv_va);
+		dirty = (*ptep & PG_M) != 0;
+		PMAP_UNLOCK(entry->pv_pmap);
+		if (dirty)
+			break;
+	}
+	sched_unpin();
+	return (dirty);
+}
+
+/*
+ *	pmap_is_prefaultable:
+ *
+ *	Return whether or not the specified virtual address is eligible
+ *	for prefault.
+ */
+boolean_t
+pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
+{
+	pt_entry_t *pte;
+	boolean_t rv;
+
+	rv = FALSE;
+#ifdef XEN
+	/*
+	 * disable prefaulting to start off: always report FALSE under XEN
+	 */
+	return (rv);
+#endif	
+	PMAP_LOCK(pmap);
+	if (*pmap_pde(pmap, addr)) {	/* pde for addr is non-zero */
+		pte = vtopte(addr);
+		rv = *pte == 0;		/* eligible only while the pte is zero */
+	}
+	PMAP_UNLOCK(pmap);
+	return (rv);
+}
+
+/* Clear PG_RW|PG_M in the pte of every page spanning len bytes at va. */
+void
+pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len)
+{
+	int i, npages = round_page(len) >> PAGE_SHIFT;	/* pages spanned by len */
+	for (i = 0; i < npages; i++) {
+		pt_entry_t *pte;
+		pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
+		pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M)));
+		PMAP_MARK_PRIV(xpmap_mtop(*pte));	/* applied after the pte is rewritten */
+		pmap_pte_release(pte);
+	}
+}
+
+void
+pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len)
+{	/* Sets PG_RW|PG_M in the pte of every page spanning len bytes at va. */
+	int i, npages = round_page(len) >> PAGE_SHIFT;	/* pages spanned by len */
+	for (i = 0; i < npages; i++) {
+		pt_entry_t *pte;
+		pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE));
+		PMAP_MARK_UNPRIV(xpmap_mtop(*pte));	/* applied before the pte is rewritten */
+		pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M));
+		pmap_pte_release(pte);
+	}
+}
+
+/*
+ *	Clear the given bit in each of the given page's ptes.  The bit is
+ *	expressed as a 32-bit mask.  Consequently, if the pte is 64 bits in
+ *	size, only a bit within the least significant 32 can be cleared.
+ */
+static __inline void
+pmap_clear_ptes(vm_page_t m, int bit)
+{
+	register pv_entry_t pv;
+	pt_entry_t pbits, *pte;
+
+	if ((m->flags & PG_FICTITIOUS) ||
+	    (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))
+		return;		/* nothing to do for this page */
+
+	sched_pin();
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	/*
+	 * Loop over all current mappings setting/clearing as appropriate.  If
+	 * setting RO do we need to clear the VAC?
+	 */
+	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+		PMAP_LOCK(pv->pv_pmap);
+		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+retry:
+		pbits = *pte;
+		if (pbits & bit) {
+			if (bit == PG_RW) {
+				/*
+				 * Regardless of whether a pte is 32 or 64 bits
+				 * in size, PG_RW and PG_M are among the least
+				 * significant 32 bits.
+				 */
+#ifdef XEN
+				PT_SET_VA_MA(pte, (pbits & ~(PG_RW|PG_M)), TRUE);
+				if (*pte != (pbits & ~(PG_RW|PG_M)))	/* pte differs: redo */
+					goto retry;
+#else				
+				if (!atomic_cmpset_int((u_int *)pte, pbits,
+				    pbits & ~(PG_RW | PG_M)))
+					goto retry;
+#endif
+				if (pbits & PG_M) {	/* PG_M is cleared with PG_RW: record it */
+					vm_page_dirty(m);
+				}
+			} else {
+#ifdef XEN
+				PT_SET_VA_MA(pte, pbits & ~bit, TRUE); 
+#else				
+				atomic_clear_int((u_int *)pte, bit);
+#endif
+			}
+			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+		}
+		PMAP_UNLOCK(pv->pv_pmap);
+	}
+	if (bit == PG_RW)	/* no mapping of m has PG_RW any more */
+		vm_page_flag_clear(m, PG_WRITEABLE);
+	sched_unpin();
+}
+
+/*
+ *      pmap_page_protect:
+ *
+ *      Lower the permission for all mappings to a given page.
+ */
+void
+pmap_page_protect(vm_page_t m, vm_prot_t prot)
+{
+	/* Write access kept: nothing to lower.  Otherwise revoke or unmap. */
+	if ((prot & VM_PROT_WRITE) != 0)
+		return;
+	if ((prot & (VM_PROT_READ | VM_PROT_EXECUTE)) != 0)
+		pmap_clear_ptes(m, PG_RW);
+	else
+		pmap_remove_all(m);
+}
+
+/*
+ *	pmap_ts_referenced:
+ *
+ *	Return a count of reference bits for a page, clearing those bits.
+ *	It is not necessary for every reference bit to be cleared, but it
+ *	is necessary that 0 only be returned when there are truly no
+ *	reference bits set.
+ *
+ *	XXX: The exact number of bits to check and clear is a matter that
+ *	should be tested and standardized at some point in the future for
+ *	optimal aging of shared pages.
+ */
+int
+pmap_ts_referenced(vm_page_t m)
+{
+	register pv_entry_t pv, pvf, pvn;	/* current, first and next entry */
+	pt_entry_t *pte;
+	pt_entry_t v;
+	int rtval = 0;
+
+	if (m->flags & PG_FICTITIOUS)
+		return (rtval);
+
+	sched_pin();
+	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
+
+		pvf = pv;
+
+		do {
+			pvn = TAILQ_NEXT(pv, pv_list);
+
+			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);	/* rotate pv to the tail */
+
+			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
+
+			PMAP_LOCK(pv->pv_pmap);
+			pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+
+			if (pte && ((v = *pte) & PG_A) != 0) {
+#ifdef XEN
+				PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE);
+#else
+				atomic_clear_int((u_int *)pte, PG_A);
+#endif
+				pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
+
+				rtval++;
+				if (rtval > 4) {	/* stop after five cleared bits */
+					PMAP_UNLOCK(pv->pv_pmap);
+					break;
+				}
+			}
+			PMAP_UNLOCK(pv->pv_pmap);
+		} while ((pv = pvn) != NULL && pv != pvf);	/* until the end or back at pvf */
+	}
+	PT_UPDATES_FLUSH();
+	sched_unpin();
+
+	return (rtval);
+}
+
+/*
+ *	Clear the modify bit (PG_M) in every pte of the specified page.
+ */
+void
+pmap_clear_modify(vm_page_t m)
+{
+	pmap_clear_ptes(m, PG_M);
+}
+
+/*
+ *	pmap_clear_reference:
+ *
+ *	Clear the reference bit (PG_A) in every pte of the specified page.
+ */
+void
+pmap_clear_reference(vm_page_t m)
+{
+	pmap_clear_ptes(m, PG_A);
+}
+
+/*
+ * Miscellaneous support routines follow
+ */
+
+/*
+ * Map a set of physical memory pages into the kernel virtual
+ * address space using cache mode 'mode'. Return a pointer to where
+ * pa itself is mapped. This routine is intended to be used for
+ * mapping device memory, NOT real memory.
+ */
+void *
+pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
+{
+	vm_offset_t va, tmpva, offset;
+
+	offset = pa & PAGE_MASK;	/* offset of pa within its page */
+	size = roundup(offset + size, PAGE_SIZE);
+	pa = pa & PG_FRAME;
+
+	if (pa < KERNLOAD && pa + size <= KERNLOAD)
+		va = KERNBASE + pa;	/* range below KERNLOAD: no KVA allocation */
+	else
+		va = kmem_alloc_nofault(kernel_map, size);
+	if (!va)
+		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
+
+	for (tmpva = va; size > 0; ) {	/* enter one page per iteration */
+		pmap_kenter_attr(tmpva, pa, mode);
+		size -= PAGE_SIZE;
+		tmpva += PAGE_SIZE;
+		pa += PAGE_SIZE;
+	}
+	pmap_invalidate_range(kernel_pmap, va, tmpva);
+	pmap_invalidate_cache();
+	return ((void *)(va + offset));	/* re-apply the sub-page offset */
+}
+
+void *
+pmap_mapdev(vm_paddr_t pa, vm_size_t size)
+{
+	/* Same as pmap_mapdev_attr() with the PAT_UNCACHEABLE mode. */
+	return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
+}
+
+void *
+pmap_mapbios(vm_paddr_t pa, vm_size_t size)
+{
+	/* Same as pmap_mapdev_attr() with the PAT_WRITE_BACK mode. */
+	return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
+}
+
+void
+pmap_unmapdev(vm_offset_t va, vm_size_t size)
+{
+	vm_offset_t cur, first, pgoff;
+
+	/* Ranges inside KERNBASE .. KERNBASE + KERNLOAD are left alone. */
+	if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
+		return;
+	pgoff = va & PAGE_MASK;
+	first = va & PG_FRAME;
+	size = roundup(pgoff + size, PAGE_SIZE);
+	critical_enter();
+	/* Remove each page's kernel mapping, then invalidate the range. */
+	for (cur = first; cur < first + size; cur += PAGE_SIZE)
+		pmap_kremove(cur);
+	pmap_invalidate_range(kernel_pmap, va, cur);
+	critical_exit();
+	kmem_free(kernel_map, first, size);
+}
+
+/*
+ * Change the cache mode of the 4K kernel mappings covering [va, va + size).
+ * Returns EINVAL for user, unmapped or 4MB-page addresses, 0 on success.
+ */
+int
+pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
+{
+	vm_offset_t base, offset, tmpva;
+	pt_entry_t *pte;
+	vm_paddr_t opte, npte;
+	pd_entry_t *pde;
+
+	base = va & PG_FRAME;
+	offset = va & PAGE_MASK;
+	size = roundup(offset + size, PAGE_SIZE);
+
+	/* Only supported on kernel virtual addresses. */
+	if (base <= VM_MAXUSER_ADDRESS)
+		return (EINVAL);
+
+	/* 4MB pages and pages that aren't mapped aren't supported. */
+	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
+		pde = pmap_pde(kernel_pmap, tmpva);
+		if (*pde & PG_PS)
+			return (EINVAL);
+		if (*pde == 0)
+			return (EINVAL);
+		pte = vtopte(tmpva);	/* check every page, not only the first */
+		if (*pte == 0)
+			return (EINVAL);
+	}
+
+	/*
+	 * Ok, all the pages exist and are 4k, so run through them updating
+	 * their cache mode.
+	 */
+	for (tmpva = base; size > 0; ) {
+		pte = vtopte(tmpva);
+
+		/*
+		 * The cache mode bits are all in the low 32-bits of the
+		 * PTE, so we can just spin on updating the low 32-bits.
+		 */
+		do {
+			opte = *pte;
+			npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
+			npte |= pmap_cache_bits(mode, 0);
+#ifdef XEN
+			PT_SET_VA_MA(pte, npte, TRUE);
+#endif			
+		}
+#ifdef XEN
+		while (npte != opte && (*pte != npte));
+#else		
+		while (npte != opte &&
+		    !atomic_cmpset_int((u_int *)pte, opte, npte));
+#endif
+		tmpva += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+
+	/*
+	 * Flush CPU caches so that no data stays cached that shouldn't be.
+	 */
+	pmap_invalidate_range(kernel_pmap, base, tmpva);
+	pmap_invalidate_cache();
+	return (0);
+}
+
+/*
+ * perform the pmap work for mincore: returns MINCORE_* flags for addr
+ */
+int
+pmap_mincore(pmap, addr)
+	pmap_t pmap;
+	vm_offset_t addr;
+{
+	pt_entry_t *ptep, pte;
+	vm_page_t m;
+	int val = 0;
+	
+	PMAP_LOCK(pmap);
+	ptep = pmap_pte(pmap, addr);
+	pte = (ptep != NULL) ? PT_GET(ptep) : 0;	/* snapshot the pte under the lock */
+	pmap_pte_release(ptep);
+	PMAP_UNLOCK(pmap);
+
+	if (pte != 0) {
+		vm_paddr_t pa;
+
+		val = MINCORE_INCORE;
+		if ((pte & PG_MANAGED) == 0)	/* unmanaged: only INCORE is reported */
+			return val;
+
+		pa = pte & PG_FRAME;
+
+		m = PHYS_TO_VM_PAGE(pa);
+
+		/*
+		 * Modified by us
+		 */
+		if (pte & PG_M)
+			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
+		else {
+			/*
+			 * Modified by someone else
+			 */
+			vm_page_lock_queues();
+			if (m->dirty || pmap_is_modified(m))
+				val |= MINCORE_MODIFIED_OTHER;
+			vm_page_unlock_queues();
+		}
+		/*
+		 * Referenced by us
+		 */
+		if (pte & PG_A)
+			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
+		else {
+			/*
+			 * Referenced by someone else
+			 */
+			vm_page_lock_queues();
+			if ((m->flags & PG_REFERENCED) ||
+			    pmap_ts_referenced(m)) {	/* note: clears reference bits */
+				val |= MINCORE_REFERENCED_OTHER;
+				vm_page_flag_set(m, PG_REFERENCED);
+			}
+			vm_page_unlock_queues();
+		}
+	} 
+	return val;
+}
+
+void
+pmap_activate(struct thread *td)
+{
+	pmap_t	pmap, oldpmap;	/* td's process pmap and this CPU's curpmap */
+	u_int32_t  cr3;		/* vtophys() of the top-level page table */
+
+	critical_enter();	/* switch this CPU over to td's pmap */
+	pmap = vmspace_pmap(td->td_proc->p_vmspace);
+	oldpmap = PCPU_GET(curpmap);
+#if defined(SMP)
+	atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
+	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+#else
+	oldpmap->pm_active &= ~1;
+	pmap->pm_active |= 1;
+#endif
+#ifdef PAE
+	cr3 = vtophys(pmap->pm_pdpt);
+#else
+	cr3 = vtophys(pmap->pm_pdir);
+#endif
+	/*
+	 * pmap_activate is for the current thread on the current cpu
+	 */
+	td->td_pcb->pcb_cr3 = cr3;
+	PT_UPDATES_FLUSH();
+	load_cr3(cr3);
+	PCPU_SET(curpmap, pmap);
+	critical_exit();
+}
+
+vm_offset_t
+pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
+{
+	/*
+	 * Only device objects of at least NBPDR bytes get their hint
+	 * rounded up with PDRMASK; every other hint is returned as is.
+	 */
+	if (obj != NULL && size >= NBPDR && obj->type == OBJT_DEVICE)
+		addr = (addr + PDRMASK) & ~PDRMASK;
+	return (addr);
+}
+
+#ifdef XEN
+
+void
+pmap_suspend()
+{
+	pmap_t pmap;
+	int i, pdir, offset;
+	vm_paddr_t pdirma;	/* pm_pdpt[pdir] & PG_FRAME */
+	mmu_update_t mu[4];	/* one update per recursive-map slot */
+
+	/*
+	 * We need to remove the recursive mapping structure from all
+	 * our pmaps so that Xen doesn't get confused when it restores
+	 * the page tables. The recursive map lives at page directory
+	 * index PTDPTDI. We assume that the suspend code has stopped
+	 * the other vcpus (if any).
+	 */
+	LIST_FOREACH(pmap, &allpmaps, pm_list) {
+		for (i = 0; i < 4; i++) {
+			/*
+			 * Figure out which page directory (L2) page
+			 * contains this bit of the recursive map and
+			 * the offset within that page of the map
+			 * entry
+			 */
+			pdir = (PTDPTDI + i) / NPDEPG;
+			offset = (PTDPTDI + i) % NPDEPG;
+			pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
+			mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
+			mu[i].val = 0;	/* zero the recursive-map entry */
+		}
+		HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);	/* result is not checked */
+	}
+}
+
+void
+pmap_resume()
+{
+	pmap_t pmap;
+	int i, pdir, offset;
+	vm_paddr_t pdirma;	/* pm_pdpt[pdir] & PG_FRAME */
+	mmu_update_t mu[4];	/* one update per recursive-map slot */
+
+	/*
+	 * Restore the recursive map that we removed on suspend.
+	 */
+	LIST_FOREACH(pmap, &allpmaps, pm_list) {
+		for (i = 0; i < 4; i++) {
+			/*
+			 * Figure out which page directory (L2) page
+			 * contains this bit of the recursive map and
+			 * the offset within that page of the map
+			 * entry
+			 */
+			pdir = (PTDPTDI + i) / NPDEPG;
+			offset = (PTDPTDI + i) % NPDEPG;
+			pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
+			mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
+			mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V;	/* slot i gets pm_pdpt[i]'s frame */
+		}
+		HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);	/* result is not checked */
+	}
+}
+
+#endif
+
+#if defined(PMAP_DEBUG)
+/* Print the user-space ptes of process pid; returns how many were printed. */
+int
+pmap_pid_dump(int pid)
+{
+	pmap_t pmap;
+	struct proc *p;
+	int index, npte = 0;
+
+	sx_slock(&allproc_lock);
+	FOREACH_PROC_IN_SYSTEM(p) {
+		if (p->p_pid != pid)
+			continue;
+		if (p->p_vmspace) {
+			int i,j;
+			index = 0;
+			pmap = vmspace_pmap(p->p_vmspace);
+			for (i = 0; i < NPDEPTD; i++) {
+				pd_entry_t *pde;
+				pt_entry_t *pte;
+				vm_offset_t base = (vm_offset_t)i << PDRSHIFT;	/* avoid int overflow */
+				pde = &pmap->pm_pdir[i];
+				if (pde && pmap_pde_v(pde)) {
+					for (j = 0; j < NPTEPG; j++) {
+						vm_offset_t va = base + (j << PAGE_SHIFT);
+						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
+							if (index) {
+								index = 0;
+								printf("\n");
+							}
+							sx_sunlock(&allproc_lock);
+							return npte;
+						}
+						pte = pmap_pte(pmap, va);
+						if (pte && pmap_pte_v(pte)) {
+							pt_entry_t pa;
+							vm_page_t m;
+							pa = PT_GET(pte);
+							m = PHYS_TO_VM_PAGE(pa);
+							printf("va: 0x%x, pt: 0x%jx, h: %d, w: %d, f: 0x%x",
+								va, (uintmax_t)pa, m->hold_count, m->wire_count, m->flags);
+							npte++;
+							index++;
+							if (index >= 2) {
+								index = 0;
+								printf("\n");
+							} else {
+								printf(" ");
+							}
+						}
+						pmap_pte_release(pte);	/* pair every pmap_pte() */
+					}
+				}
+			}
+		}
+	}
+	sx_sunlock(&allproc_lock);
+	return npte;
+}
+#endif
+
+#if defined(DEBUG)
+
+static void	pads(pmap_t pm);
+void		pmap_pvdump(vm_paddr_t pa);
+
+/* print the mappings of a non-kernel pmap up to UPT_MAX_ADDRESS */
+static void
+pads(pmap_t pm)
+{
+	int i, j;
+	vm_offset_t va;
+	pt_entry_t *ptep;
+
+	if (pm == kernel_pmap)
+		return;
+	for (i = 0; i < NPDEPTD; i++) {
+		if (pm->pm_pdir[i] == 0)
+			continue;
+		for (j = 0; j < NPTEPG; j++) {
+			va = ((vm_offset_t)i << PDRSHIFT) + (j << PAGE_SHIFT);
+			if (va > UPT_MAX_ADDRESS)	/* pm is never kernel_pmap here */
+				continue;
+			ptep = pmap_pte(pm, va);
+			if (pmap_pte_v(ptep))
+				printf("%x:%jx ", va, (uintmax_t)*ptep);
+			pmap_pte_release(ptep);	/* pair every pmap_pte() */
+		}
+	}
+}
+
+void
+pmap_pvdump(vm_paddr_t pa)
+{
+	pv_entry_t pv;
+	vm_page_t m;
+	/* Print each pv entry of the page at pa, then pads() its pmap. */
+	printf("pa %jx", (uintmax_t)pa);	/* vm_paddr_t may be wider than int */
+	m = PHYS_TO_VM_PAGE(pa);
+	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
+		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
+		pads(pv->pv_pmap);
+	}
+	printf(" ");
+}
+#endif

Property changes on: i386/xen/pmap.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: i386/pci/pci_pir.c
===================================================================
--- i386/pci/pci_pir.c	(.../stable/6/sys)	(revision 184012)
+++ i386/pci/pci_pir.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -137,6 +137,10 @@
 	int i;
 	uint8_t ck, *cv;
 
+#ifdef XEN
+	return;
+#else	
+	
 	/* Don't try if we've already found a table. */
 	if (pci_route_table != NULL)
 		return;
@@ -147,7 +151,7 @@
 		sigaddr = bios_sigsearch(0, "_PIR", 4, 16, 0);
 	if (sigaddr == 0)
 		return;
-
+#endif
 	/* If we found something, check the checksum and length. */
 	/* XXX - Use pmap_mapdev()? */
 	pt = (struct PIR_table *)(uintptr_t)BIOS_PADDRTOVADDR(sigaddr);
@@ -478,7 +482,11 @@
 	args.eax = PCIBIOS_ROUTE_INTERRUPT;
 	args.ebx = (bus << 8) | (device << 3) | func;
 	args.ecx = (irq << 8) | (0xa + pin);
+#ifdef XEN
+	return (0);
+#else	
 	return (bios32(&args, PCIbios.ventry, GSEL(GCODE_SEL, SEL_KPL)));
+#endif
 }
 
 
Index: i386/pci/pci_cfgreg.c
===================================================================
--- i386/pci/pci_cfgreg.c	(.../stable/6/sys)	(revision 184012)
+++ i386/pci/pci_cfgreg.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -82,9 +82,10 @@
 
 static int	pcireg_cfgread(int bus, int slot, int func, int reg, int bytes);
 static void	pcireg_cfgwrite(int bus, int slot, int func, int reg, int data, int bytes);
+#ifndef XEN
 static int	pcireg_cfgopen(void);
-
 static int	pciereg_cfgopen(void);
+#endif
 static int	pciereg_cfgread(int bus, int slot, int func, int reg,
 				int bytes);
 static void	pciereg_cfgwrite(int bus, int slot, int func, int reg,
@@ -105,6 +106,7 @@
 	return (line);
 }
 
+#ifndef XEN
 static u_int16_t
 pcibios_get_version(void)
 {
@@ -125,6 +127,7 @@
 	}
 	return (args.ebx & 0xffff);
 }
+#endif
 
 /* 
  * Initialise access to PCI configuration space 
@@ -132,6 +135,9 @@
 int
 pci_cfgregopen(void)
 {
+#ifdef XEN
+	return (0);
+#else	
 	static int		opened = 0;
 	u_int16_t		vid, did;
 	u_int16_t		v;
@@ -171,6 +177,7 @@
 	}
 
 	return(1);
+#endif /* !XEN */
 }
 
 /* 
@@ -349,6 +356,7 @@
 	mtx_unlock_spin(&pcicfg_mtx);
 }
 
+#ifndef XEN
 /* check whether the configuration mechanism has been correctly identified */
 static int
 pci_cfgcheck(int maxdev)
@@ -526,6 +534,7 @@
 	devmax = 32;
 	return (1);
 }
+#endif /* !XEN */
 
 #define PCIE_PADDR(bar, reg, bus, slot, func)	\
 	((bar)				|	\
Index: i386/isa/npx.c
===================================================================
--- i386/isa/npx.c	(.../stable/6/sys)	(revision 184012)
+++ i386/isa/npx.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -69,6 +69,10 @@
 #include <machine/ucontext.h>
 
 #include <machine/intr_machdep.h>
+#include <machine/xen/xen-os.h>
+#ifdef XEN
+#include <xen/hypervisor.h>
+#endif
 #ifdef DEV_ISA
 #include <isa/isavar.h>
 #endif
@@ -101,10 +105,15 @@
 #define	fxsave(addr)		__asm __volatile("fxsave %0" : "=m" (*(addr)))
 #define	ldmxcsr(__csr)		__asm __volatile("ldmxcsr %0" : : "m" (__csr))
 #endif
+#ifdef XEN
+#define start_emulating()	(HYPERVISOR_fpu_taskswitch(1))
+#define stop_emulating()	(HYPERVISOR_fpu_taskswitch(0))
+#else
 #define	start_emulating()	__asm("smsw %%ax; orb %0,%%al; lmsw %%ax" \
 				      : : "n" (CR0_TS) : "ax")
 #define	stop_emulating()	__asm("clts")
 
+#endif
 #else	/* !(__GNUCLIKE_ASM && !lint) */
 
 void	fldcw(caddr_t addr);
Index: xen/features.c
===================================================================
--- xen/features.c	(.../stable/6/sys)	(revision 0)
+++ xen/features.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,26 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/features.h>
+
+uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32] /* __read_mostly */;
+
+void
+setup_xen_features(void)
+{
+        xen_feature_info_t fi;
+        int i, j;
+        /* Cache every feature bit of each submap as one byte (0 or 1). */
+        for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
+                fi.submap_idx = i;
+                if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
+                        break;  /* remaining entries are left untouched */
+                for (j = 0; j < 32; j++)        /* 1U: bit 31 must not hit the sign bit */
+                        xen_features[i*32 + j] = !!(fi.submap & 1U<<j);
+        }
+}

Property changes on: xen/features.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: xen/gnttab.c
===================================================================
--- xen/gnttab.c	(.../stable/6/sys)	(revision 0)
+++ xen/gnttab.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,673 @@
+/******************************************************************************
+ * gnttab.c
+ * 
+ * Two sets of functionality:
+ * 1. Granting foreign access to our memory reservation.
+ * 2. Accessing others' memory reservations via grant references.
+ * (i.e., mechanisms for both sender and recipient of grant references)
+ * 
+ * Copyright (c) 2005, Christopher Clark
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include "opt_global.h"
+#include "opt_pmap.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/conf.h>
+#include <sys/module.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mman.h>
+
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <machine/xen/synch_bitops.h>
+
+#include <xen/hypervisor.h>
+#include <xen/gnttab.h>
+
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_extern.h>
+#include <vm/pmap.h>
+
+#define cmpxchg(a, b, c) atomic_cmpset_int((volatile u_int *)(a),(b),(c))
+
+/* External tools reserve first few grant table entries. */
+#define NR_RESERVED_ENTRIES 8
+#define GNTTAB_LIST_END 0xffffffff
+#define GREFS_PER_GRANT_FRAME (PAGE_SIZE / sizeof(grant_entry_t))
+
+static grant_ref_t **gnttab_list;
+static unsigned int nr_grant_frames;
+static unsigned int boot_max_nr_grant_frames;
+static int gnttab_free_count;
+static grant_ref_t gnttab_free_head;
+static struct mtx gnttab_list_lock;
+
+static grant_entry_t *shared;
+
+static struct gnttab_free_callback *gnttab_free_callback_list = NULL;
+
+static int gnttab_expand(unsigned int req_entries);
+
+#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
+#define gnttab_entry(entry) (gnttab_list[(entry) / RPP][(entry) % RPP])
+
+static int
+get_free_entries(int count, int *entries)
+{
+	int ref, error;
+	grant_ref_t head;
+	/* Detach a chain of count free entries; *entries gets the first. */
+	mtx_lock(&gnttab_list_lock);
+	if ((gnttab_free_count < count) &&
+	    ((error = gnttab_expand(count - gnttab_free_count)) != 0)) {
+		mtx_unlock(&gnttab_list_lock);
+		return (error);	/* gnttab_expand() failed */
+	}
+	ref = head = gnttab_free_head;
+	gnttab_free_count -= count;
+	while (count-- > 1)	/* walk to the last entry of the chain */
+		head = gnttab_entry(head);
+	gnttab_free_head = gnttab_entry(head);
+	gnttab_entry(head) = GNTTAB_LIST_END;	/* terminate the detached chain */
+	mtx_unlock(&gnttab_list_lock);	
+
+	*entries = ref;
+	return (0);
+}
+
+static void
+do_free_callbacks(void)
+{
+	struct gnttab_free_callback *callback, *next;
+
+	callback = gnttab_free_callback_list;
+	gnttab_free_callback_list = NULL;	/* detach the pending list */
+	/* Run callbacks whose count is now available; re-queue the rest. */
+	while (callback != NULL) {
+		next = callback->next;
+		if (gnttab_free_count >= callback->count) {
+			callback->next = NULL;
+			callback->fn(callback->arg);
+		} else {
+			callback->next = gnttab_free_callback_list;
+			gnttab_free_callback_list = callback;
+		}
+		callback = next;
+	}
+}
+
+static inline void
+check_free_callbacks(void)
+{
+	if (unlikely(gnttab_free_callback_list != NULL))	/* callbacks pending */
+		do_free_callbacks();
+}
+
+static void
+put_free_entry(grant_ref_t ref)
+{
+	/* Push ref on the head of the free list, then check the callbacks. */
+	mtx_lock(&gnttab_list_lock);
+	gnttab_entry(ref) = gnttab_free_head;
+	gnttab_free_head = ref;
+	gnttab_free_count++;
+	check_free_callbacks();
+	mtx_unlock(&gnttab_list_lock);	
+}
+
+/*
+ * Public grant-issuing interface functions
+ */
+
+int
+gnttab_grant_foreign_access(domid_t domid, unsigned long frame, int readonly,
+	grant_ref_t *result)
+{
+	int error, ref;
+	uint16_t flags;
+
+	error = get_free_entries(1, &ref);
+	if (unlikely(error))
+		return (error);
+
+	/* wmb() orders the frame/domid stores before the flags store. */
+	flags = readonly ? (GTF_permit_access | GTF_readonly) : GTF_permit_access;
+	shared[ref].frame = frame;
+	shared[ref].domid = domid;
+	wmb();
+	shared[ref].flags = flags;
+	if (result != NULL)
+		*result = ref;
+	return (0);
+}
+
+void
+gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+				unsigned long frame, int readonly)
+{
+	/* Fill in shared entry ref; flags are stored last, after wmb(). */
+	shared[ref].frame = frame;
+	shared[ref].domid = domid;
+	wmb();
+	shared[ref].flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
+}
+
+int
+gnttab_query_foreign_access(grant_ref_t ref)
+{
+	uint16_t inuse;
+
+	/* Non-zero while the entry has GTF_reading or GTF_writing set. */
+	inuse = shared[ref].flags & (GTF_reading | GTF_writing);
+	return (inuse);
+}
+
+int
+gnttab_end_foreign_access_ref(grant_ref_t ref)
+{
+	uint16_t flags, nflags;	/* expected flags and the value last seen */
+
+	nflags = shared[ref].flags;
+	do {
+		if ( (flags = nflags) & (GTF_reading|GTF_writing) ) {
+			printf("WARNING: g.e. still in use!\n");
+			return (0);	/* entry left untouched */
+		}
+	} while ((nflags = synch_cmpxchg(&shared[ref].flags, flags, 0)) !=
+	       flags);	/* retry until the flags are swapped to 0 */
+
+	return (1);
+}
+
+void
+gnttab_end_foreign_access(grant_ref_t ref, void *page)
+{
+	if (!gnttab_end_foreign_access_ref(ref)) {
+		/* XXX This needs to be fixed so that the ref and page are
+		   placed on a list to be freed up later. */
+		printf("WARNING: leaking g.e. and page still in use!\n");
+		return;
+	}
+
+	/* Access has ended: recycle the reference and release the page. */
+	put_free_entry(ref);
+	if (page != NULL)
+		free(page, M_DEVBUF);
+}
+
+int
+gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn,
+    grant_ref_t *result)
+{
+	int error, ref;
+
+	/* Returns 0 with the new reference in *result, else an error. */
+	error = get_free_entries(1, &ref);
+	if (unlikely(error))
+		return (error);
+	gnttab_grant_foreign_transfer_ref(ref, domid, pfn);
+	if (result != NULL)	/* NULL tolerated, as in gnttab_grant_foreign_access */
+		*result = ref;
+	return (0);
+}
+
+void
+gnttab_grant_foreign_transfer_ref(grant_ref_t ref, domid_t domid,
+	unsigned long pfn)
+{
+	shared[ref].frame = pfn;
+	shared[ref].domid = domid;
+	wmb();	/* frame and domid are stored before the flags */
+	shared[ref].flags = GTF_accept_transfer;
+}
+
+unsigned long
+gnttab_end_foreign_transfer_ref(grant_ref_t ref)
+{
+	unsigned long frame;
+	uint16_t      flags;
+
+	/*
+	 * If a transfer is not even yet started, try to reclaim the grant
+	 * reference and return failure (== 0).
+	 */
+	while (!((flags = shared[ref].flags) & GTF_transfer_committed)) {
+		if ( synch_cmpxchg(&shared[ref].flags, flags, 0) == flags )
+			return (0);	/* flags cleared before any commit */
+		cpu_relax();
+	}
+
+	/* If a transfer is in progress then wait until it is completed. */
+	while (!(flags & GTF_transfer_completed)) {
+		flags = shared[ref].flags;
+		cpu_relax();
+	}
+
+	/* Read the frame number /after/ reading completion status. */
+	rmb();
+	frame = shared[ref].frame;
+	KASSERT(frame != 0, ("grant table inconsistent"));
+
+	return (frame);
+}
+
+unsigned long
+gnttab_end_foreign_transfer(grant_ref_t ref)
+{
+	unsigned long frame = gnttab_end_foreign_transfer_ref(ref);
+	/* The reference goes back on the free list whatever frame is. */
+	put_free_entry(ref);
+	return (frame);
+}
+
+void
+gnttab_free_grant_reference(grant_ref_t ref)
+{
+	/* Release one reference; all the work is done by put_free_entry(). */
+	put_free_entry(ref);
+}
+
+void
+gnttab_free_grant_references(grant_ref_t head)
+{
+	grant_ref_t tail;
+	int nrefs;
+
+	if (head == GNTTAB_LIST_END)
+		return;
+
+	mtx_lock(&gnttab_list_lock);
+	/* Walk to the last element, counting the chain along the way. */
+	nrefs = 1;
+	for (tail = head; gnttab_entry(tail) != GNTTAB_LIST_END; nrefs++)
+		tail = gnttab_entry(tail);
+	/* Splice the whole chain onto the front of the global free list. */
+	gnttab_entry(tail) = gnttab_free_head;
+	gnttab_free_head = head;
+	gnttab_free_count += nrefs;
+	check_free_callbacks();
+	mtx_unlock(&gnttab_list_lock);
+}
+
+int
+gnttab_alloc_grant_references(uint16_t count, grant_ref_t *head)
+{
+	int first, rc;
+
+	/* Ask get_free_entries() for 'count' entries in a single request. */
+	rc = get_free_entries(count, &first);
+	if (unlikely(rc != 0))
+		return (rc);	/* *head is left untouched on failure */
+	*head = first;
+	return (0);
+}
+
+int
+gnttab_empty_grant_references(const grant_ref_t *private_head)
+{
+	/* Non-zero when the caller's private list holds no references. */
+	return (*private_head == GNTTAB_LIST_END);
+}
+
+int
+gnttab_claim_grant_reference(grant_ref_t *private_head)
+{
+	grant_ref_t g = *private_head;	/* candidate: current head of the private list */
+
+	if (unlikely(g == GNTTAB_LIST_END))	/* private list is exhausted */
+		return (ENOSPC);	/* NOTE(review): same value as a valid ref == ENOSPC */
+	*private_head = gnttab_entry(g);	/* unlink g: head moves to its successor */
+	return (g);
+}
+
+void
+gnttab_release_grant_reference(grant_ref_t *private_head, grant_ref_t  release)
+{
+	/* Push 'release' back onto the front of the caller's private list. */
+	gnttab_entry(release) = *private_head;
+	*private_head = release;
+}
+
+void
+gnttab_request_free_callback(struct gnttab_free_callback *callback,
+    void (*fn)(void *), void *arg, uint16_t count)
+{
+
+	mtx_lock(&gnttab_list_lock);
+	/* A non-NULL link is treated as "already queued": change nothing. */
+	if (callback->next == NULL) {
+		callback->fn = fn;
+		callback->arg = arg;
+		callback->count = count;
+		/* Insert at the head of the pending-callback list. */
+		callback->next = gnttab_free_callback_list;
+		gnttab_free_callback_list = callback;
+		check_free_callbacks();
+	}
+	mtx_unlock(&gnttab_list_lock);
+}
+
+void
+gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
+{
+	struct gnttab_free_callback **linkp;
+
+	mtx_lock(&gnttab_list_lock);
+	/* Find the link that points at 'callback' and make it skip over it. */
+	linkp = &gnttab_free_callback_list;
+	while (*linkp != NULL && *linkp != callback)
+		linkp = &(*linkp)->next;
+	if (*linkp != NULL)
+		*linkp = callback->next;
+	mtx_unlock(&gnttab_list_lock);
+}
+
+
+static int
+grow_gnttab_list(unsigned int more_frames)
+{
+	unsigned int new_nr_grant_frames, extra_entries, i;
+
+	new_nr_grant_frames = nr_grant_frames + more_frames;
+	extra_entries       = more_frames * GREFS_PER_GRANT_FRAME;
+	/* Allocate one page of free-list storage per added grant frame. */
+	for (i = nr_grant_frames; i < new_nr_grant_frames; i++)
+	{
+		gnttab_list[i] = (grant_ref_t *)
+			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
+
+		if (!gnttab_list[i])
+			goto grow_nomem;
+	}
+	/* Chain the new entries: each one points at its successor. */
+	for (i = GREFS_PER_GRANT_FRAME * nr_grant_frames;
+	     i < GREFS_PER_GRANT_FRAME * new_nr_grant_frames - 1; i++)
+		gnttab_entry(i) = i + 1;
+	/* Link the run's tail to the old head; the run becomes the head. */
+	gnttab_entry(i) = gnttab_free_head;
+	gnttab_free_head = GREFS_PER_GRANT_FRAME * nr_grant_frames;
+	gnttab_free_count += extra_entries;
+
+	nr_grant_frames = new_nr_grant_frames;
+
+	check_free_callbacks();
+
+	return (0);
+	
+grow_nomem:
+	for ( ; i >= nr_grant_frames; i--)	/* starts at the slot whose malloc failed (NULL) */
+		free(gnttab_list[i], M_DEVBUF);
+	return (ENOMEM);
+}
+
+static unsigned int
+__max_nr_grant_frames(void)
+{
+	struct gnttab_query_size qs;
+	int ret;
+
+	/* Query the grant table size limits for our own domain. */
+	qs.dom = DOMID_SELF;
+	ret = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &qs, 1);
+	if (ret >= 0 && qs.status == GNTST_okay)
+		return (qs.max_nr_frames);
+
+	return (4); /* Legacy max supported number of frames */
+}
+
+static inline
+unsigned int max_nr_grant_frames(void)
+{
+	unsigned int hv_limit = __max_nr_grant_frames();
+
+	/* Clamp to the limit recorded by gnttab_init(). */
+	return (hv_limit > boot_max_nr_grant_frames ?
+	    boot_max_nr_grant_frames : hv_limit);
+}
+
+#ifdef notyet
+/*
+ * XXX needed for backend support
+ *
+ */
+static int
+map_pte_fn(pte_t *pte, struct page *pmd_page,
+		      unsigned long addr, void *data)
+{
+	unsigned long **frames = (unsigned long **)data;
+
+	set_pte_at(&init_mm, addr, pte, pfn_pte_ma((*frames)[0], PAGE_KERNEL));
+	(*frames)++;
+	return 0;
+}
+
+static int
+unmap_pte_fn(pte_t *pte, struct page *pmd_page,
+			unsigned long addr, void *data)
+{
+
+	set_pte_at(&init_mm, addr, pte, __pte(0));
+	return 0;
+}
+#endif
+
+#ifndef XENHVM
+
+static int
+gnttab_map(unsigned int start_idx, unsigned int end_idx)
+{
+	struct gnttab_setup_table setup;
+	u_long *frames;
+	unsigned int i, nr_gframes = end_idx + 1;	/* start_idx is unused */
+	int error = 0, rc;
+
+	frames = malloc(nr_gframes * sizeof(unsigned long), M_DEVBUF, M_NOWAIT);
+	if (!frames)
+		return (ENOMEM);
+
+	setup.dom        = DOMID_SELF;
+	setup.nr_frames  = nr_gframes;
+	set_xen_guest_handle(setup.frame_list, frames);
+
+	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
+	if (rc != 0 || setup.status != 0) {
+		/* frames[] is not valid on failure: bail out, never map it. */
+		if (rc != -ENOSYS)
+			printf("unexpected result from grant_table_op\n");
+		error = (rc == -ENOSYS) ? ENOSYS : ENXIO;
+		goto out;
+	}
+
+	if (shared == NULL) {
+		/* Reserve KVA once, sized for the largest possible table. */
+		vm_offset_t area = kmem_alloc_nofault(kernel_map,
+		    PAGE_SIZE * max_nr_grant_frames());
+		if (area == 0) {
+			error = ENOMEM;
+			goto out;
+		}
+		shared = (grant_entry_t *)area;
+	}
+	for (i = 0; i < nr_gframes; i++)
+		PT_SET_MA(((caddr_t)shared) + i*PAGE_SIZE,
+		    ((vm_paddr_t)frames[i]) << PAGE_SHIFT | PG_RW | PG_V);
+out:
+	free(frames, M_DEVBUF);
+	return (error);
+}
+
+int
+gnttab_resume(void)
+{
+	/* Refuse if the current limit cannot cover the frames already in use. */
+	if (max_nr_grant_frames() < nr_grant_frames)
+		return (ENOSYS);
+	return (gnttab_map(0, nr_grant_frames - 1));
+}
+
+int
+gnttab_suspend(void)
+{
+	int frame;
+
+	/* Remove the kernel mapping of each frame counted by nr_grant_frames. */
+	for (frame = 0; frame < nr_grant_frames; frame++)
+		pmap_kremove((vm_offset_t)shared + frame * PAGE_SIZE);
+	return (0);
+}
+
+#else /* XENHVM */
+
+#include <dev/xen/xenpci/xenpcivar.h>
+
+static vm_paddr_t resume_frames;
+
+static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
+{
+	struct xen_add_to_physmap xatp;
+	unsigned int i = end_idx;
+
+	/*
+	 * Loop backwards, so that the first hypercall has the largest index,
+	 * ensuring that the table will grow only once.
+	 */
+	do {
+		xatp.domid = DOMID_SELF;
+		xatp.idx = i;	/* grant-table frame index being placed */
+		xatp.space = XENMAPSPACE_grant_table;
+		xatp.gpfn = (resume_frames >> PAGE_SHIFT) + i;	/* page i of the resume_frames region */
+		if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
+			panic("HYPERVISOR_memory_op failed to map gnttab");
+	} while (i-- > start_idx);	/* compare, then decrement: stops after the start_idx pass */
+	/* First call only: reserve kernel VA sized for max_nr_grant_frames(). */
+	if (shared == NULL) {
+		vm_offset_t area;
+		
+		area = kmem_alloc_nofault(kernel_map,
+		    PAGE_SIZE * max_nr_grant_frames());
+		KASSERT(area, ("can't allocate VM space for grant table"));
+		shared = (grant_entry_t *)area;
+	}
+	/* Map frames start_idx..end_idx at their offsets within 'shared'. */
+	for (i = start_idx; i <= end_idx; i++) {
+		pmap_kenter((vm_offset_t) shared + i * PAGE_SIZE,
+		    resume_frames + i * PAGE_SIZE);
+	}
+
+	return (0);
+}
+
+int
+gnttab_resume(void)
+{
+	unsigned int in_use, limit;
+	int rc;
+
+	in_use = nr_grant_frames;
+	limit = max_nr_grant_frames();
+	if (in_use > limit)
+		return (ENOSYS);
+
+	/* resume_frames still unset: ask xenpci for space for a full table. */
+	if (resume_frames == 0) {
+		rc = xenpci_alloc_space(PAGE_SIZE * limit, &resume_frames);
+		if (rc != 0) {
+			printf("error mapping gnttab share frames\n");
+			return (rc);
+		}
+	}
+
+	return (gnttab_map(0, in_use - 1));
+}
+
+#endif
+
+static int
+gnttab_expand(unsigned int req_entries)
+{
+	unsigned int have, need;
+	int rc;
+
+	/* Round the request up to a whole number of grant frames. */
+	have = nr_grant_frames;
+	need = (req_entries + (GREFS_PER_GRANT_FRAME - 1)) /
+	    GREFS_PER_GRANT_FRAME;
+	if (have + need > max_nr_grant_frames())
+		return (ENOSPC);
+	/* Map the extra frames first; extend the free list only on success. */
+	rc = gnttab_map(have, have + need - 1);
+	if (rc != 0)
+		return (rc);
+	return (grow_gnttab_list(need));
+}
+
+int
+gnttab_init(void)
+{
+	unsigned int i;
+	unsigned int max_nr_glist_frames;
+	unsigned int nr_init_grefs;
+	int error = ENOMEM;	/* failure reason reported by the cleanup path */
+
+	if (!is_running_on_xen())
+		return (ENODEV);
+
+	nr_grant_frames = 1;
+	boot_max_nr_grant_frames = __max_nr_grant_frames();
+
+	/* Size the free-list pointer array.  It is indexed by grant-frame
+	 * number (here and in grow_gnttab_list()), so it must never have
+	 * fewer slots than the maximum number of grant frames.
+	 */
+	max_nr_glist_frames = (boot_max_nr_grant_frames *
+			       GREFS_PER_GRANT_FRAME /
+			       (PAGE_SIZE / sizeof(grant_ref_t)));
+	if (max_nr_glist_frames < boot_max_nr_grant_frames)
+		max_nr_glist_frames = boot_max_nr_grant_frames;
+
+	gnttab_list = malloc(max_nr_glist_frames * sizeof(grant_ref_t *),
+	    M_DEVBUF, M_NOWAIT);
+	if (gnttab_list == NULL)
+		return (ENOMEM);
+
+	for (i = 0; i < nr_grant_frames; i++) {
+		gnttab_list[i] = (grant_ref_t *)
+			malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT);
+		if (gnttab_list[i] == NULL)
+			goto fail;
+	}
+
+	if (gnttab_resume()) {
+		error = ENODEV;	/* i == nr_grant_frames here: free every page */
+		goto fail;
+	}
+	nr_init_grefs = nr_grant_frames * GREFS_PER_GRANT_FRAME;
+	for (i = NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
+		gnttab_entry(i) = i + 1;
+	gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
+	gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
+	gnttab_free_head  = NR_RESERVED_ENTRIES;
+
+	if (bootverbose)
+		printf("Grant table initialized\n");
+	return (0);
+
+fail:
+	while (i-- > 0)
+		free(gnttab_list[i], M_DEVBUF);
+	free(gnttab_list, M_DEVBUF);
+	return (error);
+}
+
+MTX_SYSINIT(gnttab, &gnttab_list_lock, "GNTTAB LOCK", MTX_DEF); 
+//SYSINIT(gnttab, SI_SUB_PSEUDO, SI_ORDER_FIRST, gnttab_init, NULL);

Property changes on: xen/gnttab.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: xen/hypervisor.h
===================================================================
--- xen/hypervisor.h	(.../stable/6/sys)	(revision 0)
+++ xen/hypervisor.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,152 @@
+/******************************************************************************
+ * hypervisor.h
+  * 
+ * Linux-specific hypervisor handling.
+ * 
+ * Copyright (c) 2002, K A Fraser
+ */
+
+#ifndef __XEN_HYPERVISOR_H__
+#define __XEN_HYPERVISOR_H__
+
+#ifdef XENHVM
+
+#define is_running_on_xen()	(HYPERVISOR_shared_info != NULL)
+
+#else
+
+#define is_running_on_xen() 1
+
+#endif
+
+#ifdef PAE
+#ifndef CONFIG_X86_PAE
+#define CONFIG_X86_PAE
+#endif
+#endif
+
+#include <sys/cdefs.h>
+#include <sys/systm.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/platform.h>
+#include <xen/interface/event_channel.h>
+#include <xen/interface/physdev.h>
+#include <xen/interface/sched.h>
+#include <xen/interface/callback.h>
+#include <xen/interface/memory.h>
+#include <machine/xen/hypercall.h>
+
+#if defined(__amd64__)
+#define MULTI_UVMFLAGS_INDEX 2
+#define MULTI_UVMDOMID_INDEX 3
+#else
+#define MULTI_UVMFLAGS_INDEX 3
+#define MULTI_UVMDOMID_INDEX 4
+#endif
+
+#ifdef CONFIG_XEN_PRIVILEGED_GUEST
+#define is_initial_xendomain() (xen_start_info->flags & SIF_INITDOMAIN)
+#else
+#define is_initial_xendomain() 0
+#endif
+
+extern start_info_t *xen_start_info;
+
+extern uint64_t get_system_time(int ticks);
+
+static inline int 
+HYPERVISOR_console_write(char *str, int count)
+{
+    return HYPERVISOR_console_io(CONSOLEIO_write, count, str); /* passed as (cmd, count, str) */
+}
+
+static inline void HYPERVISOR_crash(void) __dead2;
+
+static inline int
+HYPERVISOR_yield(void)
+{
+        int rc = HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+	if (rc == -ENOXENSYS)	/* sched_op reported -ENOXENSYS: retry via the compat hypercall */
+		rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0);
+#endif
+        return (rc);	/* result of whichever hypercall ran last */
+}
+
+static inline int
+HYPERVISOR_block(
+        void)
+{
+        int rc = HYPERVISOR_sched_op(SCHEDOP_block, NULL);
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+	if (rc == -ENOXENSYS)	/* sched_op reported -ENOXENSYS: retry via the compat hypercall */
+		rc = HYPERVISOR_sched_op_compat(SCHEDOP_block, 0);
+#endif
+        return (rc);	/* result of whichever hypercall ran last */
+}
+
+
+static inline void 
+HYPERVISOR_shutdown(unsigned int reason)
+{
+	struct sched_shutdown sched_shutdown = {
+		.reason = reason	/* passed by pointer to the new-style op */
+	};
+
+	HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
+#if CONFIG_XEN_COMPAT <= 0x030002
+	HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason);	/* runs only if the call above returned */
+#endif
+}
+
+static inline void
+HYPERVISOR_crash(void) 
+{
+        HYPERVISOR_shutdown(SHUTDOWN_crash); 
+	/* Not expected to get here; the function is declared __dead2. */
+        for (;;) ; /* spin forever so the function never returns */ 
+}
+
+/* Transfer control to the hypervisor until an event is detected on one */
+/* of the specified ports or the specified number of ticks elapses. */
+static inline int
+HYPERVISOR_poll(
+	evtchn_port_t *ports, unsigned int nr_ports, int ticks)
+{
+	int rc;
+	struct sched_poll sched_poll = {
+		.nr_ports = nr_ports,
+		.timeout = get_system_time(ticks)	/* ticks converted by get_system_time() */
+	};
+	set_xen_guest_handle(sched_poll.ports, ports);
+
+	rc = HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll);
+#if CONFIG_XEN_COMPAT <= 0x030002
+	if (rc == -ENOXENSYS)	/* note: the compat fallback issues a yield, not a poll */
+		rc = HYPERVISOR_sched_op_compat(SCHEDOP_yield, 0);
+#endif	
+	return (rc);
+}
+
+static inline void
+MULTI_update_va_mapping(
+	multicall_entry_t *mcl, unsigned long va,
+        uint64_t new_val, unsigned long flags)
+{
+    mcl->op = __HYPERVISOR_update_va_mapping;	/* fills one multicall slot; issues no hypercall itself */
+    mcl->args[0] = va;
+#if defined(__amd64__)
+    mcl->args[1] = new_val;	/* whole 64-bit value in one argument */
+#elif defined(PAE)
+    mcl->args[1] = (uint32_t)(new_val & 0xffffffff) ;	/* low 32 bits */
+    mcl->args[2] = (uint32_t)(new_val >> 32);	/* high 32 bits */
+#else
+    mcl->args[1] = new_val;
+    mcl->args[2] = 0;	/* upper half is always zero in this configuration */
+#endif
+    mcl->args[MULTI_UVMFLAGS_INDEX] = flags;	/* index is 2 on amd64, 3 otherwise */
+}
+
+#endif /* __XEN_HYPERVISOR_H__ */

Property changes on: xen/hypervisor.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:mergeinfo
   Merged /stable/7/sys/i386/include/xen/hypervisor.h:r172506,172810,175956,179044,179776,180149,182402
   Merged /head/sys/i386/include/xen/hypervisor.h:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628
Added: svn:eol-style
   + native

Index: xen/interface/xen.h
===================================================================
--- xen/interface/xen.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/xen.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,639 @@
+/******************************************************************************
+ * xen.h
+ * 
+ * Guest OS interface to Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_XEN_H__
+#define __XEN_PUBLIC_XEN_H__
+
+#include "xen-compat.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "arch-x86/xen.h"
+#elif defined(__ia64__)
+#include "arch-ia64.h"
+#else
+#error "Unsupported architecture"
+#endif
+
+#ifndef __ASSEMBLY__
+/* Guest handles for primitive C types. */
+DEFINE_XEN_GUEST_HANDLE(char);
+__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
+DEFINE_XEN_GUEST_HANDLE(int);
+__DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
+DEFINE_XEN_GUEST_HANDLE(long);
+__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+DEFINE_XEN_GUEST_HANDLE(void);
+
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
+#endif
+
+/*
+ * HYPERCALLS
+ */
+
+#define __HYPERVISOR_set_trap_table        0
+#define __HYPERVISOR_mmu_update            1
+#define __HYPERVISOR_set_gdt               2
+#define __HYPERVISOR_stack_switch          3
+#define __HYPERVISOR_set_callbacks         4
+#define __HYPERVISOR_fpu_taskswitch        5
+#define __HYPERVISOR_sched_op_compat       6 /* compat since 0x00030101 */
+#define __HYPERVISOR_platform_op           7
+#define __HYPERVISOR_set_debugreg          8
+#define __HYPERVISOR_get_debugreg          9
+#define __HYPERVISOR_update_descriptor    10
+#define __HYPERVISOR_memory_op            12
+#define __HYPERVISOR_multicall            13
+#define __HYPERVISOR_update_va_mapping    14
+#define __HYPERVISOR_set_timer_op         15
+#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */
+#define __HYPERVISOR_xen_version          17
+#define __HYPERVISOR_console_io           18
+#define __HYPERVISOR_physdev_op_compat    19 /* compat since 0x00030202 */
+#define __HYPERVISOR_grant_table_op       20
+#define __HYPERVISOR_vm_assist            21
+#define __HYPERVISOR_update_va_mapping_otherdomain 22
+#define __HYPERVISOR_iret                 23 /* x86 only */
+#define __HYPERVISOR_vcpu_op              24
+#define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op            26
+#define __HYPERVISOR_xsm_op               27
+#define __HYPERVISOR_nmi_op               28
+#define __HYPERVISOR_sched_op             29
+#define __HYPERVISOR_callback_op          30
+#define __HYPERVISOR_xenoprof_op          31
+#define __HYPERVISOR_event_channel_op     32
+#define __HYPERVISOR_physdev_op           33
+#define __HYPERVISOR_hvm_op               34
+#define __HYPERVISOR_sysctl               35
+#define __HYPERVISOR_domctl               36
+#define __HYPERVISOR_kexec_op             37
+
+/* Architecture-specific hypercall definitions. */
+#define __HYPERVISOR_arch_0               48
+#define __HYPERVISOR_arch_1               49
+#define __HYPERVISOR_arch_2               50
+#define __HYPERVISOR_arch_3               51
+#define __HYPERVISOR_arch_4               52
+#define __HYPERVISOR_arch_5               53
+#define __HYPERVISOR_arch_6               54
+#define __HYPERVISOR_arch_7               55
+
+/*
+ * HYPERCALL COMPATIBILITY.
+ */
+
+/* New sched_op hypercall introduced in 0x00030101. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030101
+#undef __HYPERVISOR_sched_op
+#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat
+#endif
+
+/* New event-channel and physdev hypercalls introduced in 0x00030202. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030202
+#undef __HYPERVISOR_event_channel_op
+#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat
+#undef __HYPERVISOR_physdev_op
+#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat
+#endif
+
+/* New platform_op hypercall introduced in 0x00030204. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030204
+#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op
+#endif
+
+/* 
+ * VIRTUAL INTERRUPTS
+ * 
+ * Virtual interrupts that a guest OS may receive from Xen.
+ * 
+ * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a
+ * global VIRQ. The former can be bound once per VCPU and cannot be re-bound.
+ * The latter can be allocated only once per guest: they must initially be
+ * allocated to VCPU0 but can subsequently be re-bound.
+ */
+#define VIRQ_TIMER      0  /* V. Timebase update, and/or requested timeout.  */
+#define VIRQ_DEBUG      1  /* V. Request guest to dump debug info.           */
+#define VIRQ_CONSOLE    2  /* G. (DOM0) Bytes received on emergency console. */
+#define VIRQ_DOM_EXC    3  /* G. (DOM0) Exceptional event for some domain.   */
+#define VIRQ_TBUF       4  /* G. (DOM0) Trace buffer has records available.  */
+#define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
+#define VIRQ_XENOPROF   7  /* V. XenOprofile interrupt: new sample available */
+#define VIRQ_CON_RING   8  /* G. (DOM0) Bytes received on console            */
+
+/* Architecture-specific VIRQ definitions. */
+#define VIRQ_ARCH_0    16
+#define VIRQ_ARCH_1    17
+#define VIRQ_ARCH_2    18
+#define VIRQ_ARCH_3    19
+#define VIRQ_ARCH_4    20
+#define VIRQ_ARCH_5    21
+#define VIRQ_ARCH_6    22
+#define VIRQ_ARCH_7    23
+
+#define NR_VIRQS       24
+
+/*
+ * MMU-UPDATE REQUESTS
+ * 
+ * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
+ * Where the FD has some effect, it is described below.
+ * ptr[1:0] specifies the appropriate MMU_* command.
+ * 
+ * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
+ * Updates an entry in a page table. If updating an L1 table, and the new
+ * table entry is valid/present, the mapped frame must belong to the FD, if
+ * an FD has been specified. If attempting to map an I/O page then the
+ * caller assumes the privilege of the FD.
+ * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
+ * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
+ * ptr[:2]  -- Machine address of the page-table entry to modify.
+ * val      -- Value to write.
+ * 
+ * ptr[1:0] == MMU_MACHPHYS_UPDATE:
+ * Updates an entry in the machine->pseudo-physical mapping table.
+ * ptr[:2]  -- Machine address within the frame whose mapping to modify.
+ *             The frame must belong to the FD, if one is specified.
+ * val      -- Value to write into the mapping entry.
+ * 
+ * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD:
+ * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
+ * with those in @val.
+ */
+#define MMU_NORMAL_PT_UPDATE      0 /* checked '*ptr = val'. ptr is MA.      */
+#define MMU_MACHPHYS_UPDATE       1 /* ptr = MA of frame to modify entry for */
+#define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */
+
+/*
+ * MMU EXTENDED OPERATIONS
+ * 
+ * HYPERVISOR_mmuext_op() accepts a list of mmuext_op structures.
+ * A foreigndom (FD) can be specified (or DOMID_SELF for none).
+ * Where the FD has some effect, it is described below.
+ * 
+ * cmd: MMUEXT_(UN)PIN_*_TABLE
+ * mfn: Machine frame number to be (un)pinned as a p.t. page.
+ *      The frame must belong to the FD, if one is specified.
+ * 
+ * cmd: MMUEXT_NEW_BASEPTR
+ * mfn: Machine frame number of new page-table base to install in MMU.
+ * 
+ * cmd: MMUEXT_NEW_USER_BASEPTR [x86/64 only]
+ * mfn: Machine frame number of new page-table base to install in MMU
+ *      when in user space.
+ * 
+ * cmd: MMUEXT_TLB_FLUSH_LOCAL
+ * No additional arguments. Flushes local TLB.
+ * 
+ * cmd: MMUEXT_INVLPG_LOCAL
+ * linear_addr: Linear address to be flushed from the local TLB.
+ * 
+ * cmd: MMUEXT_TLB_FLUSH_MULTI
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
+ * 
+ * cmd: MMUEXT_INVLPG_MULTI
+ * linear_addr: Linear address to be flushed.
+ * vcpumask: Pointer to bitmap of VCPUs to be flushed.
+ * 
+ * cmd: MMUEXT_TLB_FLUSH_ALL
+ * No additional arguments. Flushes all VCPUs' TLBs.
+ * 
+ * cmd: MMUEXT_INVLPG_ALL
+ * linear_addr: Linear address to be flushed from all VCPUs' TLBs.
+ * 
+ * cmd: MMUEXT_FLUSH_CACHE
+ * No additional arguments. Writes back and flushes cache contents.
+ * 
+ * cmd: MMUEXT_SET_LDT
+ * linear_addr: Linear address of LDT base (NB. must be page-aligned).
+ * nr_ents: Number of entries in LDT.
+ */
+#define MMUEXT_PIN_L1_TABLE      0
+#define MMUEXT_PIN_L2_TABLE      1
+#define MMUEXT_PIN_L3_TABLE      2
+#define MMUEXT_PIN_L4_TABLE      3
+#define MMUEXT_UNPIN_TABLE       4
+#define MMUEXT_NEW_BASEPTR       5
+#define MMUEXT_TLB_FLUSH_LOCAL   6
+#define MMUEXT_INVLPG_LOCAL      7
+#define MMUEXT_TLB_FLUSH_MULTI   8
+#define MMUEXT_INVLPG_MULTI      9
+#define MMUEXT_TLB_FLUSH_ALL    10
+#define MMUEXT_INVLPG_ALL       11
+#define MMUEXT_FLUSH_CACHE      12
+#define MMUEXT_SET_LDT          13
+#define MMUEXT_NEW_USER_BASEPTR 15
+
+#ifndef __ASSEMBLY__
+struct mmuext_op {
+    unsigned int cmd;	/* one of the MMUEXT_* command codes */
+    union {
+        /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
+        xen_pfn_t     mfn;
+        /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
+        unsigned long linear_addr;
+    } arg1;
+    union {
+        /* SET_LDT */
+        unsigned int nr_ents;
+        /* TLB_FLUSH_MULTI, INVLPG_MULTI */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+        XEN_GUEST_HANDLE(void) vcpumask;	/* guest-handle form */
+#else
+        void *vcpumask;	/* raw-pointer form for older interface versions */
+#endif
+    } arg2;
+};
+typedef struct mmuext_op mmuext_op_t;
+DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
+#endif
+
+/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
+/* When specifying UVMF_MULTI, also OR in a pointer to a CPU bitmap.   */
+/* UVMF_LOCAL is merely UVMF_MULTI with a NULL bitmap pointer.         */
+#define UVMF_NONE               (0UL<<0) /* No flushing at all.   */
+#define UVMF_TLB_FLUSH          (1UL<<0) /* Flush entire TLB(s).  */
+#define UVMF_INVLPG             (2UL<<0) /* Flush only one entry. */
+#define UVMF_FLUSHTYPE_MASK     (3UL<<0)
+#define UVMF_MULTI              (0UL<<2) /* Flush subset of TLBs. */
+#define UVMF_LOCAL              (0UL<<2) /* Flush local TLB.      */
+#define UVMF_ALL                (1UL<<2) /* Flush all TLBs.       */
+
+/*
+ * Commands to HYPERVISOR_console_io().
+ */
+#define CONSOLEIO_write         0
+#define CONSOLEIO_read          1
+
+/*
+ * Commands to HYPERVISOR_vm_assist().
+ */
+#define VMASST_CMD_enable                0
+#define VMASST_CMD_disable               1
+
+/* x86/32 guests: simulate full 4GB segment limits. */
+#define VMASST_TYPE_4gb_segments         0
+
+/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
+#define VMASST_TYPE_4gb_segments_notify  1
+
+/*
+ * x86 guests: support writes to bottom-level PTEs.
+ * NB1. Page-directory entries cannot be written.
+ * NB2. Guest must continue to remove all writable mappings of PTEs.
+ */
+#define VMASST_TYPE_writable_pagetables  2
+
+/* x86/PAE guests: support PDPTs above 4GB. */
+#define VMASST_TYPE_pae_extended_cr3     3
+
+#define MAX_VMASST_TYPE                  3
+
+#ifndef __ASSEMBLY__
+
+typedef uint16_t domid_t;
+
+/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
+#define DOMID_FIRST_RESERVED (0x7FF0U)
+
+/* DOMID_SELF is used in certain contexts to refer to oneself. */
+#define DOMID_SELF (0x7FF0U)
+
+/*
+ * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
+ * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
+ * is useful to ensure that no mappings to the OS's own heap are accidentally
+ * installed. (e.g., in Linux this could cause havoc as reference counts
+ * aren't adjusted on the I/O-mapping code path).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can
+ * be specified by any calling domain.
+ */
+#define DOMID_IO   (0x7FF1U)
+
+/*
+ * DOMID_XEN is used to allow privileged domains to map restricted parts of
+ * Xen's heap space (e.g., the machine_to_phys table).
+ * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if
+ * the caller is privileged.
+ */
+#define DOMID_XEN  (0x7FF2U)
+
+/*
+ * Send an array of these to HYPERVISOR_mmu_update().
+ * NB. Both fields are fixed-width 64-bit values, whatever the architecture.
+ */
+struct mmu_update {
+    uint64_t ptr;       /* Machine address of PTE. */
+    uint64_t val;       /* New contents of PTE.    */
+};
+typedef struct mmu_update mmu_update_t;
+DEFINE_XEN_GUEST_HANDLE(mmu_update_t);
+
+/*
+ * Send an array of these to HYPERVISOR_multicall().
+ * NB. The fields are natural register size for this architecture.
+ */
+struct multicall_entry {
+    unsigned long op, result;	/* op: a __HYPERVISOR_* number; result: presumably set by Xen -- confirm */
+    unsigned long args[6];	/* positional arguments for the hypercall named by op */
+};
+typedef struct multicall_entry multicall_entry_t;
+DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);
+
+/*
+ * Event channel endpoints per domain:
+ *  1024 if a long is 32 bits; 4096 if a long is 64 bits.
+ */
+#define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)
+
+struct vcpu_time_info {
+    /*
+     * Updates to the following values are preceded and followed by an
+     * increment of 'version'. The guest can therefore detect updates by
+     * looking for changes to 'version'. If the least-significant bit of
+     * the version number is set then an update is in progress and the guest
+     * must wait to read a consistent set of values.
+     * The correct way to interact with the version number is similar to
+     * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry.
+     */
+    uint32_t version;
+    uint32_t pad0;
+    uint64_t tsc_timestamp;   /* TSC at last update of time vals.  */
+    uint64_t system_time;     /* Time, in nanosecs, since boot.    */
+    /*
+     * Current system time:
+     *   system_time +
+     *   ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32)
+     * CPU frequency (Hz):
+     *   ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
+     */
+    uint32_t tsc_to_system_mul;
+    int8_t   tsc_shift;
+    int8_t   pad1[3];
+}; /* 32 bytes */
+typedef struct vcpu_time_info vcpu_time_info_t;
+
+struct vcpu_info {
+    /*
+     * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
+     * a pending notification for a particular VCPU. It is then cleared 
+     * by the guest OS /before/ checking for pending work, thus avoiding
+     * a set-and-check race. Note that the mask is only accessed by Xen
+     * on the CPU that is currently hosting the VCPU. This means that the
+     * pending and mask flags can be updated by the guest without special
+     * synchronisation (i.e., no need for the x86 LOCK prefix).
+     * This may seem suboptimal because if the pending flag is set by
+     * a different CPU then an IPI may be scheduled even when the mask
+     * is set. However, note:
+     *  1. The task of 'interrupt holdoff' is covered by the per-event-
+     *     channel mask bits. A 'noisy' event that is continually being
+     *     triggered can be masked at source at this very precise
+     *     granularity.
+     *  2. The main purpose of the per-VCPU mask is therefore to restrict
+     *     reentrant execution: whether for concurrency control, or to
+     *     prevent unbounded stack usage. Whatever the purpose, we expect
+     *     that the mask will be asserted only for short periods at a time,
+     *     and so the likelihood of a 'spurious' IPI is suitably small.
+     * The mask is read before making an event upcall to the guest: a
+     * non-zero mask therefore guarantees that the VCPU will not receive
+     * an upcall activation. The mask is cleared when the VCPU requests
+     * to block: this avoids wakeup-waiting races.
+     */
+    uint8_t evtchn_upcall_pending;
+    uint8_t evtchn_upcall_mask;
+    unsigned long evtchn_pending_sel;
+    struct arch_vcpu_info arch;
+    struct vcpu_time_info time;
+}; /* 64 bytes (x86) */
+#ifndef __XEN__
+typedef struct vcpu_info vcpu_info_t;
+#endif
+
+/*
+ * Xen/kernel shared data -- pointer provided in start_info.
+ *
+ * This structure is defined to be both smaller than a page, and the
+ * only data on the shared page, but may vary in actual size even within
+ * compatible Xen versions; guests should not rely on the size
+ * of this structure remaining constant.
+ */
+struct shared_info {
+    struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
+
+    /*
+     * A domain can create "event channels" on which it can send and receive
+     * asynchronous event notifications. There are three classes of event that
+     * are delivered by this mechanism:
+     *  1. Bi-directional inter- and intra-domain connections. Domains must
+     *     arrange out-of-band to set up a connection (usually by allocating
+     *     an unbound 'listener' port and advertising that via a storage service
+     *     such as xenstore).
+     *  2. Physical interrupts. A domain with suitable hardware-access
+     *     privileges can bind an event-channel port to a physical interrupt
+     *     source.
+     *  3. Virtual interrupts ('events'). A domain can bind an event-channel
+     *     port to a virtual interrupt source, such as the virtual-timer
+     *     device or the emergency console.
+     * 
+     * Event channels are addressed by a "port index". Each channel is
+     * associated with two bits of information:
+     *  1. PENDING -- notifies the domain that there is a pending notification
+     *     to be processed. This bit is cleared by the guest.
+     *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
+     *     will cause an asynchronous upcall to be scheduled. This bit is only
+     *     updated by the guest. It is read-only within Xen. If a channel
+     *     becomes pending while the channel is masked then the 'edge' is lost
+     *     (i.e., when the channel is unmasked, the guest must manually handle
+     *     pending notifications as no upcall will be scheduled by Xen).
+     * 
+     * To expedite scanning of pending notifications, any 0->1 pending
+     * transition on an unmasked channel causes a corresponding bit in a
+     * per-vcpu selector word to be set. Each bit in the selector covers a
+     * 'C long' in the PENDING bitfield array.
+     */
+    unsigned long evtchn_pending[sizeof(unsigned long) * 8];
+    unsigned long evtchn_mask[sizeof(unsigned long) * 8];
+
+    /*
+     * Wallclock time: updated only by control software. Guests should base
+     * their gettimeofday() syscall on this wallclock-base value.
+     */
+    uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
+    uint32_t wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
+    uint32_t wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
+
+    struct arch_shared_info arch;
+
+};
+#ifndef __XEN__
+typedef struct shared_info shared_info_t;
+#endif
+
+/*
+ * Start-of-day memory layout:
+ *  1. The domain is started within a contiguous virtual-memory region.
+ *  2. The contiguous region ends on an aligned 4MB boundary.
+ *  3. This is the order of bootstrap elements in the initial virtual region:
+ *      a. relocated kernel image
+ *      b. initial ram disk              [mod_start, mod_len]
+ *      c. list of allocated page frames [mfn_list, nr_pages]
+ *      d. start_info_t structure        [register ESI (x86)]
+ *      e. bootstrap page tables         [pt_base, CR3 (x86)]
+ *      f. bootstrap stack               [register ESP (x86)]
+ *  4. Bootstrap elements are packed together, but each is 4kB-aligned.
+ *  5. The initial ram disk may be omitted.
+ *  6. The list of page frames forms a contiguous 'pseudo-physical' memory
+ *     layout for the domain. In particular, the bootstrap virtual-memory
+ *     region is a 1:1 mapping to the first section of the pseudo-physical map.
+ *  7. All bootstrap elements are mapped read-writable for the guest OS. The
+ *     only exception is the bootstrap page table, which is mapped read-only.
+ *  8. There is guaranteed to be at least 512kB padding after the final
+ *     bootstrap element. If necessary, the bootstrap virtual region is
+ *     extended by an extra 4MB to ensure this.
+ */
+
+#define MAX_GUEST_CMDLINE 1024
+struct start_info {
+    /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME.    */
+    char magic[32];             /* "xen-<version>-<platform>".            */
+    unsigned long nr_pages;     /* Total pages allocated to this domain.  */
+    unsigned long shared_info;  /* MACHINE address of shared info struct. */
+    uint32_t flags;             /* SIF_xxx flags.                         */
+    xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */
+    uint32_t store_evtchn;      /* Event channel for store communication. */
+    union {
+        struct {
+            xen_pfn_t mfn;      /* MACHINE page number of console page.   */
+            uint32_t  evtchn;   /* Event channel for console page.        */
+        } domU;
+        struct {
+            uint32_t info_off;  /* Offset of console_info struct.         */
+            uint32_t info_size; /* Size of console_info struct from start.*/
+        } dom0;
+    } console;
+    /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).     */
+    unsigned long pt_base;      /* VIRTUAL address of page directory.     */
+    unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames.       */
+    unsigned long mfn_list;     /* VIRTUAL address of page-frame list.    */
+    unsigned long mod_start;    /* VIRTUAL address of pre-loaded module.  */
+    unsigned long mod_len;      /* Size (bytes) of pre-loaded module.     */
+    int8_t cmd_line[MAX_GUEST_CMDLINE];
+};
+typedef struct start_info start_info_t;
+
+/* New console union for dom0 introduced in 0x00030203. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030203
+#define console_mfn    console.domU.mfn
+#define console_evtchn console.domU.evtchn
+#endif
+
+/* These flags are passed in the 'flags' field of start_info_t. */
+#define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */
+#define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */
+#define SIF_PM_MASK       (0xFF<<8) /* reserve 1 byte for xen-pm options */
+
+typedef struct dom0_vga_console_info {
+    uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */
+#define XEN_VGATYPE_TEXT_MODE_3 0x03
+#define XEN_VGATYPE_VESA_LFB    0x23
+
+    union {
+        struct {
+            /* Font height, in pixels. */
+            uint16_t font_height;
+            /* Cursor location (column, row). */
+            uint16_t cursor_x, cursor_y;
+            /* Number of rows and columns (dimensions in characters). */
+            uint16_t rows, columns;
+        } text_mode_3;
+
+        struct {
+            /* Width and height, in pixels. */
+            uint16_t width, height;
+            /* Bytes per scan line. */
+            uint16_t bytes_per_line;
+            /* Bits per pixel. */
+            uint16_t bits_per_pixel;
+            /* LFB physical address, and size (in units of 64kB). */
+            uint32_t lfb_base;
+            uint32_t lfb_size;
+            /* RGB mask offsets and sizes, as defined by VBE 1.2+ */
+            uint8_t  red_pos, red_size;
+            uint8_t  green_pos, green_size;
+            uint8_t  blue_pos, blue_size;
+            uint8_t  rsvd_pos, rsvd_size;
+#if __XEN_INTERFACE_VERSION__ >= 0x00030206
+            /* VESA capabilities (offset 0xa, VESA command 0x4f00). */
+            uint32_t gbl_caps;
+            /* Mode attributes (offset 0x0, VESA command 0x4f01). */
+            uint16_t mode_attrs;
+#endif
+        } vesa_lfb;
+    } u;
+} dom0_vga_console_info_t;
+#define xen_vga_console_info dom0_vga_console_info
+#define xen_vga_console_info_t dom0_vga_console_info_t
+
+typedef uint8_t xen_domain_handle_t[16];
+
+/* Turn a plain number into a C unsigned long constant. */
+#define __mk_unsigned_long(x) x ## UL
+#define mk_unsigned_long(x) __mk_unsigned_long(x)
+
+__DEFINE_XEN_GUEST_HANDLE(uint8,  uint8_t);
+__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t);
+__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t);
+__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t);
+
+#else /* __ASSEMBLY__ */
+
+/* In assembly code we cannot use C numeric constant suffixes. */
+#define mk_unsigned_long(x) x
+
+#endif /* !__ASSEMBLY__ */
+
+/* Default definitions for macros used by domctl/sysctl. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#ifndef uint64_aligned_t
+#define uint64_aligned_t uint64_t
+#endif
+#ifndef XEN_GUEST_HANDLE_64
+#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name)
+#endif
+#endif
+
+#endif /* __XEN_PUBLIC_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/xen.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/arch-powerpc.h
===================================================================
--- xen/interface/arch-powerpc.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/arch-powerpc.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,120 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) IBM Corp. 2005, 2006
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#include "xen.h"
+
+#ifndef __XEN_PUBLIC_ARCH_PPC_64_H__
+#define __XEN_PUBLIC_ARCH_PPC_64_H__
+
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef struct { \
+        int __pad[(sizeof (long long) - sizeof (void *)) / sizeof (int)]; \
+        type *p; \
+    } __attribute__((__aligned__(8))) __guest_handle_ ## name
+
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
+    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
+#define set_xen_guest_handle(hnd, val) \
+    do { \
+        if (sizeof ((hnd).__pad)) \
+            (hnd).__pad[0] = 0; \
+        (hnd).p = val; \
+    } while (0)
+
+#ifdef __XEN_TOOLS__
+#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
+#endif
+
+#ifndef __ASSEMBLY__
+typedef unsigned long long xen_pfn_t;
+#define PRI_xen_pfn "llx"
+#endif
+
+/*
+ * Pointers and other address fields inside interface structures are padded to
+ * 64 bits. This means that field alignments aren't different between 32- and
+ * 64-bit architectures. 
+ */
+/* NB. Multi-level macro ensures __LINE__ is expanded before concatenation. */
+#define __MEMORY_PADDING(_X)
+#define _MEMORY_PADDING(_X)  __MEMORY_PADDING(_X)
+#define MEMORY_PADDING       _MEMORY_PADDING(__LINE__)
+
+/* And the trap vector is... */
+#define TRAP_INSTR "li 0,-1; sc" /* XXX just "sc"? */
+
+#ifndef __ASSEMBLY__
+
+#define XENCOMM_INLINE_FLAG (1UL << 63)
+
+typedef uint64_t xen_ulong_t;
+
+/* User-accessible registers: most of these need to be saved/restored
+ * for every nested Xen invocation. */
+struct cpu_user_regs
+{
+    uint64_t gprs[32];
+    uint64_t lr;
+    uint64_t ctr;
+    uint64_t srr0;
+    uint64_t srr1;
+    uint64_t pc;
+    uint64_t msr;
+    uint64_t fpscr;             /* XXX Is this necessary */
+    uint64_t xer;
+    uint64_t hid4;              /* debug only */
+    uint64_t dar;               /* debug only */
+    uint32_t dsisr;             /* debug only */
+    uint32_t cr;
+    uint32_t __pad;             /* good spot for another 32bit reg */
+    uint32_t entry_vector;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+
+typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */ /* XXX timebase */
+
+/* ONLY used to communicate with dom0! See also struct exec_domain. */
+struct vcpu_guest_context {
+    cpu_user_regs_t user_regs;         /* User-level CPU registers     */
+    uint64_t sdr1;                     /* Pagetable base               */
+    /* XXX etc */
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
+
+struct arch_shared_info {
+    uint64_t boot_timebase;
+};
+
+struct arch_vcpu_info {
+};
+
+/* Support for multi-processor guests. */
+#define MAX_VIRT_CPUS 32
+#endif
+
+#endif

Property changes on: xen/interface/arch-powerpc.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/callback.h
===================================================================
--- xen/interface/callback.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/callback.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,121 @@
+/******************************************************************************
+ * callback.h
+ *
+ * Register guest OS callbacks with Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Ian Campbell
+ */
+
+#ifndef __XEN_PUBLIC_CALLBACK_H__
+#define __XEN_PUBLIC_CALLBACK_H__
+
+#include "xen.h"
+
+/*
+ * Prototype for this hypercall is:
+ *   long callback_op(int cmd, void *extra_args)
+ * @cmd        == CALLBACKOP_??? (callback operation).
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/* ia64, x86: Callback for event delivery. */
+#define CALLBACKTYPE_event                 0
+
+/* x86: Failsafe callback when guest state cannot be restored by Xen. */
+#define CALLBACKTYPE_failsafe              1
+
+/* x86/64 hypervisor: Syscall by 64-bit guest app ('64-on-64-on-64'). */
+#define CALLBACKTYPE_syscall               2
+
+/*
+ * x86/32 hypervisor: Only available on x86/32 when supervisor_mode_kernel
+ *     feature is enabled. Do not use this callback type in new code.
+ */
+#define CALLBACKTYPE_sysenter_deprecated   3
+
+/* x86: Callback for NMI delivery. */
+#define CALLBACKTYPE_nmi                   4
+
+/*
+ * x86: sysenter is only available as follows:
+ * - 32-bit hypervisor: with the supervisor_mode_kernel feature enabled
+ * - 64-bit hypervisor: 32-bit guest applications on Intel CPUs
+ *                      ('32-on-32-on-64', '32-on-64-on-64')
+ *                      [nb. also 64-bit guest applications on Intel CPUs
+ *                           ('64-on-64-on-64'), but syscall is preferred]
+ */
+#define CALLBACKTYPE_sysenter              5
+
+/*
+ * x86/64 hypervisor: Syscall by 32-bit guest app on AMD CPUs
+ *                    ('32-on-32-on-64', '32-on-64-on-64')
+ */
+#define CALLBACKTYPE_syscall32             7
+
+/*
+ * Disable event delivery during callback? This flag is ignored for event and
+ * NMI callbacks: event delivery is unconditionally disabled.
+ */
+#define _CALLBACKF_mask_events             0
+#define CALLBACKF_mask_events              (1U << _CALLBACKF_mask_events)
+
+/*
+ * Register a callback.
+ */
+#define CALLBACKOP_register                0
+struct callback_register {
+    uint16_t type;
+    uint16_t flags;
+    xen_callback_t address;
+};
+typedef struct callback_register callback_register_t;
+DEFINE_XEN_GUEST_HANDLE(callback_register_t);
+
+/*
+ * Unregister a callback.
+ *
+ * Not all callbacks can be unregistered. -EINVAL will be returned if
+ * you attempt to unregister such a callback.
+ */
+#define CALLBACKOP_unregister              1
+struct callback_unregister {
+    uint16_t type;
+    uint16_t _unused;
+};
+typedef struct callback_unregister callback_unregister_t;
+DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030207
+#undef CALLBACKTYPE_sysenter
+#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated
+#endif
+
+#endif /* __XEN_PUBLIC_CALLBACK_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/callback.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/elfnote.h
===================================================================
--- xen/interface/elfnote.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/elfnote.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,233 @@
+/******************************************************************************
+ * elfnote.h
+ *
+ * Definitions used for the Xen ELF notes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Ian Campbell, XenSource Ltd.
+ */
+
+#ifndef __XEN_PUBLIC_ELFNOTE_H__
+#define __XEN_PUBLIC_ELFNOTE_H__
+
+/*
+ * The notes should live in a PT_NOTE segment and have "Xen" in the
+ * name field.
+ *
+ * Numeric types are either 4 or 8 bytes depending on the content of
+ * the desc field.
+ *
+ * LEGACY indicates the fields in the legacy __xen_guest string which
+ * this note type replaces.
+ */
+
+/*
+ * NAME=VALUE pair (string).
+ */
+#define XEN_ELFNOTE_INFO           0
+
+/*
+ * The virtual address of the entry point (numeric).
+ *
+ * LEGACY: VIRT_ENTRY
+ */
+#define XEN_ELFNOTE_ENTRY          1
+
+/* The virtual address of the hypercall transfer page (numeric).
+ *
+ * LEGACY: HYPERCALL_PAGE. (n.b. legacy value is a physical page
+ * number not a virtual address)
+ */
+#define XEN_ELFNOTE_HYPERCALL_PAGE 2
+
+/* The virtual address where the kernel image should be mapped (numeric).
+ *
+ * Defaults to 0.
+ *
+ * LEGACY: VIRT_BASE
+ */
+#define XEN_ELFNOTE_VIRT_BASE      3
+
+/*
+ * The offset of the ELF paddr field from the actual required
+ * pseudo-physical address (numeric).
+ *
+ * This is used to maintain backwards compatibility with older kernels
+ * which wrote __PAGE_OFFSET into that field. This field defaults to 0
+ * if not present.
+ *
+ * LEGACY: ELF_PADDR_OFFSET. (n.b. legacy default is VIRT_BASE)
+ */
+#define XEN_ELFNOTE_PADDR_OFFSET   4
+
+/*
+ * The version of Xen that we work with (string).
+ *
+ * LEGACY: XEN_VER
+ */
+#define XEN_ELFNOTE_XEN_VERSION    5
+
+/*
+ * The name of the guest operating system (string).
+ *
+ * LEGACY: GUEST_OS
+ */
+#define XEN_ELFNOTE_GUEST_OS       6
+
+/*
+ * The version of the guest operating system (string).
+ *
+ * LEGACY: GUEST_VER
+ */
+#define XEN_ELFNOTE_GUEST_VERSION  7
+
+/*
+ * The loader type (string).
+ *
+ * LEGACY: LOADER
+ */
+#define XEN_ELFNOTE_LOADER         8
+
+/*
+ * The kernel supports PAE (x86/32 only, string = "yes", "no" or
+ * "bimodal").
+ *
+ * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting
+ * may be given as "yes,bimodal" which will cause older Xen to treat
+ * this kernel as PAE.
+ *
+ * LEGACY: PAE (n.b. The legacy interface included a provision to
+ * indicate 'extended-cr3' support allowing L3 page tables to be
+ * placed above 4G. It is assumed that any kernel new enough to use
+ * these ELF notes will include this and therefore "yes" here is
+ * equivalent to "yes[extended-cr3]" in the __xen_guest interface).
+ */
+#define XEN_ELFNOTE_PAE_MODE       9
+
+/*
+ * The features supported/required by this kernel (string).
+ *
+ * The string must consist of a list of feature names (as given in
+ * features.h, without the "XENFEAT_" prefix) separated by '|'
+ * characters. If a feature is required for the kernel to function
+ * then the feature name must be preceded by a '!' character.
+ *
+ * LEGACY: FEATURES
+ */
+#define XEN_ELFNOTE_FEATURES      10
+
+/*
+ * The kernel requires the symbol table to be loaded (string = "yes" or "no")
+ * LEGACY: BSD_SYMTAB (n.b. The legacy treated the presence or absence
+ * of this string as a boolean flag rather than requiring "yes" or
+ * "no").
+ */
+#define XEN_ELFNOTE_BSD_SYMTAB    11
+
+/*
+ * The lowest address the hypervisor hole can begin at (numeric).
+ *
+ * This must not be set higher than HYPERVISOR_VIRT_START. Its presence
+ * also indicates to the hypervisor that the kernel can deal with the
+ * hole starting at a higher address.
+ */
+#define XEN_ELFNOTE_HV_START_LOW  12
+
+/*
+ * List of maddr_t-sized mask/value pairs describing how to recognize
+ * (non-present) L1 page table entries carrying valid MFNs (numeric).
+ */
+#define XEN_ELFNOTE_L1_MFN_VALID  13
+
+/*
+ * Whether or not the guest supports cooperative suspend cancellation.
+ */
+#define XEN_ELFNOTE_SUSPEND_CANCEL 14
+
+/*
+ * The number of the highest elfnote defined.
+ */
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUSPEND_CANCEL
+
+/*
+ * System information exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO 
+ * note in case of a system crash. This note will contain various
+ * information about the system, see xen/include/xen/elfcore.h.
+ */
+#define XEN_ELFNOTE_CRASH_INFO 0x1000001
+
+/*
+ * System registers exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS 
+ * note per cpu in case of a system crash. This note is architecture
+ * specific and will contain registers not saved in the "CORE" note.
+ * See xen/include/xen/elfcore.h for more information.
+ */
+#define XEN_ELFNOTE_CRASH_REGS 0x1000002
+
+
+/*
+ * xen dump-core none note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE
+ * in its dump file to indicate that the file is xen dump-core
+ * file. This note doesn't have any other information.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_NONE               0x2000000
+
+/*
+ * xen dump-core header note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER
+ * in its dump file.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_HEADER             0x2000001
+
+/*
+ * xen dump-core xen version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION
+ * in its dump file. It contains the xen version obtained via the
+ * XENVER hypercall.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION        0x2000002
+
+/*
+ * xen dump-core format version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION
+ * in its dump file. It contains a format version identifier.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION     0x2000003
+
+#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/elfnote.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/libelf.h
===================================================================
--- xen/interface/libelf.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/libelf.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,265 @@
+/******************************************************************************
+ * libelf.h
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XC_LIBELF__
+#define __XC_LIBELF__ 1
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
+#define XEN_ELF_LITTLE_ENDIAN
+#else
+#error define architectural endianness
+#endif
+
+#undef ELFSIZE
+#include "elfnote.h"
+#include "elfstructs.h"
+#include "features.h"
+
+/* ------------------------------------------------------------------------ */
+
+typedef union {
+    Elf32_Ehdr e32;
+    Elf64_Ehdr e64;
+} elf_ehdr;
+
+typedef union {
+    Elf32_Phdr e32;
+    Elf64_Phdr e64;
+} elf_phdr;
+
+typedef union {
+    Elf32_Shdr e32;
+    Elf64_Shdr e64;
+} elf_shdr;
+
+typedef union {
+    Elf32_Sym e32;
+    Elf64_Sym e64;
+} elf_sym;
+
+typedef union {
+    Elf32_Rel e32;
+    Elf64_Rel e64;
+} elf_rel;
+
+typedef union {
+    Elf32_Rela e32;
+    Elf64_Rela e64;
+} elf_rela;
+
+typedef union {
+    Elf32_Note e32;
+    Elf64_Note e64;
+} elf_note;
+
+struct elf_binary {
+    /* elf binary */
+    const char *image;
+    size_t size;
+    char class;
+    char data;
+
+    const elf_ehdr *ehdr;
+    const char *sec_strtab;
+    const elf_shdr *sym_tab;
+    const char *sym_strtab;
+
+    /* loaded to */
+    char *dest;
+    uint64_t pstart;
+    uint64_t pend;
+    uint64_t reloc_offset;
+
+    uint64_t bsd_symtab_pstart;
+    uint64_t bsd_symtab_pend;
+
+#ifndef __XEN__
+    /* misc */
+    FILE *log;
+#endif
+    int verbose;
+};
+
+/* ------------------------------------------------------------------------ */
+/* accessing elf header fields                                              */
+
+#ifdef XEN_ELF_BIG_ENDIAN
+# define NATIVE_ELFDATA ELFDATA2MSB
+#else
+# define NATIVE_ELFDATA ELFDATA2LSB
+#endif
+
+#define elf_32bit(elf) (ELFCLASS32 == (elf)->class)
+#define elf_64bit(elf) (ELFCLASS64 == (elf)->class)
+#define elf_msb(elf)   (ELFDATA2MSB == (elf)->data)
+#define elf_lsb(elf)   (ELFDATA2LSB == (elf)->data)
+#define elf_swap(elf)  (NATIVE_ELFDATA != (elf)->data)
+
+#define elf_uval(elf, str, elem)                                        \
+    ((ELFCLASS64 == (elf)->class)                                       \
+     ? elf_access_unsigned((elf), (str),                                \
+                           offsetof(typeof(*(str)),e64.elem),           \
+                           sizeof((str)->e64.elem))                     \
+     : elf_access_unsigned((elf), (str),                                \
+                           offsetof(typeof(*(str)),e32.elem),           \
+                           sizeof((str)->e32.elem)))
+
+#define elf_sval(elf, str, elem)                                        \
+    ((ELFCLASS64 == (elf)->class)                                       \
+     ? elf_access_signed((elf), (str),                                  \
+                         offsetof(typeof(*(str)),e64.elem),             \
+                         sizeof((str)->e64.elem))                       \
+     : elf_access_signed((elf), (str),                                  \
+                         offsetof(typeof(*(str)),e32.elem),             \
+                         sizeof((str)->e32.elem)))
+
+#define elf_size(elf, str)                              \
+    ((ELFCLASS64 == (elf)->class)                       \
+     ? sizeof((str)->e64) : sizeof((str)->e32))
+
+uint64_t elf_access_unsigned(struct elf_binary *elf, const void *ptr,
+                             uint64_t offset, size_t size);
+int64_t elf_access_signed(struct elf_binary *elf, const void *ptr,
+                          uint64_t offset, size_t size);
+
+uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr);
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_tools.c                                                        */
+
+int elf_shdr_count(struct elf_binary *elf);
+int elf_phdr_count(struct elf_binary *elf);
+
+const elf_shdr *elf_shdr_by_name(struct elf_binary *elf, const char *name);
+const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index);
+const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index);
+
+const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr);
+const void *elf_section_start(struct elf_binary *elf, const elf_shdr * shdr);
+const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr);
+
+const void *elf_segment_start(struct elf_binary *elf, const elf_phdr * phdr);
+const void *elf_segment_end(struct elf_binary *elf, const elf_phdr * phdr);
+
+const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol);
+const elf_sym *elf_sym_by_index(struct elf_binary *elf, int index);
+
+const char *elf_note_name(struct elf_binary *elf, const elf_note * note);
+const void *elf_note_desc(struct elf_binary *elf, const elf_note * note);
+uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note);
+const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note);
+
+int elf_is_elfbinary(const void *image);
+int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr);
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_loader.c                                                       */
+
+int elf_init(struct elf_binary *elf, const char *image, size_t size);
+#ifdef __XEN__
+void elf_set_verbose(struct elf_binary *elf);
+#else
+void elf_set_logfile(struct elf_binary *elf, FILE * log, int verbose);
+#endif
+
+void elf_parse_binary(struct elf_binary *elf);
+void elf_load_binary(struct elf_binary *elf);
+
+void *elf_get_ptr(struct elf_binary *elf, unsigned long addr);
+uint64_t elf_lookup_addr(struct elf_binary *elf, const char *symbol);
+
+void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart); /* private */
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_relocate.c                                                     */
+
+int elf_reloc(struct elf_binary *elf);
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_dominfo.c                                                      */
+
+#define UNSET_ADDR          ((uint64_t)-1)
+
+enum xen_elfnote_type {
+    XEN_ENT_NONE = 0,
+    XEN_ENT_LONG = 1,
+    XEN_ENT_STR  = 2
+};
+
+struct xen_elfnote {
+    enum xen_elfnote_type type;
+    const char *name;
+    union {
+        const char *str;
+        uint64_t num;
+    } data;
+};
+
+struct elf_dom_parms {
+    /* raw */
+    const char *guest_info;
+    const void *elf_note_start;
+    const void *elf_note_end;
+    struct xen_elfnote elf_notes[XEN_ELFNOTE_MAX + 1];
+  
+    /* parsed */
+    char guest_os[16];
+    char guest_ver[16];
+    char xen_ver[16];
+    char loader[16];
+    int pae;
+    int bsd_symtab;
+    uint64_t virt_base;
+    uint64_t virt_entry;
+    uint64_t virt_hypercall;
+    uint64_t virt_hv_start_low;
+    uint64_t elf_paddr_offset;
+    uint32_t f_supported[XENFEAT_NR_SUBMAPS];
+    uint32_t f_required[XENFEAT_NR_SUBMAPS];
+
+    /* calculated */
+    uint64_t virt_offset;
+    uint64_t virt_kstart;
+    uint64_t virt_kend;
+};
+
+static inline void elf_xen_feature_set(int nr, uint32_t * addr)
+{   /* Set bit 'nr' in the array of 32-bit words at 'addr' (word nr >> 5). */
+    addr[nr >> 5] |= 1U << (nr & 31); /* 1U: shifting int 1 by 31 is undefined */
+}
+static inline int elf_xen_feature_get(int nr, uint32_t * addr)
+{   /* Return 1 if bit 'nr' is set in the 32-bit words at 'addr', else 0. */
+    return !!(addr[nr >> 5] & (1U << (nr & 31))); /* 1U avoids signed 1 << 31 */
+}
+
+int elf_xen_parse_features(const char *features,
+                           uint32_t *supported,
+                           uint32_t *required);
+int elf_xen_parse_note(struct elf_binary *elf,
+                       struct elf_dom_parms *parms,
+                       const elf_note *note);
+int elf_xen_parse_guest_info(struct elf_binary *elf,
+                             struct elf_dom_parms *parms);
+int elf_xen_parse(struct elf_binary *elf,
+                  struct elf_dom_parms *parms);
+
+#endif /* __XC_LIBELF__ */

Property changes on: xen/interface/libelf.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/trace.h
===================================================================
--- xen/interface/trace.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/trace.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,193 @@
+/******************************************************************************
+ * include/public/trace.h
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Mark Williamson, (C) 2004 Intel Research Cambridge
+ * Copyright (C) 2005 Bin Ren
+ */
+
+#ifndef __XEN_PUBLIC_TRACE_H__
+#define __XEN_PUBLIC_TRACE_H__
+
+#define TRACE_EXTRA_MAX    7
+#define TRACE_EXTRA_SHIFT 28
+
+/* Trace classes */
+#define TRC_CLS_SHIFT 16
+#define TRC_GEN      0x0001f000    /* General trace            */
+#define TRC_SCHED    0x0002f000    /* Xen Scheduler trace      */
+#define TRC_DOM0OP   0x0004f000    /* Xen DOM0 operation trace */
+#define TRC_HVM      0x0008f000    /* Xen HVM trace            */
+#define TRC_MEM      0x0010f000    /* Xen memory trace         */
+#define TRC_PV       0x0020f000    /* Xen PV traces            */
+#define TRC_SHADOW   0x0040f000    /* Xen shadow tracing       */
+#define TRC_ALL      0x0ffff000
+#define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff)
+#define TRC_HD_CYCLE_FLAG (1UL<<31)
+#define TRC_HD_INCLUDES_CYCLE_COUNT(x) ( !!( (x) & TRC_HD_CYCLE_FLAG ) )
+#define TRC_HD_EXTRA(x)    (((x)>>TRACE_EXTRA_SHIFT)&TRACE_EXTRA_MAX)
+
+/* Trace subclasses */
+#define TRC_SUBCLS_SHIFT 12
+
+/* trace subclasses for SVM */
+#define TRC_HVM_ENTRYEXIT 0x00081000   /* VMENTRY and #VMEXIT       */
+#define TRC_HVM_HANDLER   0x00082000   /* various HVM handlers      */
+
+#define TRC_SCHED_MIN       0x00021000   /* Just runstate changes */
+#define TRC_SCHED_VERBOSE   0x00028000   /* More inclusive scheduling */
+
+/* Trace events per class */
+#define TRC_LOST_RECORDS        (TRC_GEN + 1)
+#define TRC_TRACE_WRAP_BUFFER  (TRC_GEN + 2)
+#define TRC_TRACE_CPU_CHANGE    (TRC_GEN + 3)
+
+#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1)
+#define TRC_SCHED_DOM_ADD        (TRC_SCHED_VERBOSE +  1)
+#define TRC_SCHED_DOM_REM        (TRC_SCHED_VERBOSE +  2)
+#define TRC_SCHED_SLEEP          (TRC_SCHED_VERBOSE +  3)
+#define TRC_SCHED_WAKE           (TRC_SCHED_VERBOSE +  4)
+#define TRC_SCHED_YIELD          (TRC_SCHED_VERBOSE +  5)
+#define TRC_SCHED_BLOCK          (TRC_SCHED_VERBOSE +  6)
+#define TRC_SCHED_SHUTDOWN       (TRC_SCHED_VERBOSE +  7)
+#define TRC_SCHED_CTL            (TRC_SCHED_VERBOSE +  8)
+#define TRC_SCHED_ADJDOM         (TRC_SCHED_VERBOSE +  9)
+#define TRC_SCHED_SWITCH         (TRC_SCHED_VERBOSE + 10)
+#define TRC_SCHED_S_TIMER_FN     (TRC_SCHED_VERBOSE + 11)
+#define TRC_SCHED_T_TIMER_FN     (TRC_SCHED_VERBOSE + 12)
+#define TRC_SCHED_DOM_TIMER_FN   (TRC_SCHED_VERBOSE + 13)
+#define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14)
+#define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15)
+
+#define TRC_MEM_PAGE_GRANT_MAP      (TRC_MEM + 1)
+#define TRC_MEM_PAGE_GRANT_UNMAP    (TRC_MEM + 2)
+#define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3)
+
+#define TRC_PV_HYPERCALL             (TRC_PV +  1)
+#define TRC_PV_TRAP                  (TRC_PV +  3)
+#define TRC_PV_PAGE_FAULT            (TRC_PV +  4)
+#define TRC_PV_FORCED_INVALID_OP     (TRC_PV +  5)
+#define TRC_PV_EMULATE_PRIVOP        (TRC_PV +  6)
+#define TRC_PV_EMULATE_4GB           (TRC_PV +  7)
+#define TRC_PV_MATH_STATE_RESTORE    (TRC_PV +  8)
+#define TRC_PV_PAGING_FIXUP          (TRC_PV +  9)
+#define TRC_PV_GDT_LDT_MAPPING_FAULT (TRC_PV + 10)
+#define TRC_PV_PTWR_EMULATION        (TRC_PV + 11)
+#define TRC_PV_PTWR_EMULATION_PAE    (TRC_PV + 12)
+  /* Indicates that addresses in trace record are 64 bits */
+#define TRC_64_FLAG               (0x100) 
+
+#define TRC_SHADOW_NOT_SHADOW                 (TRC_SHADOW +  1)
+#define TRC_SHADOW_FAST_PROPAGATE             (TRC_SHADOW +  2)
+#define TRC_SHADOW_FAST_MMIO                  (TRC_SHADOW +  3)
+#define TRC_SHADOW_FALSE_FAST_PATH            (TRC_SHADOW +  4)
+#define TRC_SHADOW_MMIO                       (TRC_SHADOW +  5)
+#define TRC_SHADOW_FIXUP                      (TRC_SHADOW +  6)
+#define TRC_SHADOW_DOMF_DYING                 (TRC_SHADOW +  7)
+#define TRC_SHADOW_EMULATE                    (TRC_SHADOW +  8)
+#define TRC_SHADOW_EMULATE_UNSHADOW_USER      (TRC_SHADOW +  9)
+#define TRC_SHADOW_EMULATE_UNSHADOW_EVTINJ    (TRC_SHADOW + 10)
+#define TRC_SHADOW_EMULATE_UNSHADOW_UNHANDLED (TRC_SHADOW + 11)
+#define TRC_SHADOW_WRMAP_BF                   (TRC_SHADOW + 12)
+#define TRC_SHADOW_PREALLOC_UNPIN             (TRC_SHADOW + 13)
+#define TRC_SHADOW_RESYNC_FULL                (TRC_SHADOW + 14)
+#define TRC_SHADOW_RESYNC_ONLY                (TRC_SHADOW + 15)
+
+/* trace events per subclass */
+#define TRC_HVM_VMENTRY         (TRC_HVM_ENTRYEXIT + 0x01)
+#define TRC_HVM_VMEXIT          (TRC_HVM_ENTRYEXIT + 0x02)
+#define TRC_HVM_VMEXIT64        (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02)
+#define TRC_HVM_PF_XEN          (TRC_HVM_HANDLER + 0x01)
+#define TRC_HVM_PF_XEN64        (TRC_HVM_HANDLER + TRC_64_FLAG + 0x01)
+#define TRC_HVM_PF_INJECT       (TRC_HVM_HANDLER + 0x02)
+#define TRC_HVM_PF_INJECT64     (TRC_HVM_HANDLER + TRC_64_FLAG + 0x02)
+#define TRC_HVM_INJ_EXC         (TRC_HVM_HANDLER + 0x03)
+#define TRC_HVM_INJ_VIRQ        (TRC_HVM_HANDLER + 0x04)
+#define TRC_HVM_REINJ_VIRQ      (TRC_HVM_HANDLER + 0x05)
+#define TRC_HVM_IO_READ         (TRC_HVM_HANDLER + 0x06)
+#define TRC_HVM_IO_WRITE        (TRC_HVM_HANDLER + 0x07)
+#define TRC_HVM_CR_READ         (TRC_HVM_HANDLER + 0x08)
+#define TRC_HVM_CR_READ64       (TRC_HVM_HANDLER + TRC_64_FLAG + 0x08)
+#define TRC_HVM_CR_WRITE        (TRC_HVM_HANDLER + 0x09)
+#define TRC_HVM_CR_WRITE64      (TRC_HVM_HANDLER + TRC_64_FLAG + 0x09)
+#define TRC_HVM_DR_READ         (TRC_HVM_HANDLER + 0x0A)
+#define TRC_HVM_DR_WRITE        (TRC_HVM_HANDLER + 0x0B)
+#define TRC_HVM_MSR_READ        (TRC_HVM_HANDLER + 0x0C)
+#define TRC_HVM_MSR_WRITE       (TRC_HVM_HANDLER + 0x0D)
+#define TRC_HVM_CPUID           (TRC_HVM_HANDLER + 0x0E)
+#define TRC_HVM_INTR            (TRC_HVM_HANDLER + 0x0F)
+#define TRC_HVM_NMI             (TRC_HVM_HANDLER + 0x10)
+#define TRC_HVM_SMI             (TRC_HVM_HANDLER + 0x11)
+#define TRC_HVM_VMMCALL         (TRC_HVM_HANDLER + 0x12)
+#define TRC_HVM_HLT             (TRC_HVM_HANDLER + 0x13)
+#define TRC_HVM_INVLPG          (TRC_HVM_HANDLER + 0x14)
+#define TRC_HVM_INVLPG64        (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14)
+#define TRC_HVM_MCE             (TRC_HVM_HANDLER + 0x15)
+#define TRC_HVM_IO_ASSIST       (TRC_HVM_HANDLER + 0x16)
+#define TRC_HVM_MMIO_ASSIST     (TRC_HVM_HANDLER + 0x17)
+#define TRC_HVM_CLTS            (TRC_HVM_HANDLER + 0x18)
+#define TRC_HVM_LMSW            (TRC_HVM_HANDLER + 0x19)
+#define TRC_HVM_LMSW64          (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19)
+
+/* This structure represents a single trace buffer record. */
+struct t_rec {
+    uint32_t event:28;
+    uint32_t extra_u32:3;         /* # entries in trailing extra_u32[] array */
+    uint32_t cycles_included:1;   /* u.cycles or u.no_cycles? */
+    union {
+        struct {
+            uint32_t cycles_lo, cycles_hi; /* cycle counter timestamp */
+            uint32_t extra_u32[7];         /* event data items */
+        } cycles;
+        struct {
+            uint32_t extra_u32[7];         /* event data items */
+        } nocycles;
+    } u;
+};
+
+/*
+ * This structure contains the metadata for a single trace buffer.  The cons
+ * and prod fields are offsets into the record data that follows this header.
+ */
+struct t_buf {
+    /* Assume the data buffer size is X.  X is generally not a power of 2.
+     * CONS and PROD are incremented modulo (2*X):
+     *     0 <= cons < 2*X
+     *     0 <= prod < 2*X
+     * This is done because addition modulo X breaks at 2^32 when X is not a
+     * power of 2:
+     *     (((2^32 - 1) % X) + 1) % X != (2^32) % X
+     */
+    uint32_t cons;   /* Offset of next item to be consumed by control tools. */
+    uint32_t prod;   /* Offset of next item to be produced by Xen.           */
+    /*  Records follow immediately after the meta-data header.    */
+};
+
+#endif /* __XEN_PUBLIC_TRACE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/trace.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/foreign/mkchecker.py
===================================================================
--- xen/interface/foreign/mkchecker.py	(.../stable/6/sys)	(revision 0)
+++ xen/interface/foreign/mkchecker.py	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,58 @@
+#!/usr/bin/python
+
+import sys;
+from structs import structs;
+
+# command line arguments
+arch    = sys.argv[1];
+outfile = sys.argv[2];
+archs   = sys.argv[3:];
+
+f = open(outfile, "w");
+f.write('''
+/*
+ * sanity checks for generated foreign headers:
+ *  - verify struct sizes
+ *
+ * generated by %s -- DO NOT EDIT
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <inttypes.h>
+#include "../xen.h"
+''' % sys.argv[0]);  # substitute this script's path into "generated by %s"
+
+for a in archs:
+    f.write('#include "%s.h"\n' % a);
+
+f.write('int main(int argc, char *argv[])\n{\n');
+
+f.write('\tprintf("\\n");');
+f.write('printf("%-25s |", "structs");\n');
+for a in archs:
+    f.write('\tprintf("%%8s", "%s");\n' % a);
+f.write('\tprintf("\\n");');
+
+f.write('\tprintf("\\n");');
+for struct in structs:
+    f.write('\tprintf("%%-25s |", "%s");\n' % struct);
+    for a in archs:
+        if a == arch:
+            s = struct; # native
+        else:
+            s = struct + "_" + a;
+        f.write('#ifdef %s_has_no_%s\n' % (a, struct));
+        f.write('\tprintf("%8s", "-");\n');
+        f.write("#else\n");
+        f.write('\tprintf("%%8zd", sizeof(struct %s));\n' % s);
+        f.write("#endif\n");
+
+    f.write('\tprintf("\\n");\n\n');
+
+f.write('\tprintf("\\n");\n');
+f.write('\texit(0);\n');
+f.write('}\n');
+
+f.close();
+

Property changes on: xen/interface/foreign/mkchecker.py
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/foreign/mkheader.py
===================================================================
--- xen/interface/foreign/mkheader.py	(.../stable/6/sys)	(revision 0)
+++ xen/interface/foreign/mkheader.py	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,167 @@
+#!/usr/bin/python
+
+import sys, re;
+from structs import unions, structs, defines;
+
+# command line arguments
+arch    = sys.argv[1];
+outfile = sys.argv[2];
+infiles = sys.argv[3:];
+
+
+###########################################################################
+# configuration #2: architecture information
+
+inttypes = {};
+header = {};
+footer = {};
+
+# x86_32
+inttypes["x86_32"] = {
+    "unsigned long" : "uint32_t",
+    "long"          : "uint32_t",
+    "xen_pfn_t"     : "uint32_t",
+};
+header["x86_32"] = """
+#define __i386___X86_32 1
+#pragma pack(4)
+""";
+footer["x86_32"] = """
+#pragma pack()
+""";
+
+# x86_64
+inttypes["x86_64"] = {
+    "unsigned long" : "__align8__ uint64_t",
+    "long"          : "__align8__ uint64_t",
+    "xen_pfn_t"     : "__align8__ uint64_t",
+};
+header["x86_64"] = """
+#ifdef __GNUC__
+# define __DECL_REG(name) union { uint64_t r ## name, e ## name; }
+# define __align8__ __attribute__((aligned (8)))
+#else
+# define __DECL_REG(name) uint64_t r ## name
+# define __align8__ FIXME
+#endif
+#define __x86_64___X86_64 1
+""";
+
+# ia64
+inttypes["ia64"] = {
+    "unsigned long" : "__align8__ uint64_t",
+    "long"          : "__align8__ uint64_t",
+    "xen_pfn_t"     : "__align8__ uint64_t",
+    "long double"   : "__align16__ ldouble_t",
+};
+header["ia64"] = """
+#define __align8__ __attribute__((aligned (8)))
+#define __align16__ __attribute__((aligned (16)))
+typedef unsigned char ldouble_t[16];
+""";
+
+
+###########################################################################
+# main
+
+input  = "";
+output = "";
+fileid = re.sub("[-.]", "_", "__FOREIGN_%s__" % outfile.upper());
+
+# read input header files
+for name in infiles:
+    f = open(name, "r");
+    input += f.read();
+    f.close();
+
+# add header
+output += """
+/*
+ * public xen defines and struct for %s
+ * generated by %s -- DO NOT EDIT
+ */
+
+#ifndef %s
+#define %s 1
+
+""" % (arch, sys.argv[0], fileid, fileid)
+
+if arch in header:
+    output += header[arch];
+    output += "\n";
+
+# add defines to output
+for line in re.findall("#define[^\n]+", input):
+    for define in defines:
+        regex = "#define\s+%s\\b" % define;
+        match = re.search(regex, line);
+        if None == match:
+            continue;
+        if define.upper()[0] == define[0]:
+            replace = define + "_" + arch.upper();
+        else:
+            replace = define + "_" + arch;
+        regex = "\\b%s\\b" % define;
+        output += re.sub(regex, replace, line) + "\n";
+output += "\n";
+
+# delete defines, comments, empty lines
+input = re.sub("#define[^\n]+\n", "", input);
+input = re.compile("/\*(.*?)\*/", re.S).sub("", input)
+input = re.compile("\n\s*\n", re.S).sub("\n", input);
+
+# add unions to output
+for union in unions:
+    regex = "union\s+%s\s*\{(.*?)\n\};" % union;
+    match = re.search(regex, input, re.S)
+    if None == match:
+        output += "#define %s_has_no_%s 1\n" % (arch, union);
+    else:
+        output += "union %s_%s {%s\n};\n" % (union, arch, match.group(1));
+    output += "\n";
+
+# add structs to output
+for struct in structs:
+    regex = "struct\s+%s\s*\{(.*?)\n\};" % struct;
+    match = re.search(regex, input, re.S)
+    if None == match:
+        output += "#define %s_has_no_%s 1\n" % (arch, struct);
+    else:
+        output += "struct %s_%s {%s\n};\n" % (struct, arch, match.group(1));
+        output += "typedef struct %s_%s %s_%s_t;\n" % (struct, arch, struct, arch);
+    output += "\n";
+
+# add footer
+if arch in footer:
+    output += footer[arch];
+    output += "\n";
+output += "#endif /* %s */\n" % fileid;
+
+# replace: defines
+for define in defines:
+    if define.upper()[0] == define[0]:
+        replace = define + "_" + arch.upper();
+    else:
+        replace = define + "_" + arch;
+    output = re.sub("\\b%s\\b" % define, replace, output);
+
+# replace: unions
+for union in unions:
+    output = re.sub("\\b(union\s+%s)\\b" % union, "\\1_%s" % arch, output);
+
+# replace: structs + struct typedefs
+for struct in structs:
+    output = re.sub("\\b(struct\s+%s)\\b" % struct, "\\1_%s" % arch, output);
+    output = re.sub("\\b(%s)_t\\b" % struct, "\\1_%s_t" % arch, output);
+
+# replace: integer types (longest names first, so "unsigned long" is handled before "long")
+integers = list(inttypes[arch].keys());
+integers.sort(key=len, reverse=True);
+for type in integers:
+    output = re.sub("\\b%s\\b" % type, inttypes[arch][type], output);
+
+# write the generated header text to outfile
+f = open(outfile, "w");
+f.write(output);
+f.close();
+

Property changes on: xen/interface/foreign/mkheader.py
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/foreign/reference.size
===================================================================
--- xen/interface/foreign/reference.size	(.../stable/6/sys)	(revision 0)
+++ xen/interface/foreign/reference.size	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,17 @@
+
+structs              |  x86_32  x86_64    ia64
+
+start_info           |    1104    1152    1152
+trap_info            |       8      16       -
+pt_fpreg             |       -       -      16
+cpu_user_regs        |      68     200     496
+xen_ia64_boot_param  |       -       -      96
+ia64_tr_entry        |       -       -      32
+vcpu_extra_regs      |       -       -     536
+vcpu_guest_context   |    2800    5168    1056
+arch_vcpu_info       |      24      16       0
+vcpu_time_info       |      32      32      32
+vcpu_info            |      64      64      48
+arch_shared_info     |     268     280     272
+shared_info          |    2584    3368    4384
+

Property changes on: xen/interface/foreign/reference.size
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/foreign/Makefile
===================================================================
--- xen/interface/foreign/Makefile	(.../stable/6/sys)	(revision 0)
+++ xen/interface/foreign/Makefile	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,37 @@
+XEN_ROOT=../../../..
+include $(XEN_ROOT)/Config.mk
+
+architectures := x86_32 x86_64 ia64
+headers := $(patsubst %, %.h, $(architectures))
+scripts := $(wildcard *.py)
+
+.PHONY: all clean check-headers
+all: $(headers) check-headers
+
+clean:
+	rm -f $(headers)
+	rm -f checker checker.c $(XEN_TARGET_ARCH).size
+	rm -f *.pyc *.o *~
+
+ifeq ($(CROSS_COMPILE)$(XEN_TARGET_ARCH),$(XEN_COMPILE_ARCH))
+check-headers: checker
+	./checker > $(XEN_TARGET_ARCH).size
+	diff -u reference.size $(XEN_TARGET_ARCH).size
+checker: checker.c $(headers)
+	$(HOSTCC) $(HOSTCFLAGS) -o $@ $<
+else
+check-headers:
+	@echo "cross build: skipping check"
+endif
+
+x86_32.h: ../arch-x86/xen-x86_32.h ../arch-x86/xen.h ../xen.h $(scripts)
+	python mkheader.py $* $@ $(filter %.h,$^)
+
+x86_64.h: ../arch-x86/xen-x86_64.h ../arch-x86/xen.h ../xen.h $(scripts)
+	python mkheader.py $* $@ $(filter %.h,$^)
+
+ia64.h: ../arch-ia64.h ../xen.h $(scripts)
+	python mkheader.py $* $@ $(filter %.h,$^)
+
+checker.c: $(scripts)
+	python mkchecker.py $(XEN_TARGET_ARCH) $@ $(architectures)

Property changes on: xen/interface/foreign/Makefile
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/foreign/structs.py
===================================================================
--- xen/interface/foreign/structs.py	(.../stable/6/sys)	(revision 0)
+++ xen/interface/foreign/structs.py	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,58 @@
+# configuration: what needs translation
+
+unions  = [ "vcpu_cr_regs",
+            "vcpu_ar_regs" ];
+
+structs = [ "start_info",
+            "trap_info",
+            "pt_fpreg",
+            "cpu_user_regs",
+            "xen_ia64_boot_param",
+            "ia64_tr_entry",
+            "vcpu_tr_regs",
+            "vcpu_guest_context_regs",
+            "vcpu_guest_context",
+            "arch_vcpu_info",
+            "vcpu_time_info",
+            "vcpu_info",
+            "arch_shared_info",
+            "shared_info" ];
+
+defines = [ "__i386__",
+            "__x86_64__",
+
+            "FLAT_RING1_CS",
+            "FLAT_RING1_DS",
+            "FLAT_RING1_SS",
+
+            "FLAT_RING3_CS64",
+            "FLAT_RING3_DS64",
+            "FLAT_RING3_SS64",
+            "FLAT_KERNEL_CS64",
+            "FLAT_KERNEL_DS64",
+            "FLAT_KERNEL_SS64",
+
+            "FLAT_KERNEL_CS",
+            "FLAT_KERNEL_DS",
+            "FLAT_KERNEL_SS",
+
+            # x86_{32,64}
+            "_VGCF_i387_valid",
+            "VGCF_i387_valid",
+            "_VGCF_in_kernel",
+            "VGCF_in_kernel",
+            "_VGCF_failsafe_disables_events",
+            "VGCF_failsafe_disables_events",
+            "_VGCF_syscall_disables_events",
+            "VGCF_syscall_disables_events",
+            "_VGCF_online",
+            "VGCF_online",
+
+            # ia64
+            "VGCF_EXTRA_REGS",
+
+            # all archs
+            "xen_pfn_to_cr3",
+            "MAX_VIRT_CPUS",
+            "MAX_GUEST_CMDLINE" ];
+

Property changes on: xen/interface/foreign/structs.py
___________________________________________________________________
Added: fbsd:nokeywords
   + true


Property changes on: xen/interface/foreign
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/hvm/hvm_info_table.h
===================================================================
--- xen/interface/hvm/hvm_info_table.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/hvm/hvm_info_table.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,41 @@
+/******************************************************************************
+ * hvm/hvm_info_table.h
+ * 
+ * HVM parameter and information table, written into guest memory map.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+#define __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__
+
+#define HVM_INFO_PFN         0x09F
+#define HVM_INFO_OFFSET      0x800
+#define HVM_INFO_PADDR       ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET)
+
+struct hvm_info_table {
+    char        signature[8]; /* "HVM INFO" */
+    uint32_t    length;
+    uint8_t     checksum;
+    uint8_t     acpi_enabled;
+    uint8_t     apic_mode;
+    uint32_t    nr_vcpus;
+};
+
+#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */

Property changes on: xen/interface/hvm/hvm_info_table.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/hvm/e820.h
===================================================================
--- xen/interface/hvm/e820.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/hvm/e820.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,34 @@
+
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_E820_H__
+#define __XEN_PUBLIC_HVM_E820_H__
+
+/* E820 location in HVM virtual address space. */
+#define HVM_E820_PAGE        0x00090000
+#define HVM_E820_NR_OFFSET   0x000001E8
+#define HVM_E820_OFFSET      0x000002D0
+
+#define HVM_BELOW_4G_RAM_END        0xF0000000
+#define HVM_BELOW_4G_MMIO_START     HVM_BELOW_4G_RAM_END
+#define HVM_BELOW_4G_MMIO_LENGTH    ((1ULL << 32) - HVM_BELOW_4G_MMIO_START)
+
+#endif /* __XEN_PUBLIC_HVM_E820_H__ */

Property changes on: xen/interface/hvm/e820.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/hvm/save.h
===================================================================
--- xen/interface/hvm/save.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/hvm/save.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,88 @@
+/* 
+ * hvm/save.h
+ *
+ * Structure definitions for HVM state that is held by Xen and must
+ * be saved along with the domain's memory and device-model state.
+ * 
+ * Copyright (c) 2007 XenSource Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_SAVE_H__
+#define __XEN_PUBLIC_HVM_SAVE_H__
+
+/*
+ * Structures in this header *must* have the same layout in 32bit 
+ * and 64bit environments: this means that all fields must be explicitly 
+ * sized types and aligned to their sizes, and the structs must be 
+ * a multiple of eight bytes long.
+ *
+ * Only the state necessary for saving and restoring (i.e. fields 
+ * that are analogous to actual hardware state) should go in this file. 
+ * Internal mechanisms should be kept in Xen-private headers.
+ */
+
+#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
+#error "Anonymous structs/unions are a GNU extension."
+#endif
+
+/* 
+ * Each entry is preceded by a descriptor giving its type and length
+ */
+struct hvm_save_descriptor {
+    uint16_t typecode;          /* Used to demux the various types below */
+    uint16_t instance;          /* Further demux within a type */
+    uint32_t length;            /* In bytes, *not* including this descriptor */
+};
+
+
+/* 
+ * Each entry has a datatype associated with it: for example, the CPU state 
+ * is saved as a HVM_SAVE_TYPE(CPU), which has HVM_SAVE_LENGTH(CPU), 
+ * and is identified by a descriptor with typecode HVM_SAVE_CODE(CPU).
+ * DECLARE_HVM_SAVE_TYPE binds these things together with some type-system
+ * ugliness.
+ */
+
+#define DECLARE_HVM_SAVE_TYPE(_x, _code, _type)                   \
+  struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; }
+
+#define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t)
+#define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x)))
+#define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c))
+
+
+/* 
+ * The series of save records is terminated by a zero-type, zero-length 
+ * descriptor.
+ */
+
+struct hvm_save_end {};
+DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end);
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../arch-x86/hvm/save.h"
+#elif defined(__ia64__)
+#include "../arch-ia64/hvm/save.h"
+#else
+#error "unsupported architecture"
+#endif
+
+#endif /* __XEN_PUBLIC_HVM_SAVE_H__ */

Property changes on: xen/interface/hvm/save.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/hvm/ioreq.h
===================================================================
--- xen/interface/hvm/ioreq.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/hvm/ioreq.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,127 @@
+/*
+ * ioreq.h: I/O request definitions for device models
+ * Copyright (c) 2004, Intel Corporation.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _IOREQ_H_
+#define _IOREQ_H_
+
+#define IOREQ_READ      1
+#define IOREQ_WRITE     0
+
+#define STATE_IOREQ_NONE        0
+#define STATE_IOREQ_READY       1
+#define STATE_IOREQ_INPROCESS   2
+#define STATE_IORESP_READY      3
+
+#define IOREQ_TYPE_PIO          0 /* pio */
+#define IOREQ_TYPE_COPY         1 /* mmio ops */
+#define IOREQ_TYPE_TIMEOFFSET   7
+#define IOREQ_TYPE_INVALIDATE   8 /* mapcache */
+
+/*
+ * VMExit dispatcher should cooperate with instruction decoder to
+ * prepare this structure and notify service OS and DM by sending
+ * virq
+ */
+struct ioreq {
+    uint64_t addr;          /*  physical address            */
+    uint64_t size;          /*  size in bytes               */
+    uint64_t count;         /*  for rep prefixes            */
+    uint64_t data;          /*  data (or paddr of data)     */
+    uint8_t state:4;
+    uint8_t data_is_ptr:1;  /*  if 1, data above is the guest paddr 
+                             *   of the real data to use.   */
+    uint8_t dir:1;          /*  1=read, 0=write             */
+    uint8_t df:1;
+    uint8_t pad:1;
+    uint8_t type;           /* I/O type                     */
+    uint8_t _pad0[6];
+    uint64_t io_count;      /* How many IO done on a vcpu   */
+};
+typedef struct ioreq ioreq_t;
+
+struct vcpu_iodata {
+    struct ioreq vp_ioreq;
+    /* Event channel port, used for notifications to/from the device model. */
+    uint32_t vp_eport;
+    uint32_t _pad0;
+};
+typedef struct vcpu_iodata vcpu_iodata_t;
+
+struct shared_iopage {
+    struct vcpu_iodata   vcpu_iodata[1];
+};
+typedef struct shared_iopage shared_iopage_t;
+
+struct buf_ioreq {
+    uint8_t  type;   /* I/O type                    */
+    uint8_t  pad:1;
+    uint8_t  dir:1;  /* 1=read, 0=write             */
+    uint8_t  size:2; /* 0=>1, 1=>2, 2=>4, 3=>8. If 8, use two buf_ioreqs */
+    uint32_t addr:20;/* physical address            */
+    uint32_t data;   /* data                        */
+};
+typedef struct buf_ioreq buf_ioreq_t;
+
+#define IOREQ_BUFFER_SLOT_NUM     511 /* 8 bytes each, plus 2 4-byte indexes */
+struct buffered_iopage {
+    unsigned int read_pointer;
+    unsigned int write_pointer;
+    buf_ioreq_t buf_ioreq[IOREQ_BUFFER_SLOT_NUM];
+}; /* NB. Size of this structure must be no greater than one page. */
+typedef struct buffered_iopage buffered_iopage_t;
+
+#if defined(__ia64__)
+struct pio_buffer {
+    uint32_t page_offset;
+    uint32_t pointer;
+    uint32_t data_end;
+    uint32_t buf_size;
+    void *opaque;
+};
+
+#define PIO_BUFFER_IDE_PRIMARY   0 /* I/O port = 0x1F0 */
+#define PIO_BUFFER_IDE_SECONDARY 1 /* I/O port = 0x170 */
+#define PIO_BUFFER_ENTRY_NUM     2
+struct buffered_piopage {
+    struct pio_buffer pio[PIO_BUFFER_ENTRY_NUM];
+    uint8_t buffer[1];
+};
+#endif /* defined(__ia64__) */
+
+#define ACPI_PM1A_EVT_BLK_ADDRESS           0x0000000000001f40
+#define ACPI_PM1A_CNT_BLK_ADDRESS           (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04)
+#define ACPI_PM_TMR_BLK_ADDRESS             (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08)
+#define ACPI_GPE0_BLK_ADDRESS               (ACPI_PM_TMR_BLK_ADDRESS + 0x20)
+#define ACPI_GPE0_BLK_LEN                   0x08
+
+#endif /* _IOREQ_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/hvm/ioreq.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/hvm/vmx_assist.h
===================================================================
--- xen/interface/hvm/vmx_assist.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/hvm/vmx_assist.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,122 @@
+/*
+ * vmx_assist.h: Context definitions for the VMXASSIST world switch.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Leendert van Doorn, leendert@watson.ibm.com
+ * Copyright (c) 2005, International Business Machines Corporation.
+ */
+
+#ifndef _VMX_ASSIST_H_
+#define _VMX_ASSIST_H_
+
+#define VMXASSIST_BASE         0xD0000
+#define VMXASSIST_MAGIC        0x17101966
+#define VMXASSIST_MAGIC_OFFSET (VMXASSIST_BASE+8)
+
+#define VMXASSIST_NEW_CONTEXT (VMXASSIST_BASE + 12)
+#define VMXASSIST_OLD_CONTEXT (VMXASSIST_NEW_CONTEXT + 4)
+
+#ifndef __ASSEMBLY__
+
+#define NR_EXCEPTION_HANDLER    32
+#define NR_INTERRUPT_HANDLERS   16
+#define NR_TRAPS        (NR_EXCEPTION_HANDLER+NR_INTERRUPT_HANDLERS)
+
+union vmcs_arbytes {
+    struct arbyte_fields {
+        unsigned int seg_type : 4,
+            s         : 1,
+            dpl       : 2,
+            p         : 1,
+            reserved0 : 4,
+            avl       : 1,
+            reserved1 : 1,
+            default_ops_size: 1,
+            g         : 1,
+            null_bit  : 1,
+            reserved2 : 15;
+    } fields;
+    unsigned int bytes;
+};
+
+/*
+ * World switch state
+ */
+struct vmx_assist_context {
+    uint32_t  eip;        /* execution pointer */
+    uint32_t  esp;        /* stack pointer */
+    uint32_t  eflags;     /* flags register */
+    uint32_t  cr0;
+    uint32_t  cr3;        /* page table directory */
+    uint32_t  cr4;
+    uint32_t  idtr_limit; /* idt */
+    uint32_t  idtr_base;
+    uint32_t  gdtr_limit; /* gdt */
+    uint32_t  gdtr_base;
+    uint32_t  cs_sel;     /* cs selector */
+    uint32_t  cs_limit;
+    uint32_t  cs_base;
+    union vmcs_arbytes cs_arbytes;
+    uint32_t  ds_sel;     /* ds selector */
+    uint32_t  ds_limit;
+    uint32_t  ds_base;
+    union vmcs_arbytes ds_arbytes;
+    uint32_t  es_sel;     /* es selector */
+    uint32_t  es_limit;
+    uint32_t  es_base;
+    union vmcs_arbytes es_arbytes;
+    uint32_t  ss_sel;     /* ss selector */
+    uint32_t  ss_limit;
+    uint32_t  ss_base;
+    union vmcs_arbytes ss_arbytes;
+    uint32_t  fs_sel;     /* fs selector */
+    uint32_t  fs_limit;
+    uint32_t  fs_base;
+    union vmcs_arbytes fs_arbytes;
+    uint32_t  gs_sel;     /* gs selector */
+    uint32_t  gs_limit;
+    uint32_t  gs_base;
+    union vmcs_arbytes gs_arbytes;
+    uint32_t  tr_sel;     /* task selector */
+    uint32_t  tr_limit;
+    uint32_t  tr_base;
+    union vmcs_arbytes tr_arbytes;
+    uint32_t  ldtr_sel;   /* ldtr selector */
+    uint32_t  ldtr_limit;
+    uint32_t  ldtr_base;
+    union vmcs_arbytes ldtr_arbytes;
+
+    unsigned char rm_irqbase[2];
+};
+typedef struct vmx_assist_context vmx_assist_context_t;
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _VMX_ASSIST_H_ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/hvm/vmx_assist.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/hvm/params.h
===================================================================
--- xen/interface/hvm/params.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/hvm/params.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,98 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
+#define __XEN_PUBLIC_HVM_PARAMS_H__
+
+#include <xen/interface/hvm/hvm_op.h>
+
+/*
+ * Parameter space for HVMOP_{set,get}_param.
+ */
+
+/*
+ * How should CPU0 event-channel notifications be delivered?
+ * val[63:56] == 0: val[55:0] is a delivery GSI (Global System Interrupt).
+ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows:
+ *                  Domain = val[47:32], Bus  = val[31:16],
+ *                  DevFn  = val[15: 8], IntX = val[ 1: 0]
+ * If val == 0 then CPU0 event-channel notifications are not delivered.
+ */
+#define HVM_PARAM_CALLBACK_IRQ 0
+
+/*
+ * These are not used by Xen. They are here for convenience of HVM-guest
+ * xenbus implementations.
+ */
+#define HVM_PARAM_STORE_PFN    1
+#define HVM_PARAM_STORE_EVTCHN 2
+
+#define HVM_PARAM_PAE_ENABLED  4
+
+#define HVM_PARAM_IOREQ_PFN    5
+
+#define HVM_PARAM_BUFIOREQ_PFN 6
+
+#ifdef __ia64__
+#define HVM_PARAM_NVRAM_FD     7
+#define HVM_PARAM_VHPT_SIZE    8
+#define HVM_PARAM_BUFPIOREQ_PFN	9
+#endif
+
+/*
+ * Set mode for virtual timers (currently x86 only):
+ *  delay_for_missed_ticks (default):
+ *   Do not advance a vcpu's time beyond the correct delivery time for
+ *   interrupts that have been missed due to preemption. Deliver missed
+ *   interrupts when the vcpu is rescheduled and advance the vcpu's virtual
+ *   time stepwise for each one.
+ *  no_delay_for_missed_ticks:
+ *   As above, missed interrupts are delivered, but guest time always tracks
+ *   wallclock (i.e., real) time while doing so.
+ *  no_missed_ticks_pending:
+ *   No missed interrupts are held pending. Instead, to ensure ticks are
+ *   delivered at some non-zero rate, if we detect missed ticks then the
+ *   internal tick alarm is not disabled if the VCPU is preempted during the
+ *   next tick period.
+ *  one_missed_tick_pending:
+ *   Missed interrupts are collapsed together and delivered as one 'late tick'.
+ *   Guest time always tracks wallclock (i.e., real) time.
+ */
+#define HVM_PARAM_TIMER_MODE   10
+#define HVMPTM_delay_for_missed_ticks    0
+#define HVMPTM_no_delay_for_missed_ticks 1
+#define HVMPTM_no_missed_ticks_pending   2
+#define HVMPTM_one_missed_tick_pending   3
+
+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
+#define HVM_PARAM_HPET_ENABLED 11
+
+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
+#define HVM_PARAM_IDENT_PT     12
+
+/* Device Model domain, defaults to 0. */
+#define HVM_PARAM_DM_DOMAIN    13
+
+/* ACPI S state: currently support S0 and S3 on x86. */
+#define HVM_PARAM_ACPI_S_STATE 14
+
+#define HVM_NR_PARAMS          15
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */

Property changes on: xen/interface/hvm/params.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/hvm/hvm_op.h
===================================================================
--- xen/interface/hvm/hvm_op.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/hvm/hvm_op.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,131 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
+#define __XEN_PUBLIC_HVM_HVM_OP_H__
+
+/* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */
+#define HVMOP_set_param           0
+#define HVMOP_get_param           1
+struct xen_hvm_param {
+    domid_t  domid;    /* IN */
+    uint32_t index;    /* IN */
+    uint64_t value;    /* IN/OUT */
+};
+typedef struct xen_hvm_param xen_hvm_param_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t);
+
+/* Set the logical level of one of a domain's PCI INTx wires. */
+#define HVMOP_set_pci_intx_level  2
+struct xen_hvm_set_pci_intx_level {
+    /* Domain to be updated. */
+    domid_t  domid;
+    /* PCI INTx identification in PCI topology (domain:bus:device:intx). */
+    uint8_t  domain, bus, device, intx;
+    /* Assertion level (0 = unasserted, 1 = asserted). */
+    uint8_t  level;
+};
+typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t);
+
+/* Set the logical level of one of a domain's ISA IRQ wires. */
+#define HVMOP_set_isa_irq_level   3
+struct xen_hvm_set_isa_irq_level {
+    /* Domain to be updated. */
+    domid_t  domid;
+    /* ISA device identification, by ISA IRQ (0-15). */
+    uint8_t  isa_irq;
+    /* Assertion level (0 = unasserted, 1 = asserted). */
+    uint8_t  level;
+};
+typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t);
+
+#define HVMOP_set_pci_link_route  4
+struct xen_hvm_set_pci_link_route {
+    /* Domain to be updated. */
+    domid_t  domid;
+    /* PCI link identifier (0-3). */
+    uint8_t  link;
+    /* ISA IRQ (1-15), or 0 (disable link). */
+    uint8_t  isa_irq;
+};
+typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t);
+
+/* Flushes all VCPU TLBs: @arg must be NULL. */
+#define HVMOP_flush_tlbs          5
+
+/* Following tools-only interfaces may change in future. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+
+/* Track dirty VRAM. */
+#define HVMOP_track_dirty_vram    6
+struct xen_hvm_track_dirty_vram {
+    /* Domain to be tracked. */
+    domid_t  domid;
+    /* First pfn to track. */
+    uint64_aligned_t first_pfn;
+    /* Number of pages to track. */
+    uint64_aligned_t nr;
+    /* OUT variable. */
+    /* Dirty bitmap buffer. */
+    XEN_GUEST_HANDLE_64(uint8) dirty_bitmap;
+};
+typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t);
+
+/* Notify that some pages got modified by the Device Model. */
+#define HVMOP_modified_memory    7
+struct xen_hvm_modified_memory {
+    /* Domain to be updated. */
+    domid_t  domid;
+    /* First pfn. */
+    uint64_aligned_t first_pfn;
+    /* Number of pages. */
+    uint64_aligned_t nr;
+};
+typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t);
+
+#define HVMOP_set_mem_type    8
+typedef enum {
+    HVMMEM_ram_rw,             /* Normal read/write guest RAM */
+    HVMMEM_ram_ro,             /* Read-only; writes are discarded */
+    HVMMEM_mmio_dm,            /* Reads and writes go to the device model */
+} hvmmem_type_t;
+/* Notify that a region of memory is to be treated in a specific way. */
+struct xen_hvm_set_mem_type {
+    /* Domain to be updated. */
+    domid_t domid;
+    /* Memory type */
+    hvmmem_type_t hvmmem_type;
+    /* First pfn. */
+    uint64_aligned_t first_pfn;
+    /* Number of pages. */
+    uint64_aligned_t nr;
+};
+typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t);
+
+
+#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */
+
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */

Property changes on: xen/interface/hvm/hvm_op.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true


Property changes on: xen/interface/hvm
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/arch-x86_64.h
===================================================================
--- xen/interface/arch-x86_64.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/arch-x86_64.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,27 @@
+/******************************************************************************
+ * arch-x86_64.h
+ * 
+ * Guest OS interface to x86 64-bit Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include "arch-x86/xen.h"

Property changes on: xen/interface/arch-x86_64.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/arch-x86/xen.h
===================================================================
--- xen/interface/arch-x86/xen.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/arch-x86/xen.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,199 @@
+/******************************************************************************
+ * arch-x86/xen.h
+ * 
+ * Guest OS interface to x86 Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_H__
+
+/* Structural guest handles introduced in 0x00030201. */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef struct { type *p; } __guest_handle_ ## name
+#else
+#error "using old handle"
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type)  \
+    typedef type * __guest_handle_ ## name
+#endif
+
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
+    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+#define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define __XEN_GUEST_HANDLE(name)        __guest_handle_ ## name
+#define XEN_GUEST_HANDLE(name)          __XEN_GUEST_HANDLE(name)
+#define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
+#ifdef __XEN_TOOLS__
+#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
+#endif
+
+#if defined(__i386__)
+#include <xen/interface/arch-x86/xen-x86_32.h>
+#elif defined(__x86_64__)
+#include <xen/interface/arch-x86/xen-x86_64.h>
+#endif
+
+#ifndef __ASSEMBLY__
+typedef unsigned long xen_pfn_t;
+#define PRI_xen_pfn "lx"
+#endif
+
+/*
+ * SEGMENT DESCRIPTOR TABLES
+ */
+/*
+ * A number of GDT entries are reserved by Xen. These are not situated at the
+ * start of the GDT because some stupid OSes export hard-coded selector values
+ * in their ABI. These hard-coded values are always near the start of the GDT,
+ * so Xen places itself out of the way, at the far end of the GDT.
+ */
+#define FIRST_RESERVED_GDT_PAGE  14
+#define FIRST_RESERVED_GDT_BYTE  (FIRST_RESERVED_GDT_PAGE * 4096)
+#define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8)
+
+/* Maximum number of virtual CPUs in multi-processor guests. */
+#define MAX_VIRT_CPUS 32
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long xen_ulong_t;
+
+/*
+ * Send an array of these to HYPERVISOR_set_trap_table().
+ * The privilege level specifies which modes may enter a trap via a software
+ * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate
+ * privilege levels as follows:
+ *  Level == 0: No one may enter
+ *  Level == 1: Kernel may enter
+ *  Level == 2: Kernel may enter
+ *  Level == 3: Everyone may enter
+ */
+#define TI_GET_DPL(_ti)      ((_ti)->flags & 3)
+#define TI_GET_IF(_ti)       ((_ti)->flags & 4)
+#define TI_SET_DPL(_ti,_dpl) ((_ti)->flags |= (_dpl))
+#define TI_SET_IF(_ti,_if)   ((_ti)->flags |= ((!!(_if))<<2))
+struct trap_info {
+    uint8_t       vector;  /* exception vector                              */
+    uint8_t       flags;   /* 0-3: privilege level; 4: clear event enable?  */
+    uint16_t      cs;      /* code selector                                 */
+    unsigned long address; /* code offset                                   */
+};
+typedef struct trap_info trap_info_t;
+DEFINE_XEN_GUEST_HANDLE(trap_info_t);
+
+typedef uint64_t tsc_timestamp_t; /* RDTSC timestamp */
+
+/*
+ * The following is all CPU context. Note that the fpu_ctxt block is filled 
+ * in by FXSAVE if the CPU has feature FXSR; otherwise FSAVE is used.
+ */
+struct vcpu_guest_context {
+    /* FPU registers come first so they can be aligned for FXSAVE/FXRSTOR. */
+    struct { char x[512]; } fpu_ctxt;       /* User-level FPU registers     */
+#define VGCF_I387_VALID                (1<<0)
+#define VGCF_IN_KERNEL                 (1<<2)
+#define _VGCF_i387_valid               0
+#define VGCF_i387_valid                (1<<_VGCF_i387_valid)
+#define _VGCF_in_kernel                2
+#define VGCF_in_kernel                 (1<<_VGCF_in_kernel)
+#define _VGCF_failsafe_disables_events 3
+#define VGCF_failsafe_disables_events  (1<<_VGCF_failsafe_disables_events)
+#define _VGCF_syscall_disables_events  4
+#define VGCF_syscall_disables_events   (1<<_VGCF_syscall_disables_events)
+#define _VGCF_online                   5
+#define VGCF_online                    (1<<_VGCF_online)
+    unsigned long flags;                    /* VGCF_* flags                 */
+    struct cpu_user_regs user_regs;         /* User-level CPU registers     */
+    struct trap_info trap_ctxt[256];        /* Virtual IDT                  */
+    unsigned long ldt_base, ldt_ents;       /* LDT (linear address, # ents) */
+    unsigned long gdt_frames[16], gdt_ents; /* GDT (machine frames, # ents) */
+    unsigned long kernel_ss, kernel_sp;     /* Virtual TSS (only SS1/SP1)   */
+    /* NB. User pagetable on x86/64 is placed in ctrlreg[1]. */
+    unsigned long ctrlreg[8];               /* CR0-CR7 (control registers)  */
+    unsigned long debugreg[8];              /* DB0-DB7 (debug registers)    */
+#ifdef __i386__
+    unsigned long event_callback_cs;        /* CS:EIP of event callback     */
+    unsigned long event_callback_eip;
+    unsigned long failsafe_callback_cs;     /* CS:EIP of failsafe callback  */
+    unsigned long failsafe_callback_eip;
+#else
+    unsigned long event_callback_eip;
+    unsigned long failsafe_callback_eip;
+#ifdef __XEN__
+    union {
+        unsigned long syscall_callback_eip;
+        struct {
+            unsigned int event_callback_cs;    /* compat CS of event cb     */
+            unsigned int failsafe_callback_cs; /* compat CS of failsafe cb  */
+        };
+    } u;
+#else
+    unsigned long syscall_callback_eip;
+#endif
+#endif
+    unsigned long vm_assist;                /* VMASST_TYPE_* bitmap */
+#ifdef __x86_64__
+    /* Segment base addresses. */
+    uint64_t      fs_base;
+    uint64_t      gs_base_kernel;
+    uint64_t      gs_base_user;
+#endif
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
+
+struct arch_shared_info {
+    unsigned long max_pfn;                  /* max pfn that appears in table */
+    /* Frame containing list of mfns containing list of mfns containing p2m. */
+    xen_pfn_t     pfn_to_mfn_frame_list_list;
+    unsigned long nmi_reason;
+    uint64_t pad[32];
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+#endif /* !__ASSEMBLY__ */
+
+/*
+ * Prefix forces emulation of some non-trapping instructions.
+ * Currently only CPUID.
+ */
+#ifdef __ASSEMBLY__
+#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
+#define XEN_CPUID          XEN_EMULATE_PREFIX cpuid
+#else
+#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
+#define XEN_CPUID          XEN_EMULATE_PREFIX "cpuid"
+#endif
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/arch-x86/xen.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/arch-x86/xen-x86_32.h
===================================================================
--- xen/interface/arch-x86/xen-x86_32.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/arch-x86/xen-x86_32.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,183 @@
+/******************************************************************************
+ * xen-x86_32.h
+ * 
+ * Guest OS interface to x86 32-bit Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2007, K A Fraser
+ */
+
+#include <sys/param.h>
+#include <sys/types.h>
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__
+
+/*
+ * Hypercall interface:
+ *  Input:  %ebx, %ecx, %edx, %esi, %edi (arguments 1-5)
+ *  Output: %eax
+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
+ *  call hypercall_page + hypercall-number * 32
+ * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx)
+ */
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030203
+/*
+ * Legacy hypercall interface:
+ * As above, except the entry sequence to the hypervisor is:
+ *  mov $hypercall-number*32,%eax ; int $0x82
+ */
+#define TRAP_INSTR "int $0x82"
+#endif
+
+/*
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+#define FLAT_RING1_CS 0xe019    /* GDT index 7171 */
+#define FLAT_RING1_DS 0xe021    /* GDT index 7172 */
+#define FLAT_RING1_SS 0xe021    /* GDT index 7172 */
+#define FLAT_RING3_CS 0xe02b    /* GDT index 7173 */
+#define FLAT_RING3_DS 0xe033    /* GDT index 7174 */
+#define FLAT_RING3_SS 0xe033    /* GDT index 7174 */
+
+#define FLAT_KERNEL_CS FLAT_RING1_CS
+#define FLAT_KERNEL_DS FLAT_RING1_DS
+#define FLAT_KERNEL_SS FLAT_RING1_SS
+#define FLAT_USER_CS    FLAT_RING3_CS
+#define FLAT_USER_DS    FLAT_RING3_DS
+#define FLAT_USER_SS    FLAT_RING3_SS
+
+#define __HYPERVISOR_VIRT_START_PAE    0xF5800000
+#define __MACH2PHYS_VIRT_START_PAE     0xF5800000
+#define __MACH2PHYS_VIRT_END_PAE       0xF6800000
+#define HYPERVISOR_VIRT_START_PAE      \
+    mk_unsigned_long(__HYPERVISOR_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_START_PAE       \
+    mk_unsigned_long(__MACH2PHYS_VIRT_START_PAE)
+#define MACH2PHYS_VIRT_END_PAE         \
+    mk_unsigned_long(__MACH2PHYS_VIRT_END_PAE)
+
+/* Non-PAE bounds are obsolete. */
+#define __HYPERVISOR_VIRT_START_NONPAE 0xFC000000
+#define __MACH2PHYS_VIRT_START_NONPAE  0xFC000000
+#define __MACH2PHYS_VIRT_END_NONPAE    0xFC400000
+#define HYPERVISOR_VIRT_START_NONPAE   \
+    mk_unsigned_long(__HYPERVISOR_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_START_NONPAE    \
+    mk_unsigned_long(__MACH2PHYS_VIRT_START_NONPAE)
+#define MACH2PHYS_VIRT_END_NONPAE      \
+    mk_unsigned_long(__MACH2PHYS_VIRT_END_NONPAE)
+
+#define __HYPERVISOR_VIRT_START __HYPERVISOR_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_START  __MACH2PHYS_VIRT_START_PAE
+#define __MACH2PHYS_VIRT_END    __MACH2PHYS_VIRT_END_PAE
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#endif
+
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>2)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)MACH2PHYS_VIRT_START)
+#endif
+
+/* 32-/64-bit invariability for control interfaces (domctl/sysctl). */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#undef ___DEFINE_XEN_GUEST_HANDLE
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type)                  \
+    typedef struct { type *p; }                                 \
+        __guest_handle_ ## name;                                \
+    typedef struct { union { type *p; uint64_aligned_t q; }; }  \
+        __guest_handle_64_ ## name
+#undef set_xen_guest_handle
+#define set_xen_guest_handle(hnd, val)                      \
+    do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0;   \
+         (hnd).p = val;                                     \
+    } while ( 0 )
+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
+#define __XEN_GUEST_HANDLE_64(name) __guest_handle_64_ ## name
+#define XEN_GUEST_HANDLE_64(name) __XEN_GUEST_HANDLE_64(name)
+#endif
+
+#ifndef __ASSEMBLY__
+
+struct cpu_user_regs {
+    uint32_t ebx;
+    uint32_t ecx;
+    uint32_t edx;
+    uint32_t esi;
+    uint32_t edi;
+    uint32_t ebp;
+    uint32_t eax;
+    uint16_t error_code;    /* private */
+    uint16_t entry_vector;  /* private */
+    uint32_t eip;
+    uint16_t cs;
+    uint8_t  saved_upcall_mask;
+    uint8_t  _pad0;
+    uint32_t eflags;        /* eflags.IF == !saved_upcall_mask */
+    uint32_t esp;
+    uint16_t ss, _pad1;
+    uint16_t es, _pad2;
+    uint16_t ds, _pad3;
+    uint16_t fs, _pad4;
+    uint16_t gs, _pad5;
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
+
+/*
+ * Page-directory addresses above 4GB do not fit into architectural %cr3.
+ * When accessing %cr3, or equivalent field in vcpu_guest_context, guests
+ * must use the following accessor macros to pack/unpack valid MFNs.
+ */
+#define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20))
+#define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20))
+
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad[5]; /* sizeof(vcpu_info_t) == 64 */
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+struct xen_callback {
+    unsigned long cs;
+    unsigned long eip;
+};
+typedef struct xen_callback xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_32_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/arch-x86/xen-x86_32.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/arch-x86/cpuid.h
===================================================================
--- xen/interface/arch-x86/cpuid.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/arch-x86/cpuid.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,68 @@
+/******************************************************************************
+ * arch-x86/cpuid.h
+ * 
+ * CPUID interface to Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ * 
+ * Copyright (c) 2007 Citrix Systems, Inc.
+ * 
+ * Authors:
+ *    Keir Fraser <keir.fraser@citrix.com>
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__
+#define __XEN_PUBLIC_ARCH_X86_CPUID_H__
+
+/* Xen identification leaves start at 0x40000000. */
+#define XEN_CPUID_FIRST_LEAF 0x40000000
+#define XEN_CPUID_LEAF(i)    (XEN_CPUID_FIRST_LEAF + (i))
+
+/*
+ * Leaf 1 (0x40000000)
+ * EAX: Largest Xen-information leaf. All leaves up to and including @EAX
+ *      are supported by the Xen host.
+ * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification
+ *      of a Xen host.
+ */
+#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */
+#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */
+#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */
+
+/*
+ * Leaf 2 (0x40000001)
+ * EAX[31:16]: Xen major version.
+ * EAX[15: 0]: Xen minor version.
+ * EBX-EDX: Reserved (currently all zeroes).
+ */
+
+/*
+ * Leaf 3 (0x40000002)
+ * EAX: Number of hypercall transfer pages. This register is always guaranteed
+ *      to specify one hypercall page.
+ * EBX: Base address of Xen-specific MSRs.
+ * ECX: Features 1. Unused bits are set to zero.
+ * EDX: Features 2. Unused bits are set to zero.
+ */
+
+/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */
+#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0
+#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD  (1u<<0)
+
+#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */

Property changes on: xen/interface/arch-x86/cpuid.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: fbsd:nokeywords
   + true
Added: svn:eol-style
   + native

Index: xen/interface/arch-x86/hvm/save.h
===================================================================
--- xen/interface/arch-x86/hvm/save.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/arch-x86/hvm/save.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,429 @@
+/* 
+ * Structure definitions for HVM state that is held by Xen and must
+ * be saved along with the domain's memory and device-model state.
+ * 
+ * Copyright (c) 2007 XenSource Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_SAVE_X86_H__
+#define __XEN_PUBLIC_HVM_SAVE_X86_H__
+
+/* 
+ * Save/restore header: general info about the save file. 
+ */
+
+#define HVM_FILE_MAGIC   0x54381286
+#define HVM_FILE_VERSION 0x00000001
+
+struct hvm_save_header {
+    uint32_t magic;             /* Must be HVM_FILE_MAGIC */
+    uint32_t version;           /* File format version */
+    uint64_t changeset;         /* Version of Xen that saved this file */
+    uint32_t cpuid;             /* CPUID[0x01][%eax] on the saving machine */
+    uint32_t pad0;
+};
+
+DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header);
+
+
+/*
+ * Processor
+ */
+
+struct hvm_hw_cpu {
+    uint8_t  fpu_regs[512];
+
+    uint64_t rax;
+    uint64_t rbx;
+    uint64_t rcx;
+    uint64_t rdx;
+    uint64_t rbp;
+    uint64_t rsi;
+    uint64_t rdi;
+    uint64_t rsp;
+    uint64_t r8;
+    uint64_t r9;
+    uint64_t r10;
+    uint64_t r11;
+    uint64_t r12;
+    uint64_t r13;
+    uint64_t r14;
+    uint64_t r15;
+
+    uint64_t rip;
+    uint64_t rflags;
+
+    uint64_t cr0;
+    uint64_t cr2;
+    uint64_t cr3;
+    uint64_t cr4;
+
+    uint64_t dr0;
+    uint64_t dr1;
+    uint64_t dr2;
+    uint64_t dr3;
+    uint64_t dr6;
+    uint64_t dr7;    
+
+    uint32_t cs_sel;
+    uint32_t ds_sel;
+    uint32_t es_sel;
+    uint32_t fs_sel;
+    uint32_t gs_sel;
+    uint32_t ss_sel;
+    uint32_t tr_sel;
+    uint32_t ldtr_sel;
+
+    uint32_t cs_limit;
+    uint32_t ds_limit;
+    uint32_t es_limit;
+    uint32_t fs_limit;
+    uint32_t gs_limit;
+    uint32_t ss_limit;
+    uint32_t tr_limit;
+    uint32_t ldtr_limit;
+    uint32_t idtr_limit;
+    uint32_t gdtr_limit;
+
+    uint64_t cs_base;
+    uint64_t ds_base;
+    uint64_t es_base;
+    uint64_t fs_base;
+    uint64_t gs_base;
+    uint64_t ss_base;
+    uint64_t tr_base;
+    uint64_t ldtr_base;
+    uint64_t idtr_base;
+    uint64_t gdtr_base;
+
+    uint32_t cs_arbytes;
+    uint32_t ds_arbytes;
+    uint32_t es_arbytes;
+    uint32_t fs_arbytes;
+    uint32_t gs_arbytes;
+    uint32_t ss_arbytes;
+    uint32_t tr_arbytes;
+    uint32_t ldtr_arbytes;
+
+    uint32_t sysenter_cs;
+    uint32_t padding0;
+
+    uint64_t sysenter_esp;
+    uint64_t sysenter_eip;
+
+    /* msr for em64t */
+    uint64_t shadow_gs;
+
+    /* msr content saved/restored. */
+    uint64_t msr_flags;
+    uint64_t msr_lstar;
+    uint64_t msr_star;
+    uint64_t msr_cstar;
+    uint64_t msr_syscall_mask;
+    uint64_t msr_efer;
+
+    /* guest's idea of what rdtsc() would return */
+    uint64_t tsc;
+
+    /* pending event, if any */
+    union {
+        uint32_t pending_event;
+        struct {
+            uint8_t  pending_vector:8;
+            uint8_t  pending_type:3;
+            uint8_t  pending_error_valid:1;
+            uint32_t pending_reserved:19;
+            uint8_t  pending_valid:1;
+        };
+    };
+    /* error code for pending event */
+    uint32_t error_code;
+};
+
+DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu);
+
+
+/*
+ * PIC
+ */
+
+struct hvm_hw_vpic {
+    /* IR line bitmasks. */
+    uint8_t irr;
+    uint8_t imr;
+    uint8_t isr;
+
+    /* Line IRx maps to IRQ irq_base+x */
+    uint8_t irq_base;
+
+    /*
+     * Where are we in ICW2-4 initialisation (0 means no init in progress)?
+     * Bits 0-1 (=x): Next write at A=1 sets ICW(x+1).
+     * Bit 2: ICW1.IC4  (1 == ICW4 included in init sequence)
+     * Bit 3: ICW1.SNGL (0 == ICW3 included in init sequence)
+     */
+    uint8_t init_state:4;
+
+    /* IR line with highest priority. */
+    uint8_t priority_add:4;
+
+    /* Reads from A=0 obtain ISR or IRR? */
+    uint8_t readsel_isr:1;
+
+    /* Reads perform a polling read? */
+    uint8_t poll:1;
+
+    /* Automatically clear IRQs from the ISR during INTA? */
+    uint8_t auto_eoi:1;
+
+    /* Automatically rotate IRQ priorities during AEOI? */
+    uint8_t rotate_on_auto_eoi:1;
+
+    /* Exclude slave inputs when considering in-service IRQs? */
+    uint8_t special_fully_nested_mode:1;
+
+    /* Special mask mode excludes masked IRs from AEOI and priority checks. */
+    uint8_t special_mask_mode:1;
+
+    /* Is this a master PIC or slave PIC? (NB. This is not programmable.) */
+    uint8_t is_master:1;
+
+    /* Edge/level trigger selection. */
+    uint8_t elcr;
+
+    /* Virtual INT output. */
+    uint8_t int_output;
+};
+
+DECLARE_HVM_SAVE_TYPE(PIC, 3, struct hvm_hw_vpic);
+
+
+/*
+ * IO-APIC
+ */
+
+#ifdef __ia64__
+#define VIOAPIC_IS_IOSAPIC 1
+#define VIOAPIC_NUM_PINS  24
+#else
+#define VIOAPIC_NUM_PINS  48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */
+#endif
+
+struct hvm_hw_vioapic {
+    uint64_t base_address;
+    uint32_t ioregsel;
+    uint32_t id;
+    union vioapic_redir_entry
+    {
+        uint64_t bits;
+        struct {
+            uint8_t vector;
+            uint8_t delivery_mode:3;
+            uint8_t dest_mode:1;
+            uint8_t delivery_status:1;
+            uint8_t polarity:1;
+            uint8_t remote_irr:1;
+            uint8_t trig_mode:1;
+            uint8_t mask:1;
+            uint8_t reserve:7;
+#if !VIOAPIC_IS_IOSAPIC
+            uint8_t reserved[4];
+            uint8_t dest_id;
+#else
+            uint8_t reserved[3];
+            uint16_t dest_id;
+#endif
+        } fields;
+    } redirtbl[VIOAPIC_NUM_PINS];
+};
+
+DECLARE_HVM_SAVE_TYPE(IOAPIC, 4, struct hvm_hw_vioapic);
+
+
+/*
+ * LAPIC
+ */
+
+struct hvm_hw_lapic {
+    uint64_t             apic_base_msr;
+    uint32_t             disabled; /* VLAPIC_xx_DISABLED */
+    uint32_t             timer_divisor;
+};
+
+DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic);
+
+struct hvm_hw_lapic_regs {
+    uint8_t data[1024];
+};
+
+DECLARE_HVM_SAVE_TYPE(LAPIC_REGS, 6, struct hvm_hw_lapic_regs);
+
+
+/*
+ * IRQs
+ */
+
+struct hvm_hw_pci_irqs {
+    /*
+     * Virtual interrupt wires for a single PCI bus.
+     * Indexed by: device*4 + INTx#.
+     */
+    union {
+        DECLARE_BITMAP(i, 32*4);
+        uint64_t pad[2];
+    };
+};
+
+DECLARE_HVM_SAVE_TYPE(PCI_IRQ, 7, struct hvm_hw_pci_irqs);
+
+struct hvm_hw_isa_irqs {
+    /*
+     * Virtual interrupt wires for ISA devices.
+     * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing).
+     */
+    union {
+        DECLARE_BITMAP(i, 16);
+        uint64_t pad[1];
+    };
+};
+
+DECLARE_HVM_SAVE_TYPE(ISA_IRQ, 8, struct hvm_hw_isa_irqs);
+
+struct hvm_hw_pci_link {
+    /*
+     * PCI-ISA interrupt router.
+     * Each PCI <device:INTx#> is 'wire-ORed' into one of four links using
+     * the traditional 'barber's pole' mapping ((device + INTx#) & 3).
+     * The router provides a programmable mapping from each link to a GSI.
+     */
+    uint8_t route[4];
+    uint8_t pad0[4];
+};
+
+DECLARE_HVM_SAVE_TYPE(PCI_LINK, 9, struct hvm_hw_pci_link);
+
+/* 
+ *  PIT
+ */
+
+struct hvm_hw_pit {
+    struct hvm_hw_pit_channel {
+        uint32_t count; /* can be 65536 */
+        uint16_t latched_count;
+        uint8_t count_latched;
+        uint8_t status_latched;
+        uint8_t status;
+        uint8_t read_state;
+        uint8_t write_state;
+        uint8_t write_latch;
+        uint8_t rw_mode;
+        uint8_t mode;
+        uint8_t bcd; /* not supported */
+        uint8_t gate; /* timer start */
+    } channels[3];  /* 3 x 16 bytes */
+    uint32_t speaker_data_on;
+    uint32_t pad0;
+};
+
+DECLARE_HVM_SAVE_TYPE(PIT, 10, struct hvm_hw_pit);
+
+
+/* 
+ * RTC
+ */ 
+
+#define RTC_CMOS_SIZE 14
+struct hvm_hw_rtc {
+    /* CMOS bytes */
+    uint8_t cmos_data[RTC_CMOS_SIZE];
+    /* Index register for 2-part operations */
+    uint8_t cmos_index;
+    uint8_t pad0;
+};
+
+DECLARE_HVM_SAVE_TYPE(RTC, 11, struct hvm_hw_rtc);
+
+
+/*
+ * HPET
+ */
+
+#define HPET_TIMER_NUM     3    /* 3 timers supported now */
+struct hvm_hw_hpet {
+    /* Memory-mapped, software visible registers */
+    uint64_t capability;        /* capabilities */
+    uint64_t res0;              /* reserved */
+    uint64_t config;            /* configuration */
+    uint64_t res1;              /* reserved */
+    uint64_t isr;               /* interrupt status reg */
+    uint64_t res2[25];          /* reserved */
+    uint64_t mc64;              /* main counter */
+    uint64_t res3;              /* reserved */
+    struct {                    /* timers */
+        uint64_t config;        /* configuration/cap */
+        uint64_t cmp;           /* comparator */
+        uint64_t fsb;           /* FSB route, not supported now */
+        uint64_t res4;          /* reserved */
+    } timers[HPET_TIMER_NUM];
+    uint64_t res5[4*(24-HPET_TIMER_NUM)];  /* reserved, up to 0x3ff */
+
+    /* Hidden register state */
+    uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+};
+
+DECLARE_HVM_SAVE_TYPE(HPET, 12, struct hvm_hw_hpet);
+
+
+/*
+ * PM timer
+ */
+
+struct hvm_hw_pmtimer {
+    uint32_t tmr_val;   /* PM_TMR_BLK.TMR_VAL: 32bit free-running counter */
+    uint16_t pm1a_sts;  /* PM1a_EVT_BLK.PM1a_STS: status register */
+    uint16_t pm1a_en;   /* PM1a_EVT_BLK.PM1a_EN: enable register */
+};
+
+DECLARE_HVM_SAVE_TYPE(PMTIMER, 13, struct hvm_hw_pmtimer);
+
+/*
+ * MTRR MSRs
+ */
+
+struct hvm_hw_mtrr {
+#define MTRR_VCNT 8
+#define NUM_FIXED_MSR 11
+    uint64_t msr_pat_cr;
+    /* mtrr physbase & physmask msr pair */
+    uint64_t msr_mtrr_var[MTRR_VCNT*2];
+    uint64_t msr_mtrr_fixed[NUM_FIXED_MSR];
+    uint64_t msr_mtrr_cap;
+    uint64_t msr_mtrr_def_type;
+};
+
+DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr);
+
+/* 
+ * Largest type-code in use
+ */
+#define HVM_SAVE_CODE_MAX 14
+
+#endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */

Property changes on: xen/interface/arch-x86/hvm/save.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: fbsd:nokeywords
   + true
Added: svn:eol-style
   + native

Index: xen/interface/arch-x86/xen-x86_64.h
===================================================================
--- xen/interface/arch-x86/xen-x86_64.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/arch-x86/xen-x86_64.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,212 @@
+/******************************************************************************
+ * xen-x86_64.h
+ * 
+ * Guest OS interface to x86 64-bit Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
+#define __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__
+
+/*
+ * Hypercall interface:
+ *  Input:  %rdi, %rsi, %rdx, %r10, %r8 (arguments 1-5)
+ *  Output: %rax
+ * Access is via hypercall page (set up by guest loader or via a Xen MSR):
+ *  call hypercall_page + hypercall-number * 32
+ * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi)
+ */
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030203
+/*
+ * Legacy hypercall interface:
+ * As above, except the entry sequence to the hypervisor is:
+ *  mov $hypercall-number*32,%eax ; syscall
+ * Clobbered: %rcx, %r11, argument registers (as above)
+ */
+#define TRAP_INSTR "syscall"
+#endif
+
+/*
+ * 64-bit segment selectors
+ * These flat segments are in the Xen-private section of every GDT. Since these
+ * are also present in the initial GDT, many OSes will be able to avoid
+ * installing their own GDT.
+ */
+
+#define FLAT_RING3_CS32 0xe023  /* GDT index 260 */
+#define FLAT_RING3_CS64 0xe033  /* GDT index 261 */
+#define FLAT_RING3_DS32 0xe02b  /* GDT index 262 */
+#define FLAT_RING3_DS64 0x0000  /* NULL selector */
+#define FLAT_RING3_SS32 0xe02b  /* GDT index 262 */
+#define FLAT_RING3_SS64 0xe02b  /* GDT index 262 */
+
+#define FLAT_KERNEL_DS64 FLAT_RING3_DS64
+#define FLAT_KERNEL_DS32 FLAT_RING3_DS32
+#define FLAT_KERNEL_DS   FLAT_KERNEL_DS64
+#define FLAT_KERNEL_CS64 FLAT_RING3_CS64
+#define FLAT_KERNEL_CS32 FLAT_RING3_CS32
+#define FLAT_KERNEL_CS   FLAT_KERNEL_CS64
+#define FLAT_KERNEL_SS64 FLAT_RING3_SS64
+#define FLAT_KERNEL_SS32 FLAT_RING3_SS32
+#define FLAT_KERNEL_SS   FLAT_KERNEL_SS64
+
+#define FLAT_USER_DS64 FLAT_RING3_DS64
+#define FLAT_USER_DS32 FLAT_RING3_DS32
+#define FLAT_USER_DS   FLAT_USER_DS64
+#define FLAT_USER_CS64 FLAT_RING3_CS64
+#define FLAT_USER_CS32 FLAT_RING3_CS32
+#define FLAT_USER_CS   FLAT_USER_CS64
+#define FLAT_USER_SS64 FLAT_RING3_SS64
+#define FLAT_USER_SS32 FLAT_RING3_SS32
+#define FLAT_USER_SS   FLAT_USER_SS64
+
+#define __HYPERVISOR_VIRT_START 0xFFFF800000000000
+#define __HYPERVISOR_VIRT_END   0xFFFF880000000000
+#define __MACH2PHYS_VIRT_START  0xFFFF800000000000
+#define __MACH2PHYS_VIRT_END    0xFFFF804000000000
+
+#ifndef HYPERVISOR_VIRT_START
+#define HYPERVISOR_VIRT_START mk_unsigned_long(__HYPERVISOR_VIRT_START)
+#define HYPERVISOR_VIRT_END   mk_unsigned_long(__HYPERVISOR_VIRT_END)
+#endif
+
+#define MACH2PHYS_VIRT_START  mk_unsigned_long(__MACH2PHYS_VIRT_START)
+#define MACH2PHYS_VIRT_END    mk_unsigned_long(__MACH2PHYS_VIRT_END)
+#define MACH2PHYS_NR_ENTRIES  ((MACH2PHYS_VIRT_END-MACH2PHYS_VIRT_START)>>3)
+#ifndef machine_to_phys_mapping
+#define machine_to_phys_mapping ((unsigned long *)HYPERVISOR_VIRT_START)
+#endif
+
+/*
+ * int HYPERVISOR_set_segment_base(unsigned int which, unsigned long base)
+ *  @which == SEGBASE_*  ;  @base == 64-bit base address
+ * Returns 0 on success.
+ */
+#define SEGBASE_FS          0
+#define SEGBASE_GS_USER     1
+#define SEGBASE_GS_KERNEL   2
+#define SEGBASE_GS_USER_SEL 3 /* Set user %gs specified in base[15:0] */
+
+/*
+ * int HYPERVISOR_iret(void)
+ * All arguments are on the kernel stack, in the following format.
+ * Never returns if successful. Current kernel context is lost.
+ * The saved CS is mapped as follows:
+ *   RING0 -> RING3 kernel mode.
+ *   RING1 -> RING3 kernel mode.
+ *   RING2 -> RING3 kernel mode.
+ *   RING3 -> RING3 user mode.
+ * However RING0 indicates that the guest kernel should return to itself
+ * directly with
+ *      orb   $3,1*8(%rsp)
+ *      iretq
+ * If flags contains VGCF_in_syscall:
+ *   Restore RAX, RIP, RFLAGS, RSP.
+ *   Discard R11, RCX, CS, SS.
+ * Otherwise:
+ *   Restore RAX, R11, RCX, CS:RIP, RFLAGS, SS:RSP.
+ * All other registers are saved on hypercall entry and restored to user.
+ */
+/* Guest exited in SYSCALL context? Return to guest with SYSRET? */
+#define _VGCF_in_syscall 8
+#define VGCF_in_syscall  (1<<_VGCF_in_syscall)
+#define VGCF_IN_SYSCALL  VGCF_in_syscall
+
+#ifndef __ASSEMBLY__
+
+struct iret_context {
+    /* Top of stack (%rsp at point of hypercall). */
+    uint64_t rax, r11, rcx, flags, rip, cs, rflags, rsp, ss;
+    /* Bottom of iret stack frame. */
+};
+
+#if defined(__GNUC__) && !defined(__STRICT_ANSI__)
+/* Anonymous union includes both 32- and 64-bit names (e.g., eax/rax). */
+#define __DECL_REG(name) union { \
+    uint64_t r ## name, e ## name; \
+    uint32_t _e ## name; \
+}
+#else
+/* Non-gcc sources must always use the proper 64-bit name (e.g., rax). */
+#define __DECL_REG(name) uint64_t r ## name
+#endif
+
+struct cpu_user_regs {
+    uint64_t r15;
+    uint64_t r14;
+    uint64_t r13;
+    uint64_t r12;
+    __DECL_REG(bp);
+    __DECL_REG(bx);
+    uint64_t r11;
+    uint64_t r10;
+    uint64_t r9;
+    uint64_t r8;
+    __DECL_REG(ax);
+    __DECL_REG(cx);
+    __DECL_REG(dx);
+    __DECL_REG(si);
+    __DECL_REG(di);
+    uint32_t error_code;    /* private */
+    uint32_t entry_vector;  /* private */
+    __DECL_REG(ip);
+    uint16_t cs, _pad0[1];
+    uint8_t  saved_upcall_mask;
+    uint8_t  _pad1[3];
+    __DECL_REG(flags);      /* rflags.IF == !saved_upcall_mask */
+    __DECL_REG(sp);
+    uint16_t ss, _pad2[3];
+    uint16_t es, _pad3[3];
+    uint16_t ds, _pad4[3];
+    uint16_t fs, _pad5[3]; /* Non-zero => takes precedence over fs_base.     */
+    uint16_t gs, _pad6[3]; /* Non-zero => takes precedence over gs_base_usr. */
+};
+typedef struct cpu_user_regs cpu_user_regs_t;
+DEFINE_XEN_GUEST_HANDLE(cpu_user_regs_t);
+
+#undef __DECL_REG
+
+#define xen_pfn_to_cr3(pfn) ((unsigned long)(pfn) << 12)
+#define xen_cr3_to_pfn(cr3) ((unsigned long)(cr3) >> 12)
+
+struct arch_vcpu_info {
+    unsigned long cr2;
+    unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+typedef unsigned long xen_callback_t;
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_XEN_X86_64_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/arch-x86/xen-x86_64.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/arch-x86/xen-mca.h
===================================================================
--- xen/interface/arch-x86/xen-mca.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/arch-x86/xen-mca.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,279 @@
+/******************************************************************************
+ * arch-x86/xen-mca.h
+ * 
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetch machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen frees and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+#define XEN_MCA_INTERFACE_VERSION 0x03000001
+
+/* IN: Dom0 calls hypercall from MC event handler. */
+#define XEN_MC_CORRECTABLE  0x0
+/* IN: Dom0/DomU calls hypercall from MC trap handler. */
+#define XEN_MC_TRAP         0x1
+/* XEN_MC_CORRECTABLE and XEN_MC_TRAP are mutually exclusive. */
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall another
+ *  (most likely) correctable error happened. The fetched data
+ *  does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecture:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ * Dom0 and DomU: Uncorrectable errors are reported via nmi handlers
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+
+struct mcinfo_common {
+    uint16_t type;      /* structure type */
+    uint16_t size;      /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+
+/* contains global x86 mc information */
+struct mcinfo_global {
+    struct mcinfo_common common;
+
+    /* running domain at the time in error (most likely the impacted one) */
+    uint16_t mc_domid;
+    uint32_t mc_socketid; /* physical socket of the physical core */
+    uint16_t mc_coreid; /* physical impacted core */
+    uint16_t mc_core_threadid; /* core thread of physical core */
+    uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+    uint64_t mc_gstatus; /* global status */
+    uint32_t mc_flags;
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+    struct mcinfo_common common;
+
+    uint16_t mc_bank; /* bank nr */
+    uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0
+                        * and if mc_addr is valid. Never valid on DomU. */
+    uint64_t mc_status; /* bank status */
+    uint64_t mc_addr;   /* bank address, only valid
+                         * if addr bit is set in mc_status */
+    uint64_t mc_misc;
+};
+
+
+struct mcinfo_msr {
+    uint64_t reg;   /* MSR */
+    uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */ 
+struct mcinfo_extended {
+    struct mcinfo_common common;
+
+    /* You can fill up to five registers.
+     * If you need more, then use this structure
+     * multiple times. */
+
+    uint32_t mc_msrs; /* Number of msr with valid values. */
+    struct mcinfo_msr mc_msr[5];
+};
+
+#define MCINFO_HYPERCALLSIZE	1024
+#define MCINFO_MAXSIZE		768
+
+struct mc_info {
+    /* Number of mcinfo_* entries in mi_data */
+    uint32_t mi_nentries;
+
+    uint8_t mi_data[MCINFO_MAXSIZE - sizeof(uint32_t)];
+};
+typedef struct mc_info mc_info_t;
+
+
+
+/* 
+ * OS's should use these instead of writing their own lookup function
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi)    \
+    (_mi)->mi_nentries
+/* Prototype (yields a pointer to the start of mi->mi_data):
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    ((struct mcinfo_common *)((_mi)->mi_data))
+/* Prototype (yields the address mic->size bytes past mic):
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype (ret is set to the first entry of the given type, or NULL):
+ *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+#define x86_mcinfo_lookup(_ret, _mi, _type)    \
+    do {                                                        \
+        uint32_t _found, _i;                                    \
+        struct mcinfo_common *_mic;                             \
+                                                                \
+        _found = 0;                                             \
+        (_ret) = NULL;                                          \
+        if ((_mi) == NULL) break;                               \
+        _mic = x86_mcinfo_first(_mi);                           \
+        for (_i = 0; _i < x86_mcinfo_nentries(_mi); _i++) {     \
+            if (_mic->type == (_type)) {                        \
+                _found = 1;                                     \
+                break;                                          \
+            }                                                   \
+            _mic = x86_mcinfo_next(_mic);                       \
+        }                                                       \
+        (_ret) = _found ? _mic : NULL;                          \
+    } while (0)
+
+
+/* Usecase 1
+ * Register machine check trap callback handler
+ *    (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch            1
+struct xen_mc_fetch {
+    /* IN/OUT variables. */
+    uint32_t flags;
+
+/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */
+/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA, XEN_MC_NOMATCH */
+
+    /* OUT variables. */
+    uint32_t fetch_idx;  /* only useful for Dom0 for the notify hypercall */
+    struct mc_info mc_info;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain     2
+struct xen_mc_notifydomain {
+    /* IN variables. */
+    uint16_t mc_domid;    /* The unprivileged domain to notify. */
+    uint16_t mc_vcpuid;   /* The vcpu in mc_domid to notify.
+                           * Usually echo'd value from the fetch hypercall. */
+    uint32_t fetch_idx;   /* echo'd value from the fetch hypercall. */
+
+    /* IN/OUT variables. */
+    uint32_t flags;
+
+/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
+
+
+struct xen_mc {
+    uint32_t cmd;
+    uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+    union {
+        struct xen_mc_fetch        mc_fetch;
+        struct xen_mc_notifydomain mc_notifydomain;
+        uint8_t pad[MCINFO_HYPERCALLSIZE];
+    } u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_t);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */

Property changes on: xen/interface/arch-x86/xen-mca.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: fbsd:nokeywords
   + true
Added: svn:eol-style
   + native


Property changes on: xen/interface/arch-x86
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/vcpu.h
===================================================================
--- xen/interface/vcpu.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/vcpu.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * vcpu.h
+ * 
+ * VCPU initialisation, query, and hotplug.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_VCPU_H__
+#define __XEN_PUBLIC_VCPU_H__
+
+/*
+ * Prototype for this hypercall is:
+ *  int vcpu_op(int cmd, int vcpuid, void *extra_args)
+ * @cmd        == VCPUOP_??? (VCPU operation).
+ * @vcpuid     == VCPU to operate on.
+ * @extra_args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Initialise a VCPU. Each VCPU can be initialised only once. A 
+ * newly-initialised VCPU will not run until it is brought up by VCPUOP_up.
+ * 
+ * @extra_arg == pointer to vcpu_guest_context structure containing initial
+ *               state for the VCPU.
+ */
+#define VCPUOP_initialise            0
+
+/*
+ * Bring up a VCPU. This makes the VCPU runnable. This operation will fail
+ * if the VCPU has not been initialised (VCPUOP_initialise).
+ */
+#define VCPUOP_up                    1
+
+/*
+ * Bring down a VCPU (i.e., make it non-runnable).
+ * There are a few caveats that callers should observe:
+ *  1. This operation may return, and VCPUOP_is_up may return false, before the
+ *     VCPU stops running (i.e., the command is asynchronous). It is a good
+ *     idea to ensure that the VCPU has entered a non-critical loop before
+ *     bringing it down. Alternatively, this operation is guaranteed
+ *     synchronous if invoked by the VCPU itself.
+ *  2. After a VCPU is initialised, there is currently no way to drop all its
+ *     references to domain memory. Even a VCPU that is down still holds
+ *     memory references via its pagetable base pointer and GDT. It is good
+ *     practise to move a VCPU onto an 'idle' or default page table, LDT and
+ *     GDT before bringing it down.
+ */
+#define VCPUOP_down                  2
+
+/* Returns 1 if the given VCPU is up. */
+#define VCPUOP_is_up                 3
+
+/*
+ * Return information about the state and running time of a VCPU.
+ * @extra_arg == pointer to vcpu_runstate_info structure.
+ */
+#define VCPUOP_get_runstate_info     4
+struct vcpu_runstate_info {
+    /* VCPU's current state (RUNSTATE_*). */
+    int      state;
+    /* When was current state entered (system time, ns)? */
+    uint64_t state_entry_time;
+    /*
+     * Time spent in each RUNSTATE_* (ns). The sum of these times is
+     * guaranteed not to drift from system time.
+     */
+    uint64_t time[4];
+};
+typedef struct vcpu_runstate_info vcpu_runstate_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t);
+
+/* VCPU is currently running on a physical CPU. */
+#define RUNSTATE_running  0
+
+/* VCPU is runnable, but not currently scheduled on any physical CPU. */
+#define RUNSTATE_runnable 1
+
+/* VCPU is blocked (a.k.a. idle). It is therefore not runnable. */
+#define RUNSTATE_blocked  2
+
+/*
+ * VCPU is not runnable, but it is not blocked.
+ * This is a 'catch all' state for things like hotplug and pauses by the
+ * system administrator (or for critical sections in the hypervisor).
+ * RUNSTATE_blocked dominates this state (it is the preferred state).
+ */
+#define RUNSTATE_offline  3
+
+/*
+ * Register a shared memory area from which the guest may obtain its own
+ * runstate information without needing to execute a hypercall.
+ * Notes:
+ *  1. The registered address may be virtual or physical or guest handle,
+ *     depending on the platform. Virtual address or guest handle should be
+ *     registered on x86 systems.
+ *  2. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
+ * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
+ */
+#define VCPUOP_register_runstate_memory_area 5
+struct vcpu_register_runstate_memory_area {
+    union {
+        XEN_GUEST_HANDLE(vcpu_runstate_info_t) h;
+        struct vcpu_runstate_info *v;
+        uint64_t p;
+    } addr;
+};
+typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t);
+
+/*
+ * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer
+ * which can be set via these commands. Periods smaller than one millisecond
+ * may not be supported.
+ */
+#define VCPUOP_set_periodic_timer    6 /* arg == vcpu_set_periodic_timer_t */
+#define VCPUOP_stop_periodic_timer   7 /* arg == NULL */
+struct vcpu_set_periodic_timer {
+    uint64_t period_ns;
+};
+typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);
+
+/*
+ * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
+ * timer which can be set via these commands.
+ */
+#define VCPUOP_set_singleshot_timer  8 /* arg == vcpu_set_singleshot_timer_t */
+#define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */
+struct vcpu_set_singleshot_timer {
+    uint64_t timeout_abs_ns;   /* Absolute system time value in nanoseconds. */
+    uint32_t flags;            /* VCPU_SSHOTTMR_??? */
+};
+typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);
+
+/* Flags to VCPUOP_set_singleshot_timer. */
+ /* Require the timeout to be in the future (return -ETIME if it's passed). */
+#define _VCPU_SSHOTTMR_future (0)
+#define VCPU_SSHOTTMR_future  (1U << _VCPU_SSHOTTMR_future)
+
+/* 
+ * Register a memory location in the guest address space for the
+ * vcpu_info structure.  This allows the guest to place the vcpu_info
+ * structure in a convenient place, such as in a per-cpu data area.
+ * The pointer need not be page aligned, but the structure must not
+ * cross a page boundary.
+ *
+ * This may be called only once per vcpu.
+ */
+#define VCPUOP_register_vcpu_info   10  /* arg == vcpu_register_vcpu_info_t */
+struct vcpu_register_vcpu_info {
+    uint64_t mfn;    /* mfn of page to place vcpu_info */
+    uint32_t offset; /* offset within page */
+    uint32_t rsvd;   /* unused */
+};
+typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
+
+/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
+#define VCPUOP_send_nmi             11
+
+/* 
+ * Get the physical ID information for a pinned vcpu's underlying physical
+ * processor.  The physical ID information is architecture-specific.
+ * On x86: id[31:0]=apic_id, id[63:32]=acpi_id, and all values 0xff and
+ *         greater are reserved.
+ * This command returns -EINVAL if it is not a valid operation for this VCPU.
+ */
+#define VCPUOP_get_physid           12 /* arg == vcpu_get_physid_t */
+struct vcpu_get_physid {
+    uint64_t phys_id;
+};
+typedef struct vcpu_get_physid vcpu_get_physid_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t);
+#define xen_vcpu_physid_to_x86_apicid(physid) \
+    ((((uint32_t)(physid)) >= 0xff) ? 0xff : ((uint8_t)(physid)))
+#define xen_vcpu_physid_to_x86_acpiid(physid) \
+    ((((uint32_t)((physid)>>32)) >= 0xff) ? 0xff : ((uint8_t)((physid)>>32)))
+
+#endif /* __XEN_PUBLIC_VCPU_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/vcpu.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/features.h
===================================================================
--- xen/interface/features.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/features.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,74 @@
+/******************************************************************************
+ * features.h
+ * 
+ * Feature flags, reported by XENVER_get_features.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_FEATURES_H__
+#define __XEN_PUBLIC_FEATURES_H__
+
+/*
+ * If set, the guest does not need to write-protect its pagetables, and can
+ * update them via direct writes.
+ */
+#define XENFEAT_writable_page_tables       0
+
+/*
+ * If set, the guest does not need to write-protect its segment descriptor
+ * tables, and can update them via direct writes.
+ */
+#define XENFEAT_writable_descriptor_tables 1
+
+/*
+ * If set, translation between the guest's 'pseudo-physical' address space
+ * and the host's machine address space are handled by the hypervisor. In this
+ * mode the guest does not need to perform phys-to/from-machine translations
+ * when performing page table operations.
+ */
+#define XENFEAT_auto_translated_physmap    2
+
+/* If set, the guest is running in supervisor mode (e.g., x86 ring 0). */
+#define XENFEAT_supervisor_mode_kernel     3
+
+/*
+ * If set, the guest does not need to allocate x86 PAE page directories
+ * below 4GB. This flag is usually implied by auto_translated_physmap.
+ */
+#define XENFEAT_pae_pgdir_above_4gb        4
+
+/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
+#define XENFEAT_mmu_pt_update_preserve_ad  5
+
+#define XENFEAT_NR_SUBMAPS 1
+
+#endif /* __XEN_PUBLIC_FEATURES_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/features.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/physdev.h
===================================================================
--- xen/interface/physdev.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/physdev.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,219 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_PHYSDEV_H__
+#define __XEN_PUBLIC_PHYSDEV_H__
+
+/*
+ * Prototype for this hypercall is:
+ *  int physdev_op(int cmd, void *args)
+ * @cmd  == PHYSDEVOP_??? (physdev operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Notify end-of-interrupt (EOI) for the specified IRQ.
+ * @arg == pointer to physdev_eoi structure.
+ */
+#define PHYSDEVOP_eoi                   12
+struct physdev_eoi {
+    /* IN */
+    uint32_t irq;
+};
+typedef struct physdev_eoi physdev_eoi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
+
+/*
+ * Query the status of an IRQ line.
+ * @arg == pointer to physdev_irq_status_query structure.
+ */
+#define PHYSDEVOP_irq_status_query       5
+struct physdev_irq_status_query {
+    /* IN */
+    uint32_t irq;
+    /* OUT */
+    uint32_t flags; /* XENIRQSTAT_* */
+};
+typedef struct physdev_irq_status_query physdev_irq_status_query_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t);
+
+/* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
+#define _XENIRQSTAT_needs_eoi   (0)
+#define  XENIRQSTAT_needs_eoi   (1U<<_XENIRQSTAT_needs_eoi)
+
+/* IRQ shared by multiple guests? */
+#define _XENIRQSTAT_shared      (1)
+#define  XENIRQSTAT_shared      (1U<<_XENIRQSTAT_shared)
+
+/*
+ * Set the current VCPU's I/O privilege level.
+ * @arg == pointer to physdev_set_iopl structure.
+ */
+#define PHYSDEVOP_set_iopl               6
+struct physdev_set_iopl {
+    /* IN */
+    uint32_t iopl;
+};
+typedef struct physdev_set_iopl physdev_set_iopl_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t);
+
+/*
+ * Set the current VCPU's I/O-port permissions bitmap.
+ * @arg == pointer to physdev_set_iobitmap structure.
+ */
+#define PHYSDEVOP_set_iobitmap           7
+struct physdev_set_iobitmap {
+    /* IN */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+    XEN_GUEST_HANDLE(uint8) bitmap;
+#else
+    uint8_t *bitmap;
+#endif
+    uint32_t nr_ports;
+};
+typedef struct physdev_set_iobitmap physdev_set_iobitmap_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t);
+
+/*
+ * Read or write an IO-APIC register.
+ * @arg == pointer to physdev_apic structure.
+ */
+#define PHYSDEVOP_apic_read              8
+#define PHYSDEVOP_apic_write             9
+struct physdev_apic {
+    /* IN */
+    unsigned long apic_physbase;
+    uint32_t reg;
+    /* IN or OUT */
+    uint32_t value;
+};
+typedef struct physdev_apic physdev_apic_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);
+
+/*
+ * Allocate or free a physical upcall vector for the specified IRQ line.
+ * @arg == pointer to physdev_irq structure.
+ */
+#define PHYSDEVOP_alloc_irq_vector      10
+#define PHYSDEVOP_free_irq_vector       11
+struct physdev_irq {
+    /* IN */
+    uint32_t irq;
+    /* IN or OUT */
+    uint32_t vector;
+};
+typedef struct physdev_irq physdev_irq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
+ 
+#define MAP_PIRQ_TYPE_MSI               0x0
+#define MAP_PIRQ_TYPE_GSI               0x1
+#define MAP_PIRQ_TYPE_UNKNOWN           0x2
+
+#define PHYSDEVOP_map_pirq               13
+struct physdev_map_pirq {
+    domid_t domid;
+    /* IN */
+    int type;
+    /* IN */
+    int index;
+    /* IN or OUT */
+    int pirq;
+    /* IN */
+    int bus;
+    /* IN */
+    int devfn;
+    /* IN */
+    int entry_nr;
+    /* IN */
+    uint64_t table_base;
+};
+typedef struct physdev_map_pirq physdev_map_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t);
+
+#define PHYSDEVOP_unmap_pirq             14
+struct physdev_unmap_pirq {
+    domid_t domid;
+    /* IN */
+    int pirq;
+};
+
+typedef struct physdev_unmap_pirq physdev_unmap_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t);
+
+#define PHYSDEVOP_manage_pci_add         15
+#define PHYSDEVOP_manage_pci_remove      16
+struct physdev_manage_pci {
+    /* IN */
+    uint8_t bus;
+    uint8_t devfn;
+}; 
+
+typedef struct physdev_manage_pci physdev_manage_pci_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t);
+
+/*
+ * Argument to physdev_op_compat() hypercall. Superseded by new physdev_op()
+ * hypercall since 0x00030202.
+ */
+struct physdev_op {
+    uint32_t cmd;
+    union {
+        struct physdev_irq_status_query      irq_status_query;
+        struct physdev_set_iopl              set_iopl;
+        struct physdev_set_iobitmap          set_iobitmap;
+        struct physdev_apic                  apic_op;
+        struct physdev_irq                   irq_op;
+    } u;
+};
+typedef struct physdev_op physdev_op_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_op_t);
+
+/*
+ * Notify that some PIRQ-bound event channels have been unmasked.
+ * ** This command is obsolete since interface version 0x00030202 and is **
+ * ** unsupported by newer versions of Xen.                              **
+ */
+#define PHYSDEVOP_IRQ_UNMASK_NOTIFY      4
+
+/*
+ * These all-capitals physdev operation names are superseded by the new names
+ * (defined above) since interface version 0x00030202.
+ */
+#define PHYSDEVOP_IRQ_STATUS_QUERY       PHYSDEVOP_irq_status_query
+#define PHYSDEVOP_SET_IOPL               PHYSDEVOP_set_iopl
+#define PHYSDEVOP_SET_IOBITMAP           PHYSDEVOP_set_iobitmap
+#define PHYSDEVOP_APIC_READ              PHYSDEVOP_apic_read
+#define PHYSDEVOP_APIC_WRITE             PHYSDEVOP_apic_write
+#define PHYSDEVOP_ASSIGN_VECTOR          PHYSDEVOP_alloc_irq_vector
+#define PHYSDEVOP_FREE_VECTOR            PHYSDEVOP_free_irq_vector
+#define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi
+#define PHYSDEVOP_IRQ_SHARED             XENIRQSTAT_shared
+
+#endif /* __XEN_PUBLIC_PHYSDEV_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/physdev.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/grant_table.h
===================================================================
--- xen/interface/grant_table.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/grant_table.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,431 @@
+/******************************************************************************
+ * grant_table.h
+ * 
+ * Interface for granting foreign access to page frames, and receiving
+ * page-ownership transfers.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
+#define __XEN_PUBLIC_GRANT_TABLE_H__
+
+
+/***********************************
+ * GRANT TABLE REPRESENTATION
+ */
+
+/* Some rough guidelines on accessing and updating grant-table entries
+ * in a concurrency-safe manner. For more information, Linux contains a
+ * reference implementation for guest OSes (arch/xen/kernel/grant_table.c).
+ * 
+ * NB. WMB is a no-op on current-generation x86 processors. However, a
+ *     compiler barrier will still be required.
+ * 
+ * Introducing a valid entry into the grant table:
+ *  1. Write ent->domid.
+ *  2. Write ent->frame:
+ *      GTF_permit_access:   Frame to which access is permitted.
+ *      GTF_accept_transfer: Pseudo-phys frame slot being filled by new
+ *                           frame, or zero if none.
+ *  3. Write memory barrier (WMB).
+ *  4. Write ent->flags, inc. valid type.
+ * 
+ * Invalidating an unused GTF_permit_access entry:
+ *  1. flags = ent->flags.
+ *  2. Observe that !(flags & (GTF_reading|GTF_writing)).
+ *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ *  NB. No need for WMB as reuse of entry is control-dependent on success of
+ *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *
+ * Invalidating an in-use GTF_permit_access entry:
+ *  This cannot be done directly. Request assistance from the domain controller
+ *  which can set a timeout on the use of a grant entry and take necessary
+ *  action. (NB. This is not yet implemented!).
+ * 
+ * Invalidating an unused GTF_accept_transfer entry:
+ *  1. flags = ent->flags.
+ *  2. Observe that !(flags & GTF_transfer_committed). [*]
+ *  3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0).
+ *  NB. No need for WMB as reuse of entry is control-dependent on success of
+ *      step 3, and all architectures guarantee ordering of ctrl-dep writes.
+ *  [*] If GTF_transfer_committed is set then the grant entry is 'committed'.
+ *      The guest must /not/ modify the grant entry until the address of the
+ *      transferred frame is written. It is safe for the guest to spin waiting
+ *      for this to occur (detect by observing GTF_transfer_completed in
+ *      ent->flags).
+ *
+ * Invalidating a committed GTF_accept_transfer entry:
+ *  1. Wait for (ent->flags & GTF_transfer_completed).
+ *
+ * Changing a GTF_permit_access from writable to read-only:
+ *  Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing.
+ * 
+ * Changing a GTF_permit_access from read-only to writable:
+ *  Use SMP-safe bit-setting instruction.
+ */
+
+/*
+ * A grant table comprises a packed array of grant entries in one or more
+ * page frames shared between Xen and a guest.
+ * [XEN]: This field is written by Xen and read by the sharing guest.
+ * [GST]: This field is written by the guest and read by Xen.
+ */
+struct grant_entry {
+    /* GTF_xxx: various type and flag information.  [XEN,GST] */
+    uint16_t flags;
+    /* The domain being granted foreign privileges. [GST] */
+    domid_t  domid;
+    /*
+     * GTF_permit_access: Frame that @domid is allowed to map and access. [GST]
+     * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN]
+     */
+    uint32_t frame;
+};
+typedef struct grant_entry grant_entry_t;
+
+/*
+ * Type of grant entry.
+ *  GTF_invalid: This grant entry grants no privileges.
+ *  GTF_permit_access: Allow @domid to map/access @frame.
+ *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
+ *                       to this guest. Xen writes the page number to @frame.
+ */
+#define GTF_invalid         (0U<<0)
+#define GTF_permit_access   (1U<<0)
+#define GTF_accept_transfer (2U<<0)
+#define GTF_type_mask       (3U<<0)
+
+/*
+ * Subflags for GTF_permit_access.
+ *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
+ *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
+ *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ *  GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST]
+ */
+#define _GTF_readonly       (2)
+#define GTF_readonly        (1U<<_GTF_readonly)
+#define _GTF_reading        (3)
+#define GTF_reading         (1U<<_GTF_reading)
+#define _GTF_writing        (4)
+#define GTF_writing         (1U<<_GTF_writing)
+#define _GTF_PWT            (5)
+#define GTF_PWT             (1U<<_GTF_PWT)
+#define _GTF_PCD            (6)
+#define GTF_PCD             (1U<<_GTF_PCD)
+#define _GTF_PAT            (7)
+#define GTF_PAT             (1U<<_GTF_PAT)
+
+/*
+ * Subflags for GTF_accept_transfer:
+ *  GTF_transfer_committed: Xen sets this flag to indicate that it is committed
+ *      to transferring ownership of a page frame. When a guest sees this flag
+ *      it must /not/ modify the grant entry until GTF_transfer_completed is
+ *      set by Xen.
+ *  GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
+ *      after reading GTF_transfer_committed. Xen will always write the frame
+ *      address, followed by ORing this flag, in a timely manner.
+ */
+#define _GTF_transfer_committed (2)
+#define GTF_transfer_committed  (1U<<_GTF_transfer_committed)
+#define _GTF_transfer_completed (3)
+#define GTF_transfer_completed  (1U<<_GTF_transfer_completed)
+
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
+ */
+
+/*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
+/*
+ * Handle to track a mapping created via a grant reference.
+ */
+typedef uint32_t grant_handle_t;
+
+/*
+ * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
+ * by devices and/or host CPUs. If successful, <handle> is a tracking number
+ * that must be presented later to destroy the mapping(s). On error, <status>
+ * is a negative status code.
+ * NOTES:
+ *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
+ *     via which I/O devices may access the granted frame.
+ *  2. If GNTMAP_host_map is specified then a mapping will be added at
+ *     either a host virtual address in the current address space, or at
+ *     a PTE at the specified machine address.  The type of mapping to
+ *     perform is selected through the GNTMAP_contains_pte flag, and the 
+ *     address is specified in <host_addr>.
+ *  3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
+ *     host mapping is destroyed by other means then it is *NOT* guaranteed
+ *     to be accounted to the correct grant reference!
+ */
+#define GNTTABOP_map_grant_ref        0
+struct gnttab_map_grant_ref {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint32_t flags;               /* GNTMAP_* */
+    grant_ref_t ref;
+    domid_t  dom;
+    /* OUT parameters. */
+    int16_t  status;              /* GNTST_* */
+    grant_handle_t handle;
+    uint64_t dev_bus_addr;
+};
+typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);
+
+/*
+ * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
+ * field is ignored. If non-zero, they must refer to a device/host mapping
+ * that is tracked by <handle>
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_grant_ref      1
+struct gnttab_unmap_grant_ref {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint64_t dev_bus_addr;
+    grant_handle_t handle;
+    /* OUT parameters. */
+    int16_t  status;              /* GNTST_* */
+};
+typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);
+
+/*
+ * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
+ * <nr_frames> pages. The frame addresses are written to the <frame_list>.
+ * Only <nr_frames> addresses are written, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ *  3. Xen may not support more than a single grant-table page per domain.
+ */
+#define GNTTABOP_setup_table          2
+struct gnttab_setup_table {
+    /* IN parameters. */
+    domid_t  dom;
+    uint32_t nr_frames;
+    /* OUT parameters. */
+    int16_t  status;              /* GNTST_* */
+    XEN_GUEST_HANDLE(ulong) frame_list;
+};
+typedef struct gnttab_setup_table gnttab_setup_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);
+
+/*
+ * GNTTABOP_dump_table: Dump the contents of the grant table to the
+ * xen console. Debugging use only.
+ */
+#define GNTTABOP_dump_table           3
+struct gnttab_dump_table {
+    /* IN parameters. */
+    domid_t dom;
+    /* OUT parameters. */
+    int16_t status;               /* GNTST_* */
+};
+typedef struct gnttab_dump_table gnttab_dump_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);
+
+/*
+ * GNTTABOP_transfer: Transfer <frame> to a foreign domain. The
+ * foreign domain has previously registered its interest in the transfer via
+ * <domid, ref>.
+ * 
+ * Note that, even if the transfer fails, the specified page no longer belongs
+ * to the calling domain *unless* the error is GNTST_bad_page.
+ */
+#define GNTTABOP_transfer                4
+struct gnttab_transfer {
+    /* IN parameters. */
+    xen_pfn_t     mfn;
+    domid_t       domid;
+    grant_ref_t   ref;
+    /* OUT parameters. */
+    int16_t       status;         /* GNTST_* */
+};
+typedef struct gnttab_transfer gnttab_transfer_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
+
+
+/*
+ * GNTTABOP_copy: Hypervisor based copy
+ * Source and destinations can be either MFNs or, for foreign domains,
+ * grant references. The foreign domain has to grant read/write access
+ * in its grant table.
+ *
+ * The flags specify what type source and destinations are (either MFN
+ * or grant reference).
+ *
+ * Note that this can also be used to copy data between two domains
+ * via a third party if the source and destination domains had previously
+ * granted appropriate access to their pages to the third party.
+ *
+ * source_offset specifies an offset in the source frame, dest_offset
+ * the offset in the target frame and  len specifies the number of
+ * bytes to be copied.
+ */
+
+#define _GNTCOPY_source_gref      (0)
+#define GNTCOPY_source_gref       (1<<_GNTCOPY_source_gref)
+#define _GNTCOPY_dest_gref        (1)
+#define GNTCOPY_dest_gref         (1<<_GNTCOPY_dest_gref)
+
+#define GNTTABOP_copy                 5
+typedef struct gnttab_copy {
+    /* IN parameters. */
+    struct {
+        union {
+            grant_ref_t ref;
+            xen_pfn_t   gmfn;
+        } u;
+        domid_t  domid;
+        uint16_t offset;
+    } source, dest;
+    uint16_t      len;
+    uint16_t      flags;          /* GNTCOPY_* */
+    /* OUT parameters. */
+    int16_t       status;
+} gnttab_copy_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);
+
+/*
+ * GNTTABOP_query_size: Query the current and maximum sizes of the shared
+ * grant table.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_query_size           6
+struct gnttab_query_size {
+    /* IN parameters. */
+    domid_t  dom;                 /* May be DOMID_SELF. */
+    /* OUT parameters. */
+    uint32_t nr_frames;           /* Current size. */
+    uint32_t max_nr_frames;       /* Maximum size. */
+    int16_t  status;              /* GNTST_* */
+};
+typedef struct gnttab_query_size gnttab_query_size_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
+
+/*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>.  <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_and_replace    7
+struct gnttab_unmap_and_replace {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint64_t new_addr;            /* Supplies the replacement mapping. */
+    grant_handle_t handle;        /* Tracks the mapping being destroyed. */
+    /* OUT parameters. */
+    int16_t  status;              /* GNTST_* */
+};
+typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
+
+
+/*
+ * Bitfield values for update_pin_status.flags.
+ */
+ /* Map the grant entry for access by I/O devices. */
+#define _GNTMAP_device_map      (0)
+#define GNTMAP_device_map       (1<<_GNTMAP_device_map)
+ /* Map the grant entry for access by host CPUs. */
+#define _GNTMAP_host_map        (1)
+#define GNTMAP_host_map         (1<<_GNTMAP_host_map)
+ /* Accesses to the granted frame will be restricted to read-only access. */
+#define _GNTMAP_readonly        (2)
+#define GNTMAP_readonly         (1<<_GNTMAP_readonly)
+ /*
+  * GNTMAP_host_map subflag:
+  *  0 => The host mapping is usable only by the guest OS.
+  *  1 => The host mapping is usable by guest OS + current application.
+  */
+#define _GNTMAP_application_map (3)
+#define GNTMAP_application_map  (1<<_GNTMAP_application_map)
+
+ /*
+  * GNTMAP_contains_pte subflag:
+  *  0 => This map request contains a host virtual address.
+  *  1 => This map request contains the machine address of the PTE to update.
+  */
+#define _GNTMAP_contains_pte    (4)
+#define GNTMAP_contains_pte     (1<<_GNTMAP_contains_pte)
+
+/*
+ * Values for error status returns. All errors are -ve.
+ */
+#define GNTST_okay             (0)  /* Normal return.                        */
+#define GNTST_general_error    (-1) /* General undefined error.              */
+#define GNTST_bad_domain       (-2) /* Unrecognised domain id.               */
+#define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
+#define GNTST_bad_handle       (-4) /* Unrecognised or inappropriate handle. */
+#define GNTST_bad_virt_addr    (-5) /* Inappropriate virtual address to map. */
+#define GNTST_bad_dev_addr     (-6) /* Inappropriate device address to unmap.*/
+#define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.              */
+#define GNTST_permission_denied (-8) /* Not enough privilege for operation.  */
+#define GNTST_bad_page         (-9) /* Specified page was invalid for op.    */
+#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary.   */
+#define GNTST_address_too_big (-11) /* transfer page address too large.      */
+
+#define GNTTABOP_error_msgs {                   \
+    "okay",                                     \
+    "undefined error",                          \
+    "unrecognised domain id",                   \
+    "invalid grant reference",                  \
+    "invalid mapping handle",                   \
+    "invalid virtual address",                  \
+    "invalid device address",                   \
+    "no spare translation slot in the I/O MMU", \
+    "permission denied",                        \
+    "bad page",                                 \
+    "copy arguments cross page boundary",       \
+    "page address size too large"               \
+} /* Entry i is the message for status -i (same order as GNTST_*). */
+
+#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/grant_table.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/COPYING
===================================================================
--- xen/interface/COPYING	(.../stable/6/sys)	(revision 0)
+++ xen/interface/COPYING	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,38 @@
+XEN NOTICE
+==========
+
+This copyright applies to all files within this subdirectory and its
+subdirectories:
+  include/public/*.h
+  include/public/hvm/*.h
+  include/public/io/*.h
+
+The intention is that these files can be freely copied into the source
+tree of an operating system when porting that OS to run on Xen. Doing
+so does *not* cause the OS to become subject to the terms of the GPL.
+
+All other files in the Xen source distribution are covered by version
+2 of the GNU General Public License except where explicitly stated
+otherwise within individual source files.
+
+ -- Keir Fraser (on behalf of the Xen team)
+
+=====================================================================
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
+DEALINGS IN THE SOFTWARE.

Property changes on: xen/interface/COPYING
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/platform.h
===================================================================
--- xen/interface/platform.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/platform.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,346 @@
+/******************************************************************************
+ * platform.h
+ * 
+ * Hardware platform operations. Intended for use by domain-0 kernel.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_PLATFORM_H__
+#define __XEN_PUBLIC_PLATFORM_H__
+
+#include "xen.h"
+
+#define XENPF_INTERFACE_VERSION 0x03000001
+
+/*
+ * Set clock such that it would read <secs,nsecs> after 00:00:00 UTC,
+ * 1 January, 1970 if the current system time was <system_time>.
+ */
+#define XENPF_settime             17
+struct xenpf_settime {
+    /* IN variables. */
+    uint32_t secs;              /* With nsecs: time since 1970-01-01 UTC. */
+    uint32_t nsecs;
+    uint64_t system_time;       /* System time at which <secs,nsecs> holds. */
+};
+typedef struct xenpf_settime xenpf_settime_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_settime_t);
+
+/*
+ * Request memory range (@mfn, @mfn+@nr_mfns-1) to have type @type.
+ * On x86, @type is an architecture-defined MTRR memory type.
+ * On success, returns the MTRR that was used (@reg) and a handle that can
+ * be passed to XENPF_del_memtype to accurately tear down the new setting.
+ * (x86-specific).
+ */
+#define XENPF_add_memtype         31
+struct xenpf_add_memtype {
+    /* IN variables. */
+    xen_pfn_t mfn;              /* First frame of the range. */
+    uint64_t nr_mfns;           /* Number of frames in the range. */
+    uint32_t type;              /* On x86, an MTRR memory type. */
+    /* OUT variables. */
+    uint32_t handle;            /* Pass to XENPF_del_memtype. */
+    uint32_t reg;               /* MTRR that was used. */
+};
+typedef struct xenpf_add_memtype xenpf_add_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_add_memtype_t);
+
+/*
+ * Tear down an existing memory-range type. If @handle is remembered then it
+ * should be passed in to accurately tear down the correct setting (in case
+ * of overlapping memory regions with differing types). If it is not known
+ * then @handle should be set to zero. In all cases @reg must be set.
+ * (x86-specific).
+ */
+#define XENPF_del_memtype         32
+struct xenpf_del_memtype {
+    /* IN variables. */
+    uint32_t handle;            /* Zero if not known. */
+    uint32_t reg;               /* Must always be set. */
+};
+typedef struct xenpf_del_memtype xenpf_del_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_del_memtype_t);
+
+/* Read current type of an MTRR (x86-specific). */
+#define XENPF_read_memtype        33
+struct xenpf_read_memtype {
+    /* IN variables. */
+    uint32_t reg;               /* MTRR to read. */
+    /* OUT variables. */
+    xen_pfn_t mfn;
+    uint64_t nr_mfns;
+    uint32_t type;              /* Current type of that MTRR. */
+};
+typedef struct xenpf_read_memtype xenpf_read_memtype_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_read_memtype_t);
+
+#define XENPF_microcode_update    35
+struct xenpf_microcode_update {
+    /* IN variables. */
+    XEN_GUEST_HANDLE(const_void) data;/* Pointer to microcode data */
+    uint32_t length;                  /* Length of microcode data. */
+};
+typedef struct xenpf_microcode_update xenpf_microcode_update_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_microcode_update_t);
+
+#define XENPF_platform_quirk      39
+#define QUIRK_NOIRQBALANCING      1 /* Do not restrict IO-APIC RTE targets */
+#define QUIRK_IOAPIC_BAD_REGSEL   2 /* IO-APIC REGSEL forgets its value    */
+#define QUIRK_IOAPIC_GOOD_REGSEL  3 /* IO-APIC REGSEL behaves properly     */
+struct xenpf_platform_quirk {
+    /* IN variables. */
+    uint32_t quirk_id;                /* QUIRK_* */
+};
+typedef struct xenpf_platform_quirk xenpf_platform_quirk_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t);
+
+#define XENPF_firmware_info       50
+#define XEN_FW_DISK_INFO          1 /* from int 13 AH=08/41/48 */
+#define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
+#define XEN_FW_VBEDDC_INFO        3 /* from int 10 AX=4f15 */
+struct xenpf_firmware_info {
+    /* IN variables. */
+    uint32_t type;                            /* XEN_FW_* */
+    uint32_t index;
+    /* OUT variables. */
+    union {
+        struct {
+            /* Int13, Fn41: Check Extensions Present. */
+            uint8_t device;                   /* %dl: bios device number */
+            uint8_t version;                  /* %ah: major version      */
+            uint16_t interface_support;       /* %cx: support bitmap     */
+            /* Int13, Fn08: Legacy Get Device Parameters. */
+            uint16_t legacy_max_cylinder;     /* %cl[7:6]:%ch: max cyl # */
+            uint8_t legacy_max_head;          /* %dh: max head #         */
+            uint8_t legacy_sectors_per_track; /* %cl[5:0]: max sector #  */
+            /* Int13, Fn48: Get Device Parameters (as filled into %ds:%esi). */
+            /* NB. First uint16_t of buffer must be set to buffer size.      */
+            XEN_GUEST_HANDLE(void) edd_params;
+        } disk_info; /* XEN_FW_DISK_INFO */
+        struct {
+            uint8_t device;                   /* bios device number  */
+            uint32_t mbr_signature;           /* offset 0x1b8 in mbr */
+        } disk_mbr_signature; /* XEN_FW_DISK_MBR_SIGNATURE */
+        struct {
+            /* Int10, AX=4F15: Get EDID info. */
+            uint8_t capabilities;
+            uint8_t edid_transfer_time;
+            /* must refer to 128-byte buffer */
+            XEN_GUEST_HANDLE(uint8) edid;
+        } vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+    } u;
+};
+typedef struct xenpf_firmware_info xenpf_firmware_info_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_firmware_info_t);
+
+#define XENPF_enter_acpi_sleep    51
+struct xenpf_enter_acpi_sleep {
+    /* IN variables. */
+    uint16_t pm1a_cnt_val;      /* PM1a control value. */
+    uint16_t pm1b_cnt_val;      /* PM1b control value. */
+    uint32_t sleep_state;       /* Which state to enter (Sn). */
+    uint32_t flags;             /* Must be zero. */
+};
+typedef struct xenpf_enter_acpi_sleep xenpf_enter_acpi_sleep_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_enter_acpi_sleep_t);
+
+#define XENPF_change_freq         52
+struct xenpf_change_freq {
+    /* IN variables. */
+    uint32_t flags; /* Must be zero. */
+    uint32_t cpu;   /* Physical cpu. */
+    uint64_t freq;  /* New frequency (Hz). */
+};
+typedef struct xenpf_change_freq xenpf_change_freq_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_change_freq_t);
+
+/*
+ * Get idle times (nanoseconds since boot) for physical CPUs specified in the
+ * @cpumap_bitmap with range [0..@cpumap_nr_cpus-1]. The @idletime array is
+ * indexed by CPU number; only entries with the corresponding @cpumap_bitmap
+ * bit set are written to. On return, @cpumap_bitmap is modified so that any
+ * non-existent CPUs are cleared. Such CPUs have their @idletime array entry
+ * cleared.
+ */
+#define XENPF_getidletime         53
+struct xenpf_getidletime {
+    /* IN/OUT variables. */
+    /* IN: CPUs to interrogate; OUT: subset of IN which are present. */
+    XEN_GUEST_HANDLE(uint8) cpumap_bitmap;
+    /* IN variables. */
+    /* Size of cpumap bitmap, in CPUs (bits [0..cpumap_nr_cpus-1]). */
+    uint32_t cpumap_nr_cpus;
+    /* Must be indexable for every cpu in cpumap_bitmap. */
+    XEN_GUEST_HANDLE(uint64) idletime;
+    /* OUT variables. */
+    /* System time when the idletime snapshots were taken. */
+    uint64_t now;
+};
+typedef struct xenpf_getidletime xenpf_getidletime_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
+
+#define XENPF_set_processor_pminfo      54
+
+/* ability bits */
+#define XEN_PROCESSOR_PM_CX	1
+#define XEN_PROCESSOR_PM_PX	2
+#define XEN_PROCESSOR_PM_TX	4
+
+/* cmd type (xenpf_set_processor_pminfo.type) */
+#define XEN_PM_CX   0
+#define XEN_PM_PX   1
+#define XEN_PM_TX   2
+
+/* Px sub info type (xen_processor_performance.flags) */
+#define XEN_PX_PCT   1
+#define XEN_PX_PSS   2
+#define XEN_PX_PPC   4
+#define XEN_PX_PSD   8
+
+struct xen_power_register {         /* type of xen_processor_cx.reg */
+    uint32_t     space_id;
+    uint32_t     bit_width;
+    uint32_t     bit_offset;
+    uint32_t     access_size;
+    uint64_t     address;
+};
+
+struct xen_processor_csd {
+    uint32_t    domain;      /* domain number of one dependent group */
+    uint32_t    coord_type;  /* coordination type */
+    uint32_t    num;         /* number of processors in same domain */
+};
+typedef struct xen_processor_csd xen_processor_csd_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_csd_t);
+
+struct xen_processor_cx {
+    struct xen_power_register  reg; /* GAS for Cx trigger register */
+    uint8_t     type;     /* cstate value, c0: 0, c1: 1, ... */
+    uint32_t    latency;  /* worst latency (ms) to enter/exit this cstate */
+    uint32_t    power;    /* average power consumption(mW) */
+    uint32_t    dpcnt;    /* number of dependency entries */
+    XEN_GUEST_HANDLE(xen_processor_csd_t) dp; /* NULL if no dependency */
+};
+typedef struct xen_processor_cx xen_processor_cx_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_cx_t);
+
+struct xen_processor_flags {        /* type of xen_processor_power.flags */
+    uint32_t bm_control:1;
+    uint32_t bm_check:1;
+    uint32_t has_cst:1;
+    uint32_t power_setup_done:1;
+    uint32_t bm_rld_set:1;
+};
+
+struct xen_processor_power {
+    uint32_t count;  /* number of C state entries in array below */
+    struct xen_processor_flags flags;  /* global flags of this processor */
+    XEN_GUEST_HANDLE(xen_processor_cx_t) states; /* supported c states */
+};
+
+struct xen_pct_register {           /* control/status register layout */
+    uint8_t  descriptor;
+    uint16_t length;
+    uint8_t  space_id;
+    uint8_t  bit_width;
+    uint8_t  bit_offset;
+    uint8_t  reserved;
+    uint64_t address;
+};
+
+struct xen_processor_px {
+    uint64_t core_frequency; /* megahertz */
+    uint64_t power;      /* milliWatts */
+    uint64_t transition_latency; /* microseconds */
+    uint64_t bus_master_latency; /* microseconds */
+    uint64_t control;        /* control value */
+    uint64_t status;     /* success indicator */
+};
+typedef struct xen_processor_px xen_processor_px_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t);
+
+struct xen_psd_package {            /* type of domain_info below */
+    uint64_t num_entries;
+    uint64_t revision;
+    uint64_t domain;
+    uint64_t coord_type;
+    uint64_t num_processors;
+};
+
+struct xen_processor_performance {
+    uint32_t flags;     /* flag for Px sub info type (XEN_PX_*) */
+    uint32_t platform_limit;  /* Platform limitation on freq usage */
+    struct xen_pct_register control_register;
+    struct xen_pct_register status_register;
+    uint32_t state_count;     /* total available performance states */
+    XEN_GUEST_HANDLE(xen_processor_px_t) states;
+    struct xen_psd_package domain_info;
+    uint32_t shared_type;     /* coordination type of this processor */
+};
+typedef struct xen_processor_performance xen_processor_performance_t;
+DEFINE_XEN_GUEST_HANDLE(xen_processor_performance_t);
+
+struct xenpf_set_processor_pminfo {
+    /* IN variables. */
+    uint32_t id;    /* ACPI CPU ID */
+    uint32_t type;  /* {XEN_PM_CX, XEN_PM_PX}: selects the member of u */
+    union {
+        struct xen_processor_power          power;/* Cx: _CST/_CSD */
+        struct xen_processor_performance    perf; /* Px: _PPC/_PCT/_PSS/_PSD */
+    } u;
+};
+typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t;
+DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t);
+
+struct xen_platform_op {
+    uint32_t cmd;               /* XENPF_* */
+    uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
+    union {
+        struct xenpf_settime           settime;
+        struct xenpf_add_memtype       add_memtype;
+        struct xenpf_del_memtype       del_memtype;
+        struct xenpf_read_memtype      read_memtype;
+        struct xenpf_microcode_update  microcode;
+        struct xenpf_platform_quirk    platform_quirk;
+        struct xenpf_firmware_info     firmware_info;
+        struct xenpf_enter_acpi_sleep  enter_acpi_sleep;
+        struct xenpf_change_freq       change_freq;
+        struct xenpf_getidletime       getidletime;
+        struct xenpf_set_processor_pminfo set_pminfo;
+        uint8_t                        pad[128]; /* union is >= 128 bytes */
+    } u;
+};
+typedef struct xen_platform_op xen_platform_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_platform_op_t);
+
+#endif /* __XEN_PUBLIC_PLATFORM_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/platform.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/sched.h
===================================================================
--- xen/interface/sched.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/sched.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,121 @@
+/******************************************************************************
+ * sched.h
+ * 
+ * Scheduler state interactions
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_SCHED_H__
+#define __XEN_PUBLIC_SCHED_H__
+
+#include "event_channel.h"
+
+/*
+ * The prototype for this hypercall is:
+ *  long sched_op(int cmd, void *arg)
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == Operation-specific extra argument(s), as described below.
+ * 
+ * Versions of Xen prior to 3.0.2 provided only the following legacy version
+ * of this hypercall, supporting only the commands yield, block and shutdown:
+ *  long sched_op(int cmd, unsigned long arg)
+ * @cmd == SCHEDOP_??? (scheduler operation).
+ * @arg == 0               (SCHEDOP_yield and SCHEDOP_block)
+ *      == SHUTDOWN_* code (SCHEDOP_shutdown)
+ * This legacy version is available to new guests as sched_op_compat().
+ */
+
+/*
+ * Voluntarily yield the CPU.
+ * @arg == NULL.
+ */
+#define SCHEDOP_yield       0
+
+/*
+ * Block execution of this VCPU until an event is received for processing.
+ * If called with event upcalls masked, this operation will atomically
+ * reenable event delivery and check for pending events before blocking the
+ * VCPU. This avoids a "wakeup waiting" race.
+ * @arg == NULL.
+ */
+#define SCHEDOP_block       1
+
+/*
+ * Halt execution of this domain (all VCPUs) and notify the system controller.
+ * @arg == pointer to sched_shutdown structure.
+ */
+#define SCHEDOP_shutdown    2
+struct sched_shutdown {
+    unsigned int reason; /* SHUTDOWN_* */
+};
+typedef struct sched_shutdown sched_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);
+
+/*
+ * Poll a set of event-channel ports. Return when one or more are pending. An
+ * optional timeout may be specified.
+ * @arg == pointer to sched_poll structure.
+ */
+#define SCHEDOP_poll        3
+struct sched_poll {
+    XEN_GUEST_HANDLE(evtchn_port_t) ports; /* Ports to poll. */
+    unsigned int nr_ports;                 /* Number of ports in the set. */
+    uint64_t timeout;                      /* Optional; units not stated here. */
+};
+typedef struct sched_poll sched_poll_t;
+DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
+
+/*
+ * Declare a shutdown for another domain. The main use of this function is
+ * in interpreting shutdown requests and reasons for fully-virtualized
+ * domains.  A para-virtualized domain may use SCHEDOP_shutdown directly.
+ * @arg == pointer to sched_remote_shutdown structure.
+ */
+#define SCHEDOP_remote_shutdown        4
+struct sched_remote_shutdown {
+    domid_t domain_id;         /* Remote domain ID */
+    unsigned int reason;       /* SHUTDOWN_* reason code */
+};
+typedef struct sched_remote_shutdown sched_remote_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);
+
+/*
+ * Reason codes for SCHEDOP_shutdown and SCHEDOP_remote_shutdown. These may
+ * be interpreted by control software to determine the appropriate action.
+ * For the most part, Xen does not care about the shutdown code.
+ */
+#define SHUTDOWN_poweroff   0  /* Domain exited normally. Clean up and kill. */
+#define SHUTDOWN_reboot     1  /* Clean up, kill, and then restart.          */
+#define SHUTDOWN_suspend    2  /* Clean up, save suspend info, kill.         */
+#define SHUTDOWN_crash      3  /* Tell controller we've crashed.             */
+
+#endif /* __XEN_PUBLIC_SCHED_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/sched.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/elfstructs.h
===================================================================
--- xen/interface/elfstructs.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/elfstructs.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,527 @@
+#ifndef __XEN_PUBLIC_ELFSTRUCTS_H__
+#define __XEN_PUBLIC_ELFSTRUCTS_H__ 1
+/*
+ * Copyright (c) 1995, 1996 Erik Theisen.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+typedef uint8_t		Elf_Byte;
+
+typedef uint32_t	Elf32_Addr;	/* Unsigned program address */
+typedef uint32_t	Elf32_Off;	/* Unsigned file offset */
+typedef int32_t		Elf32_Sword;	/* Signed large integer */
+typedef uint32_t	Elf32_Word;	/* Unsigned large integer */
+typedef uint16_t	Elf32_Half;	/* Unsigned medium integer */
+
+typedef uint64_t	Elf64_Addr;
+typedef uint64_t	Elf64_Off;
+typedef int32_t		Elf64_Shalf;
+
+typedef int32_t		Elf64_Sword;
+typedef uint32_t	Elf64_Word;
+
+typedef int64_t		Elf64_Sxword;
+typedef uint64_t	Elf64_Xword;
+
+typedef uint32_t	Elf64_Half;
+typedef uint16_t	Elf64_Quarter;
+
+/*
+ * e_ident[] identification indexes
+ * See http://www.caldera.com/developers/gabi/2000-07-17/ch4.eheader.html
+ */
+#define EI_MAG0		0		/* file ID */
+#define EI_MAG1		1		/* file ID */
+#define EI_MAG2		2		/* file ID */
+#define EI_MAG3		3		/* file ID */
+#define EI_CLASS	4		/* file class */
+#define EI_DATA		5		/* data encoding */
+#define EI_VERSION	6		/* ELF header version */
+#define EI_OSABI	7		/* OS/ABI ID */
+#define EI_ABIVERSION	8		/* ABI version */
+#define EI_PAD		9		/* start of pad bytes */
+#define EI_NIDENT	16		/* Size of e_ident[] */
+
+/* e_ident[] magic number */
+#define	ELFMAG0		0x7f		/* e_ident[EI_MAG0] */
+#define	ELFMAG1		'E'		/* e_ident[EI_MAG1] */
+#define	ELFMAG2		'L'		/* e_ident[EI_MAG2] */
+#define	ELFMAG3		'F'		/* e_ident[EI_MAG3] */
+#define	ELFMAG		"\177ELF"	/* magic */
+#define	SELFMAG		4		/* size of magic */
+
+/* e_ident[] file class */
+#define	ELFCLASSNONE	0		/* invalid */
+#define	ELFCLASS32	1		/* 32-bit objs */
+#define	ELFCLASS64	2		/* 64-bit objs */
+#define	ELFCLASSNUM	3		/* number of classes */
+
+/* e_ident[] data encoding */
+#define ELFDATANONE	0		/* invalid */
+#define ELFDATA2LSB	1		/* Little-Endian */
+#define ELFDATA2MSB	2		/* Big-Endian */
+#define ELFDATANUM	3		/* number of data encode defines */
+
+/* e_ident[] Operating System/ABI */
+#define ELFOSABI_SYSV		0	/* UNIX System V ABI */
+#define ELFOSABI_HPUX		1	/* HP-UX operating system */
+#define ELFOSABI_NETBSD		2	/* NetBSD */
+#define ELFOSABI_LINUX		3	/* GNU/Linux */
+#define ELFOSABI_HURD		4	/* GNU/Hurd */
+#define ELFOSABI_86OPEN		5	/* 86Open common IA32 ABI */
+#define ELFOSABI_SOLARIS	6	/* Solaris */
+#define ELFOSABI_MONTEREY	7	/* Monterey */
+#define ELFOSABI_IRIX		8	/* IRIX */
+#define ELFOSABI_FREEBSD	9	/* FreeBSD */
+#define ELFOSABI_TRU64		10	/* TRU64 UNIX */
+#define ELFOSABI_MODESTO	11	/* Novell Modesto */
+#define ELFOSABI_OPENBSD	12	/* OpenBSD */
+#define ELFOSABI_ARM		97	/* ARM */
+#define ELFOSABI_STANDALONE	255	/* Standalone (embedded) application */
+
+/* e_ident */
+#define IS_ELF(ehdr) ((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \
+                      (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \
+                      (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \
+                      (ehdr).e_ident[EI_MAG3] == ELFMAG3)
+
+/* ELF Header */
+typedef struct elfhdr {
+	unsigned char	e_ident[EI_NIDENT]; /* ELF Identification */
+	Elf32_Half	e_type;		/* object file type */
+	Elf32_Half	e_machine;	/* machine */
+	Elf32_Word	e_version;	/* object file version */
+	Elf32_Addr	e_entry;	/* virtual entry point */
+	Elf32_Off	e_phoff;	/* program header table offset */
+	Elf32_Off	e_shoff;	/* section header table offset */
+	Elf32_Word	e_flags;	/* processor-specific flags */
+	Elf32_Half	e_ehsize;	/* ELF header size */
+	Elf32_Half	e_phentsize;	/* program header entry size */
+	Elf32_Half	e_phnum;	/* number of program header entries */
+	Elf32_Half	e_shentsize;	/* section header entry size */
+	Elf32_Half	e_shnum;	/* number of section header entries */
+	Elf32_Half	e_shstrndx;	/* section header table's "section
+					   header string table" entry offset */
+} Elf32_Ehdr;
+
+typedef struct {
+	unsigned char	e_ident[EI_NIDENT];	/* Id bytes */
+	Elf64_Quarter	e_type;			/* file type */
+	Elf64_Quarter	e_machine;		/* machine type */
+	Elf64_Half	e_version;		/* version number */
+	Elf64_Addr	e_entry;		/* entry point */
+	Elf64_Off	e_phoff;		/* Program hdr offset */
+	Elf64_Off	e_shoff;		/* Section hdr offset */
+	Elf64_Half	e_flags;		/* Processor flags */
+	Elf64_Quarter	e_ehsize;		/* sizeof ehdr */
+	Elf64_Quarter	e_phentsize;		/* Program header entry size */
+	Elf64_Quarter	e_phnum;		/* Number of program headers */
+	Elf64_Quarter	e_shentsize;		/* Section header entry size */
+	Elf64_Quarter	e_shnum;		/* Number of section headers */
+	Elf64_Quarter	e_shstrndx;		/* String table index */
+} Elf64_Ehdr;
+
+/* e_type */
+#define ET_NONE		0		/* No file type */
+#define ET_REL		1		/* relocatable file */
+#define ET_EXEC		2		/* executable file */
+#define ET_DYN		3		/* shared object file */
+#define ET_CORE		4		/* core file */
+#define ET_NUM		5		/* number of types */
+#define ET_LOPROC	0xff00		/* reserved range for processor */
+#define ET_HIPROC	0xffff		/*  specific e_type */
+
+/* e_machine */
+#define EM_NONE		0		/* No Machine */
+#define EM_M32		1		/* AT&T WE 32100 */
+#define EM_SPARC	2		/* SPARC */
+#define EM_386		3		/* Intel 80386 */
+#define EM_68K		4		/* Motorola 68000 */
+#define EM_88K		5		/* Motorola 88000 */
+#define EM_486		6		/* Intel 80486 - unused? */
+#define EM_860		7		/* Intel 80860 */
+#define EM_MIPS		8		/* MIPS R3000 Big-Endian only */
+/*
+ * Don't know if EM_MIPS_RS4_BE,
+ * EM_SPARC64, EM_PARISC,
+ * or EM_PPC are ABI compliant
+ */
+#define EM_MIPS_RS4_BE	10		/* MIPS R4000 Big-Endian */
+#define EM_SPARC64	11		/* SPARC v9 64-bit unofficial */
+#define EM_PARISC	15		/* HPPA */
+#define EM_SPARC32PLUS	18		/* Enhanced instruction set SPARC */
+#define EM_PPC		20		/* PowerPC */
+#define EM_PPC64	21		/* PowerPC 64-bit */
+#define EM_ARM		40		/* Advanced RISC Machines ARM */
+#define EM_ALPHA	41		/* DEC ALPHA */
+#define EM_SPARCV9	43		/* SPARC version 9 */
+#define EM_ALPHA_EXP	0x9026		/* DEC ALPHA */
+#define EM_IA_64	50		/* Intel Merced */
+#define EM_X86_64	62		/* AMD x86-64 architecture */
+#define EM_VAX		75		/* DEC VAX */
+
+/* Version */
+#define EV_NONE		0		/* Invalid */
+#define EV_CURRENT	1		/* Current */
+#define EV_NUM		2		/* number of versions */
+
+/* Section Header */
+typedef struct {
+	Elf32_Word	sh_name;	/* name - index into section header
+					   string table section */
+	Elf32_Word	sh_type;	/* type */
+	Elf32_Word	sh_flags;	/* flags */
+	Elf32_Addr	sh_addr;	/* address */
+	Elf32_Off	sh_offset;	/* file offset */
+	Elf32_Word	sh_size;	/* section size */
+	Elf32_Word	sh_link;	/* section header table index link */
+	Elf32_Word	sh_info;	/* extra information */
+	Elf32_Word	sh_addralign;	/* address alignment */
+	Elf32_Word	sh_entsize;	/* section entry size */
+} Elf32_Shdr;
+
+typedef struct {
+	Elf64_Half	sh_name;	/* section name */
+	Elf64_Half	sh_type;	/* section type */
+	Elf64_Xword	sh_flags;	/* section flags */
+	Elf64_Addr	sh_addr;	/* virtual address */
+	Elf64_Off	sh_offset;	/* file offset */
+	Elf64_Xword	sh_size;	/* section size */
+	Elf64_Half	sh_link;	/* link to another */
+	Elf64_Half	sh_info;	/* misc info */
+	Elf64_Xword	sh_addralign;	/* memory alignment */
+	Elf64_Xword	sh_entsize;	/* table entry size */
+} Elf64_Shdr;
+
+/* Special Section Indexes */
+#define SHN_UNDEF	0		/* undefined */
+#define SHN_LORESERVE	0xff00		/* lower bounds of reserved indexes */
+#define SHN_LOPROC	0xff00		/* reserved range for processor */
+#define SHN_HIPROC	0xff1f		/*   specific section indexes */
+#define SHN_ABS		0xfff1		/* absolute value */
+#define SHN_COMMON	0xfff2		/* common symbol */
+#define SHN_HIRESERVE	0xffff		/* upper bounds of reserved indexes */
+
+/* sh_type */
+#define SHT_NULL	0		/* inactive */
+#define SHT_PROGBITS	1		/* program defined information */
+#define SHT_SYMTAB	2		/* symbol table section */
+#define SHT_STRTAB	3		/* string table section */
+#define SHT_RELA	4		/* relocation section with addends*/
+#define SHT_HASH	5		/* symbol hash table section */
+#define SHT_DYNAMIC	6		/* dynamic section */
+#define SHT_NOTE	7		/* note section */
+#define SHT_NOBITS	8		/* no space section */
+#define SHT_REL		9		/* relocation section without addends */
+#define SHT_SHLIB	10		/* reserved - purpose unknown */
+#define SHT_DYNSYM	11		/* dynamic symbol table section */
+#define SHT_NUM		12		/* number of section types */
+#define SHT_LOPROC	0x70000000	/* reserved range for processor */
+#define SHT_HIPROC	0x7fffffff	/*  specific section header types */
+#define SHT_LOUSER	0x80000000	/* reserved range for application */
+#define SHT_HIUSER	0xffffffff	/*  specific indexes */
+
+/* Section names */
+#define ELF_BSS         ".bss"		/* uninitialized data */
+#define ELF_DATA        ".data"		/* initialized data */
+#define ELF_DEBUG       ".debug"	/* debug */
+#define ELF_DYNAMIC     ".dynamic"	/* dynamic linking information */
+#define ELF_DYNSTR      ".dynstr"	/* dynamic string table */
+#define ELF_DYNSYM      ".dynsym"	/* dynamic symbol table */
+#define ELF_FINI        ".fini"		/* termination code */
+#define ELF_GOT         ".got"		/* global offset table */
+#define ELF_HASH        ".hash"		/* symbol hash table */
+#define ELF_INIT        ".init"		/* initialization code */
+#define ELF_REL_DATA    ".rel.data"	/* relocation data */
+#define ELF_REL_FINI    ".rel.fini"	/* relocation termination code */
+#define ELF_REL_INIT    ".rel.init"	/* relocation initialization code */
+#define ELF_REL_DYN     ".rel.dyn"	/* relocation dynamic link info */
+#define ELF_REL_RODATA  ".rel.rodata"	/* relocation read-only data */
+#define ELF_REL_TEXT    ".rel.text"	/* relocation code */
+#define ELF_RODATA      ".rodata"	/* read-only data */
+#define ELF_SHSTRTAB    ".shstrtab"	/* section header string table */
+#define ELF_STRTAB      ".strtab"	/* string table */
+#define ELF_SYMTAB      ".symtab"	/* symbol table */
+#define ELF_TEXT        ".text"		/* code */
+
+
+/* Section Attribute Flags - sh_flags */
+#define SHF_WRITE	0x1		/* Writable */
+#define SHF_ALLOC	0x2		/* occupies memory */
+#define SHF_EXECINSTR	0x4		/* executable */
+#define SHF_MASKPROC	0xf0000000	/* reserved bits for processor */
+					/*  specific section attributes */
+
+/* Symbol Table Entry */
+typedef struct elf32_sym {
+	Elf32_Word	st_name;	/* name - index into string table */
+	Elf32_Addr	st_value;	/* symbol value */
+	Elf32_Word	st_size;	/* symbol size */
+	unsigned char	st_info;	/* type and binding */
+	unsigned char	st_other;	/* 0 - no defined meaning */
+	Elf32_Half	st_shndx;	/* section header index */
+} Elf32_Sym;
+
+typedef struct {
+	Elf64_Half	st_name;	/* Symbol name index in str table */
+	Elf_Byte	st_info;	/* type / binding attrs */
+	Elf_Byte	st_other;	/* unused */
+	Elf64_Quarter	st_shndx;	/* section index of symbol */
+	Elf64_Xword	st_value;	/* value of symbol */
+	Elf64_Xword	st_size;	/* size of symbol */
+} Elf64_Sym;
+
+/* Symbol table index */
+#define STN_UNDEF	0		/* undefined */
+
+/* Extract symbol info - st_info (binding: bits above the low nibble; type: low nibble) */
+#define ELF32_ST_BIND(x)	((x) >> 4)
+#define ELF32_ST_TYPE(x)	(((unsigned int)(x)) & 0xf)
+#define ELF32_ST_INFO(b,t)	(((b) << 4) + ((t) & 0xf))
+
+#define ELF64_ST_BIND(x)	((x) >> 4)
+#define ELF64_ST_TYPE(x)	(((unsigned int)(x)) & 0xf)
+#define ELF64_ST_INFO(b,t)	(((b) << 4) + ((t) & 0xf))
+
+/* Symbol Binding - ELF32_ST_BIND - st_info */
+#define STB_LOCAL	0		/* Local symbol */
+#define STB_GLOBAL	1		/* Global symbol */
+#define STB_WEAK	2		/* like global - lower precedence */
+#define STB_NUM		3		/* number of symbol bindings */
+#define STB_LOPROC	13		/* reserved range for processor */
+#define STB_HIPROC	15		/*  specific symbol bindings */
+
+/* Symbol type - ELF32_ST_TYPE - st_info */
+#define STT_NOTYPE	0		/* not specified */
+#define STT_OBJECT	1		/* data object */
+#define STT_FUNC	2		/* function */
+#define STT_SECTION	3		/* section */
+#define STT_FILE	4		/* file */
+#define STT_NUM		5		/* number of symbol types */
+#define STT_LOPROC	13		/* reserved range for processor */
+#define STT_HIPROC	15		/*  specific symbol types */
+
+/* Relocation entry with implicit addend */
+typedef struct {
+	Elf32_Addr	r_offset;	/* offset of relocation */
+	Elf32_Word	r_info;		/* symbol table index and type */
+} Elf32_Rel;
+
+/* Relocation entry with explicit addend */
+typedef struct {
+	Elf32_Addr	r_offset;	/* offset of relocation */
+	Elf32_Word	r_info;		/* symbol table index and type */
+	Elf32_Sword	r_addend;
+} Elf32_Rela;
+
+/* Extract relocation info - r_info */
+#define ELF32_R_SYM(i)		((i) >> 8)
+#define ELF32_R_TYPE(i)		((unsigned char) (i))
+#define ELF32_R_INFO(s,t) 	(((s) << 8) + (unsigned char)(t))
+
+typedef struct {
+	Elf64_Xword	r_offset;	/* where to do it */
+	Elf64_Xword	r_info;		/* index & type of relocation */
+} Elf64_Rel;
+
+typedef struct {
+	Elf64_Xword	r_offset;	/* where to do it */
+	Elf64_Xword	r_info;		/* index & type of relocation */
+	Elf64_Sxword	r_addend;	/* adjustment value */
+} Elf64_Rela;
+
+#define	ELF64_R_SYM(info)	((info) >> 32)
+#define	ELF64_R_TYPE(info)	((info) & 0xFFFFFFFF)
+#define ELF64_R_INFO(s,t) 	(((Elf64_Xword)(s) << 32) + (uint32_t)(t)) /* widen s first: shifting a 32-bit value by 32 is undefined */
+
+/* Program Header */
+typedef struct {
+	Elf32_Word	p_type;		/* segment type */
+	Elf32_Off	p_offset;	/* segment offset */
+	Elf32_Addr	p_vaddr;	/* virtual address of segment */
+	Elf32_Addr	p_paddr;	/* physical address - ignored? */
+	Elf32_Word	p_filesz;	/* number of bytes in file for seg. */
+	Elf32_Word	p_memsz;	/* number of bytes in mem. for seg. */
+	Elf32_Word	p_flags;	/* flags */
+	Elf32_Word	p_align;	/* memory alignment */
+} Elf32_Phdr;
+
+typedef struct {
+	Elf64_Half	p_type;		/* entry type */
+	Elf64_Half	p_flags;	/* flags */
+	Elf64_Off	p_offset;	/* offset */
+	Elf64_Addr	p_vaddr;	/* virtual address */
+	Elf64_Addr	p_paddr;	/* physical address */
+	Elf64_Xword	p_filesz;	/* file size */
+	Elf64_Xword	p_memsz;	/* memory size */
+	Elf64_Xword	p_align;	/* memory & file alignment */
+} Elf64_Phdr;
+
+/* Segment types - p_type */
+#define PT_NULL		0		/* unused */
+#define PT_LOAD		1		/* loadable segment */
+#define PT_DYNAMIC	2		/* dynamic linking section */
+#define PT_INTERP	3		/* the RTLD */
+#define PT_NOTE		4		/* auxiliary information */
+#define PT_SHLIB	5		/* reserved - purpose undefined */
+#define PT_PHDR		6		/* program header */
+#define PT_NUM		7		/* Number of segment types */
+#define PT_LOPROC	0x70000000	/* reserved range for processor */
+#define PT_HIPROC	0x7fffffff	/*  specific segment types */
+
+/* Segment flags - p_flags */
+#define PF_X		0x1		/* Executable */
+#define PF_W		0x2		/* Writable */
+#define PF_R		0x4		/* Readable */
+#define PF_MASKPROC	0xf0000000	/* reserved bits for processor */
+					/*  specific segment flags */
+
+/* Dynamic structure */
+typedef struct {
+	Elf32_Sword	d_tag;		/* controls meaning of d_val */
+	union {
+		Elf32_Word	d_val;	/* Multiple meanings - see d_tag */
+		Elf32_Addr	d_ptr;	/* program virtual address */
+	} d_un;
+} Elf32_Dyn;
+
+typedef struct {
+	Elf64_Xword	d_tag;		/* controls meaning of d_val */
+	union {
+		Elf64_Addr	d_ptr;
+		Elf64_Xword	d_val;
+	} d_un;
+} Elf64_Dyn;
+
+/* Dynamic Array Tags - d_tag */
+#define DT_NULL		0		/* marks end of _DYNAMIC array */
+#define DT_NEEDED	1		/* string table offset of needed lib */
+#define DT_PLTRELSZ	2		/* size of relocation entries in PLT */
+#define DT_PLTGOT	3		/* address PLT/GOT */
+#define DT_HASH		4		/* address of symbol hash table */
+#define DT_STRTAB	5		/* address of string table */
+#define DT_SYMTAB	6		/* address of symbol table */
+#define DT_RELA		7		/* address of relocation table */
+#define DT_RELASZ	8		/* size of relocation table */
+#define DT_RELAENT	9		/* size of relocation entry */
+#define DT_STRSZ	10		/* size of string table */
+#define DT_SYMENT	11		/* size of symbol table entry */
+#define DT_INIT		12		/* address of initialization func. */
+#define DT_FINI		13		/* address of termination function */
+#define DT_SONAME	14		/* string table offset of shared obj */
+#define DT_RPATH	15		/* string table offset of library
+					   search path */
+#define DT_SYMBOLIC	16		/* start sym search in shared obj. */
+#define DT_REL		17		/* address of rel. tbl. w/o addends */
+#define DT_RELSZ	18		/* size of DT_REL relocation table */
+#define DT_RELENT	19		/* size of DT_REL relocation entry */
+#define DT_PLTREL	20		/* PLT referenced relocation entry */
+#define DT_DEBUG	21		/* used for debugging */
+#define DT_TEXTREL	22		/* Allow rel. mod. to unwritable seg */
+#define DT_JMPREL	23		/* add. of PLT's relocation entries */
+#define DT_BIND_NOW	24		/* Bind now regardless of env setting */
+#define DT_NUM		25		/* Number used. */
+#define DT_LOPROC	0x70000000	/* reserved range for processor */
+#define DT_HIPROC	0x7fffffff	/*  specific dynamic array tags */
+
+/* Standard ELF hashing function */
+unsigned int elf_hash(const unsigned char *name);
+
+/*
+ * Note Definitions
+ */
+typedef struct {
+	Elf32_Word namesz;
+	Elf32_Word descsz;
+	Elf32_Word type;
+} Elf32_Note;
+
+typedef struct {
+	Elf64_Half namesz;
+	Elf64_Half descsz;
+	Elf64_Half type;
+} Elf64_Note;
+
+
+#if defined(ELFSIZE)
+#define CONCAT(x,y)	__CONCAT(x,y)
+#define ELFNAME(x)	CONCAT(elf,CONCAT(ELFSIZE,CONCAT(_,x)))
+#define ELFNAME2(x,y)	CONCAT(x,CONCAT(_elf,CONCAT(ELFSIZE,CONCAT(_,y))))
+#define ELFNAMEEND(x)	CONCAT(x,CONCAT(_elf,ELFSIZE))
+#define ELFDEFNNAME(x)	CONCAT(ELF,CONCAT(ELFSIZE,CONCAT(_,x)))
+#endif
+
+#if defined(ELFSIZE) && (ELFSIZE == 32)
+#define Elf_Ehdr	Elf32_Ehdr
+#define Elf_Phdr	Elf32_Phdr
+#define Elf_Shdr	Elf32_Shdr
+#define Elf_Sym		Elf32_Sym
+#define Elf_Rel		Elf32_Rel
+#define Elf_RelA	Elf32_Rela
+#define Elf_Dyn		Elf32_Dyn
+#define Elf_Word	Elf32_Word
+#define Elf_Sword	Elf32_Sword
+#define Elf_Addr	Elf32_Addr
+#define Elf_Off		Elf32_Off
+#define Elf_Nhdr	Elf32_Nhdr	/* NOTE(review): Elf32_Nhdr is not declared in this header -- confirm */
+#define Elf_Note	Elf32_Note
+
+#define ELF_R_SYM	ELF32_R_SYM
+#define ELF_R_TYPE	ELF32_R_TYPE
+#define ELF_R_INFO	ELF32_R_INFO
+#define ELFCLASS	ELFCLASS32
+
+#define ELF_ST_BIND	ELF32_ST_BIND
+#define ELF_ST_TYPE	ELF32_ST_TYPE
+#define ELF_ST_INFO	ELF32_ST_INFO
+
+#define AuxInfo		Aux32Info
+#elif defined(ELFSIZE) && (ELFSIZE == 64)
+#define Elf_Ehdr	Elf64_Ehdr
+#define Elf_Phdr	Elf64_Phdr
+#define Elf_Shdr	Elf64_Shdr
+#define Elf_Sym		Elf64_Sym
+#define Elf_Rel		Elf64_Rel
+#define Elf_RelA	Elf64_Rela
+#define Elf_Dyn		Elf64_Dyn
+#define Elf_Word	Elf64_Word
+#define Elf_Sword	Elf64_Sword
+#define Elf_Addr	Elf64_Addr
+#define Elf_Off		Elf64_Off
+#define Elf_Nhdr	Elf64_Nhdr	/* NOTE(review): Elf64_Nhdr is not declared in this header -- confirm */
+#define Elf_Note	Elf64_Note
+
+#define ELF_R_SYM	ELF64_R_SYM
+#define ELF_R_TYPE	ELF64_R_TYPE
+#define ELF_R_INFO	ELF64_R_INFO
+#define ELFCLASS	ELFCLASS64
+
+#define ELF_ST_BIND	ELF64_ST_BIND
+#define ELF_ST_TYPE	ELF64_ST_TYPE
+#define ELF_ST_INFO	ELF64_ST_INFO
+
+#define AuxInfo		Aux64Info
+#endif
+
+#endif /* __XEN_PUBLIC_ELFSTRUCTS_H__ */

Property changes on: xen/interface/elfstructs.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/kexec.h
===================================================================
--- xen/interface/kexec.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/kexec.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,189 @@
+/******************************************************************************
+ * kexec.h - Public portion
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ * 
+ * Xen port written by:
+ * - Simon 'Horms' Horman <horms@verge.net.au>
+ * - Magnus Damm <magnus@valinux.co.jp>
+ */
+
+#ifndef _XEN_PUBLIC_KEXEC_H
+#define _XEN_PUBLIC_KEXEC_H
+
+
+/* This file describes the Kexec / Kdump hypercall interface for Xen.
+ *
+ * Kexec under vanilla Linux allows a user to reboot the physical machine 
+ * into a new user-specified kernel. The Xen port extends this idea
+ * to allow rebooting of the machine from dom0. When kexec for dom0
+ * is used to reboot,  both the hypervisor and the domains get replaced
+ * with some other kernel. It is possible to kexec between vanilla
+ * Linux and Xen and back again. Xen to Xen works well too.
+ *
+ * The hypercall interface for kexec can be divided into three main
+ * types of hypercall operations:
+ *
+ * 1) Range information:
+ *    This is used by the dom0 kernel to ask the hypervisor about various 
+ *    address information. This information is needed to allow kexec-tools 
+ *    to fill in the ELF headers for /proc/vmcore properly.
+ *
+ * 2) Load and unload of images:
+ *    There are no big surprises here, the kexec binary from kexec-tools
+ *    runs in userspace in dom0. The tool loads/unloads data into the
+ *    dom0 kernel such as new kernel, initramfs and hypervisor. When
+ *    loaded the dom0 kernel performs a load hypercall operation, and
+ *    before releasing all page references the dom0 kernel calls unload.
+ *
+ * 3) Kexec operation:
+ *    This is used to start a previously loaded kernel.
+ */
+
+#include "xen.h"
+
+#if defined(__i386__) || defined(__x86_64__)
+#define KEXEC_XEN_NO_PAGES 17
+#endif
+
+/*
+ * Prototype for this hypercall is:
+ *  int kexec_op(int cmd, void *args)
+ * @cmd  == KEXEC_CMD_... 
+ *          KEXEC operation to perform
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+/*
+ * Kexec supports two types of operation:
+ * - kexec into a regular kernel, very similar to a standard reboot
+ *   - KEXEC_TYPE_DEFAULT is used to specify this type
+ * - kexec into a special "crash kernel", aka kexec-on-panic
+ *   - KEXEC_TYPE_CRASH is used to specify this type
+ *   - parts of our system may be broken at kexec-on-panic time
+ *     - the code should be kept as simple and self-contained as possible
+ */
+
+#define KEXEC_TYPE_DEFAULT 0
+#define KEXEC_TYPE_CRASH   1
+
+
+/* The kexec implementation for Xen allows the user to load two
+ * types of kernels, KEXEC_TYPE_DEFAULT and KEXEC_TYPE_CRASH.
+ * All data needed for a kexec reboot is kept in one xen_kexec_image_t
+ * per "instance". The data mainly consists of machine address lists to pages
+ * together with destination addresses. The data in xen_kexec_image_t
+ * is passed to the "code page" which is one page of code that performs
+ * the final relocations before jumping to the new kernel.
+ */
+ 
+typedef struct xen_kexec_image {
+#if defined(__i386__) || defined(__x86_64__)
+    unsigned long page_list[KEXEC_XEN_NO_PAGES];
+#endif
+#if defined(__ia64__)
+    unsigned long reboot_code_buffer;
+#endif
+    unsigned long indirection_page;
+    unsigned long start_address;
+} xen_kexec_image_t;
+
+/*
+ * Perform kexec having previously loaded a kexec or kdump kernel
+ * as appropriate.
+ * type == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ */
+#define KEXEC_CMD_kexec                 0
+typedef struct xen_kexec_exec {
+    int type;                /* [in] KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH */
+} xen_kexec_exec_t;
+
+/*
+ * Load/Unload kernel image for kexec or kdump.
+ * type  == KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH [in]
+ * image == relocation information for kexec (ignored for unload) [in]
+ */
+#define KEXEC_CMD_kexec_load            1
+#define KEXEC_CMD_kexec_unload          2
+typedef struct xen_kexec_load {
+    int type;                /* [in] KEXEC_TYPE_DEFAULT or KEXEC_TYPE_CRASH */
+    xen_kexec_image_t image; /* [in] relocation information; ignored for unload */
+} xen_kexec_load_t;
+
+#define KEXEC_RANGE_MA_CRASH      0 /* machine address and size of crash area */
+#define KEXEC_RANGE_MA_XEN        1 /* machine address and size of Xen itself */
+#define KEXEC_RANGE_MA_CPU        2 /* machine address and size of a CPU note */
+#define KEXEC_RANGE_MA_XENHEAP    3 /* machine address and size of xenheap
+                                     * Note that although this is adjacent
+                                     * to Xen it exists in a separate EFI
+                                     * region on ia64, and thus needs to be
+                                     * inserted into iomem_machine separately */
+#define KEXEC_RANGE_MA_BOOT_PARAM 4 /* machine address and size of
+                                     * the ia64_boot_param */
+#define KEXEC_RANGE_MA_EFI_MEMMAP 5 /* machine address and size of
+                                     * the EFI Memory Map */
+#define KEXEC_RANGE_MA_VMCOREINFO 6 /* machine address and size of vmcoreinfo */
+
+/*
+ * Find the address and size of certain memory areas
+ * range == KEXEC_RANGE_... [in]
+ * nr    == physical CPU number (starting from 0) if KEXEC_RANGE_MA_CPU [in]
+ * size  == number of bytes reserved in window [out]
+ * start == address of the first byte in the window [out]
+ */
+#define KEXEC_CMD_kexec_get_range       3
+typedef struct xen_kexec_range {
+    int range;               /* [in]  KEXEC_RANGE_... selector */
+    int nr;                  /* [in]  physical CPU number (from 0) if KEXEC_RANGE_MA_CPU */
+    unsigned long size;      /* [out] number of bytes reserved in window */
+    unsigned long start;     /* [out] address of the first byte in the window */
+} xen_kexec_range_t;
+
+/* vmcoreinfo stuff */
+#define VMCOREINFO_BYTES           (4096)
+#define VMCOREINFO_NOTE_NAME       "VMCOREINFO_XEN"
+void arch_crash_save_vmcoreinfo(void);
+void vmcoreinfo_append_str(const char *fmt, ...)
+       __attribute__ ((format (printf, 1, 2)));
+#define VMCOREINFO_PAGESIZE(value) \
+       vmcoreinfo_append_str("PAGESIZE=%ld\n", (long)(value)) /* %ld requires a long argument */
+#define VMCOREINFO_SYMBOL(name) \
+       vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ALIAS(alias, name) \
+       vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #alias, (unsigned long)&name)
+#define VMCOREINFO_STRUCT_SIZE(name) \
+       vmcoreinfo_append_str("SIZE(%s)=%zu\n", #name, sizeof(struct name))
+#define VMCOREINFO_OFFSET(name, field) \
+       vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
+                             (unsigned long)offsetof(struct name, field))
+#define VMCOREINFO_OFFSET_ALIAS(name, field, alias) \
+       vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #alias, \
+                             (unsigned long)offsetof(struct name, field))
+
+#endif /* _XEN_PUBLIC_KEXEC_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/kexec.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/xenoprof.h
===================================================================
--- xen/interface/xenoprof.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/xenoprof.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,138 @@
+/******************************************************************************
+ * xenoprof.h
+ * 
+ * Interface for enabling system wide profiling based on hardware performance
+ * counters
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Hewlett-Packard Co.
+ * Written by Aravind Menon & Jose Renato Santos
+ */
+
+#ifndef __XEN_PUBLIC_XENOPROF_H__
+#define __XEN_PUBLIC_XENOPROF_H__
+
+#include "xen.h"
+
+/*
+ * Commands to HYPERVISOR_xenoprof_op().
+ */
+#define XENOPROF_init                0
+#define XENOPROF_reset_active_list   1
+#define XENOPROF_reset_passive_list  2
+#define XENOPROF_set_active          3
+#define XENOPROF_set_passive         4
+#define XENOPROF_reserve_counters    5
+#define XENOPROF_counter             6
+#define XENOPROF_setup_events        7
+#define XENOPROF_enable_virq         8
+#define XENOPROF_start               9
+#define XENOPROF_stop               10
+#define XENOPROF_disable_virq       11
+#define XENOPROF_release_counters   12
+#define XENOPROF_shutdown           13
+#define XENOPROF_get_buffer         14
+#define XENOPROF_set_backtrace      15
+#define XENOPROF_last_op            15
+
+#define MAX_OPROF_EVENTS    32
+#define MAX_OPROF_DOMAINS   25
+#define XENOPROF_CPU_TYPE_SIZE 64
+
+/* Xenoprof performance events (not Xen events) */
+struct event_log {
+    uint64_t eip;
+    uint8_t mode;
+    uint8_t event;
+};
+
+/* PC value that indicates a special code (all bits set in an unsigned long) */
+#define XENOPROF_ESCAPE_CODE (~0UL)
+/* Transient events for the xenoprof->oprofile cpu buf */
+#define XENOPROF_TRACE_BEGIN 1
+
+/* Xenoprof buffer shared between Xen and domain - 1 per VCPU */
+struct xenoprof_buf {
+    uint32_t event_head;
+    uint32_t event_tail;
+    uint32_t event_size;
+    uint32_t vcpu_id;
+    uint64_t xen_samples;
+    uint64_t kernel_samples;
+    uint64_t user_samples;
+    uint64_t lost_samples;
+    struct event_log event_log[1];
+};
+#ifndef __XEN__
+typedef struct xenoprof_buf xenoprof_buf_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_buf_t);
+#endif
+
+struct xenoprof_init {
+    int32_t  num_events;
+    int32_t  is_primary;
+    char cpu_type[XENOPROF_CPU_TYPE_SIZE];
+};
+typedef struct xenoprof_init xenoprof_init_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_init_t);
+
+struct xenoprof_get_buffer {
+    int32_t  max_samples;
+    int32_t  nbuf;
+    int32_t  bufsize;
+    uint64_t buf_gmaddr;
+};
+typedef struct xenoprof_get_buffer xenoprof_get_buffer_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_get_buffer_t);
+
+struct xenoprof_counter {
+    uint32_t ind;
+    uint64_t count;
+    uint32_t enabled;
+    uint32_t event;
+    uint32_t hypervisor;
+    uint32_t kernel;
+    uint32_t user;
+    uint64_t unit_mask;
+};
+typedef struct xenoprof_counter xenoprof_counter_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_counter_t);
+
+typedef struct xenoprof_passive {
+    uint16_t domain_id;
+    int32_t  max_samples;
+    int32_t  nbuf;
+    int32_t  bufsize;
+    uint64_t buf_gmaddr;
+} xenoprof_passive_t;
+DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t);
+
+
+#endif /* __XEN_PUBLIC_XENOPROF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/xenoprof.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/acm.h
===================================================================
--- xen/interface/acm.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/acm.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,228 @@
+/*
+ * acm.h: Xen access control module interface definitions
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Reiner Sailer <sailer@watson.ibm.com>
+ * Copyright (c) 2005, International Business Machines Corporation.
+ */
+
+#ifndef _XEN_PUBLIC_ACM_H
+#define _XEN_PUBLIC_ACM_H
+
+#include "xen.h"
+
+/* if ACM_DEBUG defined, all hooks should
+ * print a short trace message (comment it out
+ * when not in testing mode )
+ */
+/* #define ACM_DEBUG */
+
+#ifdef ACM_DEBUG /* debug build: printkd() forwards its arguments to printk() */
+#  define printkd(fmt, args...) printk(fmt,## args) /* GNU named-variadic form; ## drops the comma when args is empty */
+#else /* non-debug build: printkd() expands to nothing, so its arguments are never evaluated */
+#  define printkd(fmt, args...)
+#endif /* ACM_DEBUG */
+
+/* default ssid reference value if not supplied */
+#define ACM_DEFAULT_SSID  0x0
+#define ACM_DEFAULT_LOCAL_SSID  0x0
+
+/* Internal ACM ERROR types: 0 is success, every failure code is negative */
+#define ACM_OK     0
+#define ACM_UNDEF   -1 /* negative expansions are not parenthesized; keep that in mind when composing expressions */
+#define ACM_INIT_SSID_ERROR  -2
+#define ACM_INIT_SOID_ERROR  -3
+#define ACM_ERROR          -4
+
+/* External ACCESS DECISIONS: 0 permits, negative values deny or report an error */
+#define ACM_ACCESS_PERMITTED        0 /* same numeric value as ACM_OK */
+#define ACM_ACCESS_DENIED           -111
+#define ACM_NULL_POINTER_ERROR      -200
+
+/*
+   Error codes reported when trying to test for a new policy.
+   These error codes are reported in an array of tuples where
+   each error code is followed by a parameter describing the error
+   more closely, such as a domain id.
+*/
+#define ACM_EVTCHN_SHARING_VIOLATION       0x100 /* 0x100..0x104: contiguous and positive, unlike the negative ACM_* errors */
+#define ACM_GNTTAB_SHARING_VIOLATION       0x101
+#define ACM_DOMAIN_LOOKUP                  0x102
+#define ACM_CHWALL_CONFLICT                0x103
+#define ACM_SSIDREF_IN_USE                 0x104 /* last code defined here */
+
+
+/* primary policy in lower 4 bits */
+#define ACM_NULL_POLICY 0
+#define ACM_CHINESE_WALL_POLICY 1
+#define ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY 2
+#define ACM_POLICY_UNDEFINED 15
+
+/* combinations have secondary policy component in higher 4bit */
+#define ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY \
+    ((ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY << 4) | ACM_CHINESE_WALL_POLICY)
+
+/* Map policy code X to a string literal ("UNDEFINED" if unknown). X may be evaluated up to four times. The expansion is fully parenthesized so it is safe inside larger expressions. */
+#define ACM_POLICY_NAME(X) \
+   (((X) == (ACM_NULL_POLICY)) ? "NULL" :                        \
+    ((X) == (ACM_CHINESE_WALL_POLICY)) ? "CHINESE WALL" :        \
+    ((X) == (ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "SIMPLE TYPE ENFORCEMENT" : \
+    ((X) == (ACM_CHINESE_WALL_AND_SIMPLE_TYPE_ENFORCEMENT_POLICY)) ? "CHINESE WALL AND SIMPLE TYPE ENFORCEMENT" : \
+     "UNDEFINED")
+
+/* the following policy versions must be increased
+ * whenever the interpretation of the related
+ * policy's data structure changes
+ */
+#define ACM_POLICY_VERSION 3 /* value for acm_policy_buffer.policy_version */
+#define ACM_CHWALL_VERSION 1 /* value for acm_chwall_policy_buffer.policy_version */
+#define ACM_STE_VERSION  1   /* value for acm_ste_policy_buffer.policy_version */
+
+/* defines a ssid reference used by xen */
+typedef uint32_t ssidref_t; /* 32 bits wide; also the type of acm_ssid_buffer.ssidref */
+
+/* hooks that are known to domains */
+#define ACMHOOK_none    0
+#define ACMHOOK_sharing 1 /* the only non-zero hook id defined in this header */
+
+/* -------security policy relevant type definitions-------- */
+
+/* type identifier; compares to "equal" or "not equal" */
+typedef uint16_t domaintype_t; /* 16 bits wide */
+
+/* CHINESE WALL POLICY DATA STRUCTURES
+ *
+ * current accumulated conflict type set:
+ * When a domain is started and has a type that is in
+ * a conflict set, the conflicting types are incremented in
+ * the aggregate set. When a domain is destroyed, the 
+ * conflicting types to its type are decremented.
+ * If a domain has multiple types, this procedure works over
+ * all those types.
+ *
+ * conflict_aggregate_set[i] holds the number of
+ *   running domains that have a conflict with type i.
+ *
+ * running_types[i] holds the number of running domains
+ *        that include type i in their ssidref-referenced type set
+ *
+ * conflict_sets[i][j] is "0" if type j has no conflict
+ *    with type i and is "1" otherwise.
+ */
+/* high-16 = version, low-16 = check magic; 0x0001debc therefore splits into version 0x0001 and magic 0xdebc */
+#define ACM_MAGIC  0x0001debc
+
+/* each offset in bytes from start of the struct they
+ * are part of */
+
+/* V3 of the policy buffer added a version structure */
+struct acm_policy_version /* two 32-bit words; embedded as acm_policy_buffer.xml_pol_version */
+{
+    uint32_t major;
+    uint32_t minor;
+};
+
+
+/* each buffer consists of all policy information for
+ * the respective policy given in the policy code
+ *
+ * acm_policy_buffer, acm_chwall_policy_buffer,
+ * and acm_ste_policy_buffer need to stay 32-bit aligned
+ * because we create binary policies also with external
+ * tools that assume packed representations (e.g. the java tool)
+ */
+struct acm_policy_buffer { /* ten 32-bit words in total, so no padding between members */
+    uint32_t policy_version; /* ACM_POLICY_VERSION */
+    uint32_t magic;                   /* presumably ACM_MAGIC - TODO confirm */
+    uint32_t len;                     /* NOTE(review): presumably the total buffer length in bytes - confirm */
+    uint32_t policy_reference_offset; /* *_offset fields: bytes from the start of this struct */
+    uint32_t primary_policy_code;     /* presumably one of the ACM_*_POLICY codes - confirm */
+    uint32_t primary_buffer_offset;
+    uint32_t secondary_policy_code;
+    uint32_t secondary_buffer_offset;
+    struct acm_policy_version xml_pol_version; /* added in V3 */
+};
+
+
+struct acm_policy_reference_buffer { /* single-word header; any payload after it is not declared here */
+    uint32_t len; /* NOTE(review): presumably the length of the reference data that follows - confirm */
+};
+
+struct acm_chwall_policy_buffer { /* nine 32-bit words describing the Chinese Wall policy */
+    uint32_t policy_version; /* ACM_CHWALL_VERSION */
+    uint32_t policy_code;             /* presumably ACM_CHINESE_WALL_POLICY - TODO confirm */
+    uint32_t chwall_max_types;
+    uint32_t chwall_max_ssidrefs;
+    uint32_t chwall_max_conflictsets;
+    uint32_t chwall_ssid_offset;      /* *_offset fields: bytes from the start of this struct */
+    uint32_t chwall_conflict_sets_offset;
+    uint32_t chwall_running_types_offset;
+    uint32_t chwall_conflict_aggregate_offset;
+};
+
+struct acm_ste_policy_buffer { /* five 32-bit words describing the STE policy */
+    uint32_t policy_version; /* ACM_STE_VERSION */
+    uint32_t policy_code;      /* presumably ACM_SIMPLE_TYPE_ENFORCEMENT_POLICY - TODO confirm */
+    uint32_t ste_max_types;
+    uint32_t ste_max_ssidrefs;
+    uint32_t ste_ssid_offset;  /* bytes from the start of this struct */
+};
+
+struct acm_stats_buffer { /* six 32-bit words: header for primary and secondary policy statistics */
+    uint32_t magic;                  /* presumably ACM_MAGIC - TODO confirm */
+    uint32_t len;
+    uint32_t primary_policy_code;
+    uint32_t primary_stats_offset;   /* *_offset fields: bytes from the start of this struct */
+    uint32_t secondary_policy_code;
+    uint32_t secondary_stats_offset;
+};
+
+struct acm_ste_stats_buffer { /* six 32-bit counters in ec_/gt_ pairs */
+    uint32_t ec_eval_count;     /* NOTE(review): ec_ presumably means event channel, gt_ grant table - confirm */
+    uint32_t gt_eval_count;
+    uint32_t ec_denied_count;
+    uint32_t gt_denied_count;
+    uint32_t ec_cachehit_count;
+    uint32_t gt_cachehit_count;
+};
+
+struct acm_ssid_buffer { /* nine 32-bit words (ssidref_t is a uint32_t typedef) */
+    uint32_t len;
+    ssidref_t ssidref;
+    uint32_t policy_reference_offset; /* *_offset fields: bytes from the start of this struct */
+    uint32_t primary_policy_code;
+    uint32_t primary_max_types;
+    uint32_t primary_types_offset;
+    uint32_t secondary_policy_code;
+    uint32_t secondary_max_types;
+    uint32_t secondary_types_offset;
+};
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/acm.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/arch-x86_32.h
===================================================================
--- xen/interface/arch-x86_32.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/arch-x86_32.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,27 @@
+/******************************************************************************
+ * arch-x86_32.h
+ * 
+ * Guest OS interface to x86 32-bit Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004-2006, K A Fraser
+ */
+
+#include <xen/interface/arch-x86/xen.h>

Property changes on: xen/interface/arch-x86_32.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/xencomm.h
===================================================================
--- xen/interface/xencomm.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/xencomm.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,41 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) IBM Corp. 2006
+ */
+
+#ifndef _XEN_XENCOMM_H_
+#define _XEN_XENCOMM_H_
+
+/* A xencomm descriptor is a scatter/gather list containing physical
+ * addresses corresponding to a virtually contiguous memory area. The
+ * hypervisor translates these physical addresses to machine addresses to copy
+ * to and from the virtually contiguous area.
+ */
+
+#define XENCOMM_MAGIC 0x58434F4D /* 'XCOM': bytes 0x58 0x43 0x4F 0x4D are ASCII X, C, O, M */
+#define XENCOMM_INVALID (~0UL) /* NOTE(review): unsigned long width, while xencomm_desc.address[] is uint64_t - confirm behaviour where long is 32 bits */
+
+struct xencomm_desc { /* header of a scatter/gather list; the address entries follow it in memory */
+    uint32_t magic;    /* presumably XENCOMM_MAGIC - TODO confirm */
+    uint32_t nr_addrs; /* the number of entries in address[] */
+    uint64_t address[0]; /* zero-length trailing array (GNU extension): adds nothing to sizeof, nr_addrs entries live beyond the struct */
+};
+
+#endif /* _XEN_XENCOMM_H_ */

Property changes on: xen/interface/xencomm.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/memory.h
===================================================================
--- xen/interface/memory.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/memory.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,312 @@
+/******************************************************************************
+ * memory.h
+ * 
+ * Memory reservation and information.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_MEMORY_H__
+#define __XEN_PUBLIC_MEMORY_H__
+
+/*
+ * Increase or decrease the specified domain's memory reservation. Returns the
+ * number of extents successfully allocated or freed.
+ * arg == addr of struct xen_memory_reservation.
+ */
+#define XENMEM_increase_reservation 0 /* all three ops take a struct xen_memory_reservation */
+#define XENMEM_decrease_reservation 1
+#define XENMEM_populate_physmap     6 /* op numbers are not contiguous: 2..5 are used by other XENMEM_* ops */
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+/*
+ * Maximum # bits addressable by the user of the allocated region (e.g., I/O
+ * devices often have a 32-bit limitation even in 64-bit systems). If zero
+ * then the user has no addressing restriction. This field is not used by
+ * XENMEM_decrease_reservation.
+ */
+#define XENMEMF_address_bits(x)     (x) /* encoder: the value is used as-is, it is not masked */
+#define XENMEMF_get_address_bits(x) ((x) & 0xffu) /* decoder: low 8 bits of the flags word */
+/* NUMA node to allocate from. Stored as node+1 starting at bit 8. */
+#define XENMEMF_node(x)     (((x) + 1) << 8)
+#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu) /* inverse of XENMEMF_node(); a zero field decodes to 0xff */
+#endif /* __XEN_INTERFACE_VERSION__ >= 0x00030209 */
+
+struct xen_memory_reservation { /* argument of XENMEM_{increase,decrease}_reservation and XENMEM_populate_physmap */
+
+    /*
+     * XENMEM_increase_reservation:
+     *   OUT: MFN (*not* GMFN) bases of extents that were allocated
+     * XENMEM_decrease_reservation:
+     *   IN:  GMFN bases of extents to free
+     * XENMEM_populate_physmap:
+     *   IN:  GPFN bases of extents to populate with memory
+     *   OUT: GMFN bases of extents that were allocated
+     *   (NB. This command also updates the mach_to_phys translation table)
+     */
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+    /* Number of extents, and size/alignment of each (2^extent_order pages). */
+    xen_ulong_t    nr_extents;
+    unsigned int   extent_order;
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+    /* XENMEMF flags (see XENMEMF_address_bits() and XENMEMF_node()). */
+    unsigned int   mem_flags;
+#else /* older interface: the same position holds a plain address-bits field */
+    unsigned int   address_bits;
+#endif
+
+    /*
+     * Domain whose reservation is being changed.
+     * Unprivileged domains can specify only DOMID_SELF.
+     */
+    domid_t        domid;
+};
+typedef struct xen_memory_reservation xen_memory_reservation_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);
+
+/*
+ * An atomic exchange of memory pages. If return code is zero then
+ * @out.extent_list provides GMFNs of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code.
+ * On complete success then always @nr_exchanged == @in.nr_extents.
+ * On partial success @nr_exchanged indicates how much work was done.
+ */
+#define XENMEM_exchange             11
+struct xen_memory_exchange { /* two reservation descriptors (in, out) plus a progress counter */
+    /*
+     * [IN] Details of memory extents to be exchanged (GMFN bases).
+     * Note that @in.address_bits is ignored and unused.
+     */
+    struct xen_memory_reservation in;
+
+    /*
+     * [IN/OUT] Details of new memory extents.
+     * We require that:
+     *  1. @in.domid == @out.domid
+     *  2. @in.nr_extents  << @in.extent_order ==
+     *     @out.nr_extents << @out.extent_order
+     *  3. @in.extent_start and @out.extent_start lists must not overlap
+     *  4. @out.extent_start lists GPFN bases to be populated
+     *  5. @out.extent_start is overwritten with allocated GMFN bases
+     */
+    struct xen_memory_reservation out;
+
+    /*
+     * [OUT] Number of input extents that were successfully exchanged:
+     *  1. The first @nr_exchanged input extents were successfully
+     *     deallocated.
+     *  2. The corresponding first entries in the output extent list correctly
+     *     indicate the GMFNs that were successfully exchanged.
+     *  3. All other input and output extents are untouched.
+     *  4. If not all input extents are exchanged then the return code of this
+     *     command will be non-zero.
+     *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
+     */
+    xen_ulong_t nr_exchanged;
+};
+typedef struct xen_memory_exchange xen_memory_exchange_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);
+
+/*
+ * Returns the maximum machine frame number of mapped RAM in this system.
+ * This command always succeeds (it never returns an error code).
+ * arg == NULL.
+ */
+#define XENMEM_maximum_ram_page     2 /* arg == NULL; never returns an error code */
+
+/*
+ * Returns the current or maximum memory reservation, in pages, of the
+ * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
+ * arg == addr of domid_t.
+ */
+#define XENMEM_current_reservation  3
+#define XENMEM_maximum_reservation  4
+
+/*
+ * Returns the maximum GPFN in use by the guest, or -ve errcode on failure.
+ */
+#define XENMEM_maximum_gpfn         14 /* NOTE(review): the argument type is not documented here - confirm */
+
+/*
+ * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table do not implement
+ * this command.
+ * arg == addr of xen_machphys_mfn_list_t.
+ */
+#define XENMEM_machphys_mfn_list    5
+struct xen_machphys_mfn_list { /* max_extents and extent_start are supplied by the caller, nr_extents is written back */
+    /*
+     * Size of the 'extent_start' array. Fewer entries will be filled if the
+     * machphys table is smaller than max_extents * 2MB.
+     */
+    unsigned int max_extents;
+
+    /*
+     * Pointer to buffer to fill with list of extent starts. If there are
+     * any large discontiguities in the machine address space, 2MB gaps in
+     * the machphys table will be represented by an MFN base of zero.
+     */
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
+
+    /*
+     * Number of extents written to the above array. This will be smaller
+     * than 'max_extents' if the machphys table is smaller than max_extents * 2MB.
+     */
+    unsigned int nr_extents;
+};
+typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
+
+/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping     12
+struct xen_machphys_mapping { /* filled in with the location of the machine_to_phys table; all fields are xen_ulong_t */
+    xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
+    xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
+};
+typedef struct xen_machphys_mapping xen_machphys_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
+
+/*
+ * Sets the GPFN at which a particular page appears in the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_add_to_physmap_t.
+ */
+#define XENMEM_add_to_physmap      7
+struct xen_add_to_physmap { /* (space, idx) names the source page, gpfn names where it should appear */
+    /* Which domain to change the mapping for. */
+    domid_t domid;
+
+    /* Source mapping space: one of the XENMAPSPACE_* values below. */
+#define XENMAPSPACE_shared_info 0 /* shared info page */
+#define XENMAPSPACE_grant_table 1 /* grant table page */
+#define XENMAPSPACE_mfn         2 /* usual MFN */
+    unsigned int space;
+
+    /* Index into source mapping space. */
+    xen_ulong_t idx;
+
+    /* GPFN where the source mapping page should appear. */
+    xen_pfn_t     gpfn;
+};
+typedef struct xen_add_to_physmap xen_add_to_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
+
+/*
+ * Unmaps the page appearing at a particular GPFN from the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_remove_from_physmap_t.
+ */
+#define XENMEM_remove_from_physmap      15
+struct xen_remove_from_physmap { /* names the page to unmap by (domid, gpfn) */
+    /* Which domain to change the mapping for. */
+    domid_t domid;
+
+    /* GPFN of the current mapping of the page. */
+    xen_pfn_t     gpfn;
+};
+typedef struct xen_remove_from_physmap xen_remove_from_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t);
+
+/*
+ * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
+ * code on failure. This call only works for auto-translated guests.
+ */
+#define XENMEM_translate_gpfn_list  8
+struct xen_translate_gpfn_list { /* input list gpfn_list and output list mfn_list may be the same buffer */
+    /* Which domain to translate for? */
+    domid_t domid;
+
+    /* Length of list. */
+    xen_ulong_t nr_gpfns;
+
+    /* List of GPFNs to translate. */
+    XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list;
+
+    /*
+     * Output list to contain MFN translations. May be the same as the input
+     * list (in which case each input GPFN is overwritten with the output MFN).
+     */
+    XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
+};
+typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
+DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
+
+/*
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started (specified by XENMEM_set_memory_map).
+ * arg == addr of xen_memory_map_t.
+ */
+#define XENMEM_memory_map           9
+struct xen_memory_map { /* also embedded in struct xen_foreign_memory_map */
+    /*
+     * On call the number of entries which can be stored in buffer. On
+     * return the number of entries which have been stored in
+     * buffer.
+     */
+    unsigned int nr_entries; /* IN/OUT, per the comment above */
+
+    /*
+     * Entries in the buffer are in the same format as returned by the
+     * BIOS INT 0x15 EAX=0xE820 call.
+     */
+    XEN_GUEST_HANDLE(void) buffer; /* untyped handle: the entry layout is not declared in this header */
+};
+typedef struct xen_memory_map xen_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ * arg == addr of xen_memory_map_t.
+ */
+#define XENMEM_machine_memory_map   10 /* takes a xen_memory_map_t, like XENMEM_memory_map */
+
+/*
+ * Set the pseudo-physical memory map of a domain, as returned by
+ * XENMEM_memory_map.
+ * arg == addr of xen_foreign_memory_map_t.
+ */
+#define XENMEM_set_memory_map       13
+struct xen_foreign_memory_map { /* a struct xen_memory_map tagged with a domain id */
+    domid_t domid;             /* presumably the domain whose map is being set - TODO confirm */
+    struct xen_memory_map map; /* embedded by value, not by handle */
+};
+typedef struct xen_foreign_memory_map xen_foreign_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t);
+
+#endif /* __XEN_PUBLIC_MEMORY_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/memory.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/event_channel.h
===================================================================
--- xen/interface/event_channel.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/event_channel.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,264 @@
+/******************************************************************************
+ * event_channel.h
+ * 
+ * Event channels between domains.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, K A Fraser.
+ */
+
+#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__
+#define __XEN_PUBLIC_EVENT_CHANNEL_H__
+
+/*
+ * Prototype for this hypercall is:
+ *  int event_channel_op(int cmd, void *args)
+ * @cmd  == EVTCHNOP_??? (event-channel operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+typedef uint32_t evtchn_port_t; /* event-channel port number, 32 bits wide */
+DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); /* guest handle type for evtchn_port_t */
+
+/*
+ * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
+ * accepting interdomain bindings from domain <remote_dom>. A fresh port
+ * is allocated in <dom> and returned as <port>.
+ * NOTES:
+ *  1. If the caller is unprivileged then <dom> must be DOMID_SELF.
+ *  2. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+#define EVTCHNOP_alloc_unbound    6
+struct evtchn_alloc_unbound {
+    /* IN parameters */
+    domid_t dom, remote_dom; /* dom: domain the port is allocated in; remote_dom: domain allowed to bind to it */
+    /* OUT parameters */
+    evtchn_port_t port;      /* the freshly allocated port in <dom> */
+};
+typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t;
+
+/*
+ * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
+ * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify
+ * a port that is unbound and marked as accepting bindings from the calling
+ * domain. A fresh port is allocated in the calling domain and returned as
+ * <local_port>.
+ * NOTES:
+ *  1. <remote_dom> may be DOMID_SELF, allowing loopback connections.
+ */
+#define EVTCHNOP_bind_interdomain 0
+struct evtchn_bind_interdomain {
+    /* IN parameters. */
+    domid_t remote_dom;        /* with remote_port, identifies an unbound port that accepts the caller */
+    evtchn_port_t remote_port;
+    /* OUT parameters. */
+    evtchn_port_t local_port;  /* fresh port allocated in the calling domain */
+};
+typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t;
+
+/*
+ * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <virq> on specified
+ * vcpu.
+ * NOTES:
+ *  1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list
+ *     in xen.h for the classification of each VIRQ.
+ *  2. Global VIRQs must be allocated on VCPU0 but can subsequently be
+ *     re-bound via EVTCHNOP_bind_vcpu.
+ *  3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu.
+ *     The allocated event channel is bound to the specified vcpu and the
+ *     binding cannot be changed.
+ */
+#define EVTCHNOP_bind_virq        1
+struct evtchn_bind_virq {
+    /* IN parameters. */
+    uint32_t virq;      /* the VIRQ to bind */
+    uint32_t vcpu;      /* the vcpu to bind it on */
+    /* OUT parameters. */
+    evtchn_port_t port; /* the allocated event channel */
+};
+typedef struct evtchn_bind_virq evtchn_bind_virq_t;
+
+/*
+ * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <pirq>.
+ * NOTES:
+ *  1. A physical IRQ may be bound to at most one event channel per domain.
+ *  2. Only a sufficiently-privileged domain may bind to a physical IRQ.
+ */
+#define EVTCHNOP_bind_pirq        2
+struct evtchn_bind_pirq {
+    /* IN parameters. */
+    uint32_t pirq;      /* the physical IRQ to bind */
+#define BIND_PIRQ__WILL_SHARE 1 /* the only flag defined here; presumably means the caller accepts a shared IRQ - confirm */
+    uint32_t flags; /* BIND_PIRQ__* */
+    /* OUT parameters. */
+    evtchn_port_t port; /* presumably the event channel bound to the PIRQ - TODO confirm */
+};
+typedef struct evtchn_bind_pirq evtchn_bind_pirq_t;
+
+/*
+ * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
+ * NOTES:
+ *  1. The allocated event channel is bound to the specified vcpu. The binding
+ *     may not be changed.
+ */
+#define EVTCHNOP_bind_ipi         7
+struct evtchn_bind_ipi {
+    uint32_t vcpu;      /* IN: the vcpu the allocated channel is bound to */
+    /* OUT parameters. */
+    evtchn_port_t port; /* the allocated event channel */
+};
+typedef struct evtchn_bind_ipi evtchn_bind_ipi_t;
+
+/*
+ * EVTCHNOP_close: Close a local event channel <port>. If the channel is
+ * interdomain then the remote end is placed in the unbound state
+ * (EVTCHNSTAT_unbound), awaiting a new connection.
+ */
+#define EVTCHNOP_close            3
+struct evtchn_close {
+    /* IN parameters. */
+    evtchn_port_t port; /* local port to close */
+};
+typedef struct evtchn_close evtchn_close_t;
+
+/*
+ * EVTCHNOP_send: Send an event to the remote end of the channel whose local
+ * endpoint is <port>.
+ */
+#define EVTCHNOP_send             4
+struct evtchn_send {
+    /* IN parameters. */
+    evtchn_port_t port; /* local endpoint; the event goes to the remote end of this channel */
+};
+typedef struct evtchn_send evtchn_send_t;
+
+/*
+ * EVTCHNOP_status: Get the current status of the communication channel which
+ * has an endpoint at <dom, port>.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may obtain the status of an event
+ *     channel for which <dom> is not DOMID_SELF.
+ */
+#define EVTCHNOP_status           5
+struct evtchn_status {
+    /* IN parameters */
+    domid_t  dom;          /* <dom, port> names the endpoint being queried */
+    evtchn_port_t port;
+    /* OUT parameters */
+#define EVTCHNSTAT_closed       0  /* Channel is not in use.                 */
+#define EVTCHNSTAT_unbound      1  /* Channel is waiting interdom connection.*/
+#define EVTCHNSTAT_interdomain  2  /* Channel is connected to remote domain. */
+#define EVTCHNSTAT_pirq         3  /* Channel is bound to a phys IRQ line.   */
+#define EVTCHNSTAT_virq         4  /* Channel is bound to a virtual IRQ line */
+#define EVTCHNSTAT_ipi          5  /* Channel is bound to a virtual IPI line */
+    uint32_t status;               /* One of the EVTCHNSTAT_* values above.  */
+    uint32_t vcpu;                 /* VCPU to which this channel is bound.   */
+    union { /* each arm is labelled with its status; closed and ipi have no arm */
+        struct {
+            domid_t dom;
+        } unbound; /* EVTCHNSTAT_unbound */
+        struct {
+            domid_t dom;
+            evtchn_port_t port;
+        } interdomain; /* EVTCHNSTAT_interdomain */
+        uint32_t pirq;      /* EVTCHNSTAT_pirq        */
+        uint32_t virq;      /* EVTCHNSTAT_virq        */
+    } u;
+};
+typedef struct evtchn_status evtchn_status_t;
+
+/*
+ * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
+ * event is pending.
+ * NOTES:
+ *  1. IPI-bound channels always notify the vcpu specified at bind time.
+ *     This binding cannot be changed.
+ *  2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time.
+ *     This binding cannot be changed.
+ *  3. All other channels notify vcpu0 by default. This default is set when
+ *     the channel is allocated (a port that is freed and subsequently reused
+ *     has its binding reset to vcpu0).
+ */
+#define EVTCHNOP_bind_vcpu        8
+struct evtchn_bind_vcpu {
+    /* IN parameters. */
+    evtchn_port_t port; /* channel whose notification target is being set */
+    uint32_t vcpu;      /* vcpu to notify when an event is pending */
+};
+typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t;
+
+/*
+ * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
+ * a notification to the appropriate VCPU if an event is pending.
+ */
+#define EVTCHNOP_unmask           9
+struct evtchn_unmask {
+    /* IN parameters. */
+    evtchn_port_t port; /* local port to unmask */
+};
+typedef struct evtchn_unmask evtchn_unmask_t;
+
+/*
+ * EVTCHNOP_reset: Close all event channels associated with specified domain.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify other than DOMID_SELF.
+ */
+#define EVTCHNOP_reset           10
+struct evtchn_reset {
+    /* IN parameters. */
+    domid_t dom; /* domain whose event channels are all closed; may be DOMID_SELF */
+};
+typedef struct evtchn_reset evtchn_reset_t;
+
+/*
+ * Argument to event_channel_op_compat() hypercall. Superseded by new
+ * event_channel_op() hypercall since 0x00030202.
+ */
+struct evtchn_op { /* command word followed by a union of the per-command argument structs */
+    uint32_t cmd; /* EVTCHNOP_* */
+    union { /* one arm per command; struct evtchn_reset (EVTCHNOP_reset) has no arm here */
+        struct evtchn_alloc_unbound    alloc_unbound;
+        struct evtchn_bind_interdomain bind_interdomain;
+        struct evtchn_bind_virq        bind_virq;
+        struct evtchn_bind_pirq        bind_pirq;
+        struct evtchn_bind_ipi         bind_ipi;
+        struct evtchn_close            close;
+        struct evtchn_send             send;
+        struct evtchn_status           status;
+        struct evtchn_bind_vcpu        bind_vcpu;
+        struct evtchn_unmask           unmask;
+    } u;
+};
+typedef struct evtchn_op evtchn_op_t;
+DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);
+
+#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/event_channel.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/xen-compat.h
===================================================================
--- xen/interface/xen-compat.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/xen-compat.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,44 @@
+/******************************************************************************
+ * xen-compat.h
+ * 
+ * Guest OS interface to Xen.  Compatibility layer.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2006, Christian Limpach
+ */
+
+#ifndef __XEN_PUBLIC_XEN_COMPAT_H__
+#define __XEN_PUBLIC_XEN_COMPAT_H__
+
+#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030209
+
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+/* Xen is built with matching headers and implements the latest interface. */
+#define __XEN_INTERFACE_VERSION__ __XEN_LATEST_INTERFACE_VERSION__
+#elif !defined(__XEN_INTERFACE_VERSION__)
+/* Guests which do not specify a version get the legacy interface. */
+#define __XEN_INTERFACE_VERSION__ 0x00000000
+#endif
+
+#if __XEN_INTERFACE_VERSION__ > __XEN_LATEST_INTERFACE_VERSION__
+#error "These header files do not support the requested interface version."
+#endif
+
+#endif /* __XEN_PUBLIC_XEN_COMPAT_H__ */

Property changes on: xen/interface/xen-compat.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/arch-ia64.h
===================================================================
--- xen/interface/arch-ia64.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/arch-ia64.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,621 @@
+/******************************************************************************
+ * arch-ia64/hypervisor-if.h
+ * 
+ * Guest OS interface to IA64 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "xen.h"
+
+#ifndef __HYPERVISOR_IF_IA64_H__
+#define __HYPERVISOR_IF_IA64_H__
+
+#if !defined(__GNUC__) || defined(__STRICT_ANSI__)
+#error "Anonymous structs/unions are a GNU extension."
+#endif
+
+/* Structural guest handles introduced in 0x00030201. */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030201
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef struct { type *p; } __guest_handle_ ## name
+#else
+#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \
+    typedef type * __guest_handle_ ## name
+#endif
+
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
+    ___DEFINE_XEN_GUEST_HANDLE(name, type);   \
+    ___DEFINE_XEN_GUEST_HANDLE(const_##name, const type)
+
+#define DEFINE_XEN_GUEST_HANDLE(name)   __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define XEN_GUEST_HANDLE(name)          __guest_handle_ ## name
+#define XEN_GUEST_HANDLE_64(name)       XEN_GUEST_HANDLE(name)
+#define uint64_aligned_t                uint64_t
+#define set_xen_guest_handle(hnd, val)  do { (hnd).p = val; } while (0)
+#ifdef __XEN_TOOLS__
+#define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
+#endif
+
+#ifndef __ASSEMBLY__
+typedef unsigned long xen_pfn_t;
+#define PRI_xen_pfn "lx"
+#endif
+
+/* Arch specific VIRQs definition */
+#define VIRQ_ITC        VIRQ_ARCH_0 /* V. Virtual itc timer */
+#define VIRQ_MCA_CMC    VIRQ_ARCH_1 /* MCA cmc interrupt */
+#define VIRQ_MCA_CPE    VIRQ_ARCH_2 /* MCA cpe interrupt */
+
+/* Maximum number of virtual CPUs in multi-processor guests. */
+/* WARNING: before changing this, check that shared_info fits on a page */
+#define MAX_VIRT_CPUS 64
+
+/* IO ports location for PV.  */
+#define IO_PORTS_PADDR          0x00000ffffc000000UL
+#define IO_PORTS_SIZE           0x0000000004000000UL
+
+#ifndef __ASSEMBLY__
+
+typedef unsigned long xen_ulong_t;
+
+#ifdef __XEN_TOOLS__
+#define XEN_PAGE_SIZE XC_PAGE_SIZE
+#else
+#define XEN_PAGE_SIZE PAGE_SIZE
+#endif
+
+#define INVALID_MFN       (~0UL)
+
+struct pt_fpreg {
+    union {
+        unsigned long bits[2];
+        long double __dummy;    /* force 16-byte alignment */
+    } u;
+};
+
+union vac {
+    unsigned long value;
+    struct {
+        int a_int:1;
+        int a_from_int_cr:1;
+        int a_to_int_cr:1;
+        int a_from_psr:1;
+        int a_from_cpuid:1;
+        int a_cover:1;
+        int a_bsw:1;
+        long reserved:57;
+    };
+};
+typedef union vac vac_t;
+
+union vdc {
+    unsigned long value;
+    struct {
+        int d_vmsw:1;
+        int d_extint:1;
+        int d_ibr_dbr:1;
+        int d_pmc:1;
+        int d_to_pmd:1;
+        int d_itm:1;
+        long reserved:58;
+    };
+};
+typedef union vdc vdc_t;
+
+struct mapped_regs {
+    union vac   vac;
+    union vdc   vdc;
+    unsigned long  virt_env_vaddr;
+    unsigned long  reserved1[29];
+    unsigned long  vhpi;
+    unsigned long  reserved2[95];
+    union {
+        unsigned long  vgr[16];
+        unsigned long bank1_regs[16]; // bank1 regs (r16-r31) when bank0 active
+    };
+    union {
+        unsigned long  vbgr[16];
+        unsigned long bank0_regs[16]; // bank0 regs (r16-r31) when bank1 active
+    };
+    unsigned long  vnat;
+    unsigned long  vbnat;
+    unsigned long  vcpuid[5];
+    unsigned long  reserved3[11];
+    unsigned long  vpsr;
+    unsigned long  vpr;
+    unsigned long  reserved4[76];
+    union {
+        unsigned long  vcr[128];
+        struct {
+            unsigned long dcr;  // CR0
+            unsigned long itm;
+            unsigned long iva;
+            unsigned long rsv1[5];
+            unsigned long pta;  // CR8
+            unsigned long rsv2[7];
+            unsigned long ipsr;  // CR16
+            unsigned long isr;
+            unsigned long rsv3;
+            unsigned long iip;
+            unsigned long ifa;
+            unsigned long itir;
+            unsigned long iipa;
+            unsigned long ifs;
+            unsigned long iim;  // CR24
+            unsigned long iha;
+            unsigned long rsv4[38];
+            unsigned long lid;  // CR64
+            unsigned long ivr;
+            unsigned long tpr;
+            unsigned long eoi;
+            unsigned long irr[4];
+            unsigned long itv;  // CR72
+            unsigned long pmv;
+            unsigned long cmcv;
+            unsigned long rsv5[5];
+            unsigned long lrr0;  // CR80
+            unsigned long lrr1;
+            unsigned long rsv6[46];
+        };
+    };
+    union {
+        unsigned long  reserved5[128];
+        struct {
+            unsigned long precover_ifs;
+            unsigned long unat;  // not sure if this is needed until NaT arch is done
+            int interrupt_collection_enabled; // virtual psr.ic
+            /* virtual interrupt deliverable flag is evtchn_upcall_mask in
+             * shared info area now. interrupt_mask_addr is the address
+             * of evtchn_upcall_mask for current vcpu
+             */
+            unsigned char *interrupt_mask_addr;
+            int pending_interruption;
+            unsigned char vpsr_pp;
+            unsigned char vpsr_dfh;
+            unsigned char hpsr_dfh;
+            unsigned char hpsr_mfh;
+            unsigned long reserved5_1[4];
+            int metaphysical_mode; // 1 = use metaphys mapping, 0 = use virtual
+            int banknum; // 0 or 1, which virtual register bank is active
+            unsigned long rrs[8]; // region registers
+            unsigned long krs[8]; // kernel registers
+            unsigned long tmp[16]; // temp registers (e.g. for hyperprivops)
+        };
+    };
+};
+typedef struct mapped_regs mapped_regs_t;
+
+struct vpd {
+    struct mapped_regs vpd_low;
+    unsigned long  reserved6[3456];
+    unsigned long  vmm_avail[128];
+    unsigned long  reserved7[4096];
+};
+typedef struct vpd vpd_t;
+
+struct arch_vcpu_info {
+};
+typedef struct arch_vcpu_info arch_vcpu_info_t;
+
+/*
+ * This structure is used for magic page in domain pseudo physical address
+ * space and the result of XENMEM_machine_memory_map.
+ * As the XENMEM_machine_memory_map result,
+ * xen_memory_map::nr_entries indicates the size in bytes 
+ * including struct xen_ia64_memmap_info. Not the number of entries.
+ */
+struct xen_ia64_memmap_info {
+    uint64_t efi_memmap_size;       /* size of EFI memory map */
+    uint64_t efi_memdesc_size;      /* size of an EFI memory map descriptor */
+    uint32_t efi_memdesc_version;   /* memory descriptor version */
+    void *memdesc[0];               /* array of efi_memory_desc_t */
+};
+typedef struct xen_ia64_memmap_info xen_ia64_memmap_info_t;
+
+struct arch_shared_info {
+    /* PFN of the start_info page.  */
+    unsigned long start_info_pfn;
+
+    /* Interrupt vector for event channel.  */
+    int evtchn_vector;
+
+    /* PFN of memmap_info page */
+    unsigned int memmap_info_num_pages;/* currently only = 1 case is
+                                          supported. */
+    unsigned long memmap_info_pfn;
+
+    uint64_t pad[31];
+};
+typedef struct arch_shared_info arch_shared_info_t;
+
+typedef unsigned long xen_callback_t;
+
+struct ia64_tr_entry {
+    unsigned long pte;
+    unsigned long itir;
+    unsigned long vadr;
+    unsigned long rid;
+};
+typedef struct ia64_tr_entry ia64_tr_entry_t;
+DEFINE_XEN_GUEST_HANDLE(ia64_tr_entry_t);
+
+struct vcpu_tr_regs {
+    struct ia64_tr_entry itrs[12];
+    struct ia64_tr_entry dtrs[12];
+};
+
+union vcpu_ar_regs {
+    unsigned long ar[128];
+    struct {
+        unsigned long kr[8];
+        unsigned long rsv1[8];
+        unsigned long rsc;
+        unsigned long bsp;
+        unsigned long bspstore;
+        unsigned long rnat;
+        unsigned long rsv2;
+        unsigned long fcr;
+        unsigned long rsv3[2];
+        unsigned long eflag;
+        unsigned long csd;
+        unsigned long ssd;
+        unsigned long cflg;
+        unsigned long fsr;
+        unsigned long fir;
+        unsigned long fdr;
+        unsigned long rsv4;
+        unsigned long ccv; /* 32 */
+        unsigned long rsv5[3];
+        unsigned long unat;
+        unsigned long rsv6[3];
+        unsigned long fpsr;
+        unsigned long rsv7[3];
+        unsigned long itc;
+        unsigned long rsv8[3];
+        unsigned long ign1[16];
+        unsigned long pfs; /* 64 */
+        unsigned long lc;
+        unsigned long ec;
+        unsigned long rsv9[45];
+        unsigned long ign2[16];
+    };
+};
+
+union vcpu_cr_regs {
+    unsigned long cr[128];
+    struct {
+        unsigned long dcr;  // CR0
+        unsigned long itm;
+        unsigned long iva;
+        unsigned long rsv1[5];
+        unsigned long pta;  // CR8
+        unsigned long rsv2[7];
+        unsigned long ipsr;  // CR16
+        unsigned long isr;
+        unsigned long rsv3;
+        unsigned long iip;
+        unsigned long ifa;
+        unsigned long itir;
+        unsigned long iipa;
+        unsigned long ifs;
+        unsigned long iim;  // CR24
+        unsigned long iha;
+        unsigned long rsv4[38];
+        unsigned long lid;  // CR64
+        unsigned long ivr;
+        unsigned long tpr;
+        unsigned long eoi;
+        unsigned long irr[4];
+        unsigned long itv;  // CR72
+        unsigned long pmv;
+        unsigned long cmcv;
+        unsigned long rsv5[5];
+        unsigned long lrr0;  // CR80
+        unsigned long lrr1;
+        unsigned long rsv6[46];
+    };
+};
+
+struct vcpu_guest_context_regs {
+        unsigned long r[32];
+        unsigned long b[8];
+        unsigned long bank[16];
+        unsigned long ip;
+        unsigned long psr;
+        unsigned long cfm;
+        unsigned long pr;
+        unsigned int nats; /* NaT bits for r1-r31.  */
+        unsigned int bnats; /* Nat bits for banked registers.  */
+        union vcpu_ar_regs ar;
+        union vcpu_cr_regs cr;
+        struct pt_fpreg f[128];
+        unsigned long dbr[8];
+        unsigned long ibr[8];
+        unsigned long rr[8];
+        unsigned long pkr[16];
+
+        /* FIXME: cpuid,pmd,pmc */
+
+        unsigned long xip;
+        unsigned long xpsr;
+        unsigned long xfs;
+        unsigned long xr[4];
+
+        struct vcpu_tr_regs tr;
+
+        /* Physical registers in case of debug event.  */
+        unsigned long excp_iipa;
+        unsigned long excp_ifa;
+        unsigned long excp_isr;
+        unsigned int excp_vector;
+
+        /*
+         * The rbs is intended to be the image of the stacked registers still
+         * in the cpu (not yet stored in memory).  It is laid out as if it
+         * were written in memory at a 512 (64*8) aligned address + offset.
+         * rbs_voff is (offset / 8).  rbs_nat contains NaT bits for the
+         * remaining rbs registers.  rbs_rnat contains NaT bits for in memory
+         * rbs registers.
+         * Note: loadrs is 2**14 bytes == 2**11 slots.
+         */
+        unsigned int rbs_voff;
+        unsigned long rbs[2048];
+        unsigned long rbs_rnat;
+
+        /*
+         * RSE.N_STACKED_PHYS via PAL_RSE_INFO
+         * Strictly this isn't cpu context, but this value is necessary
+         * for domain save/restore. So is here.
+         */
+        unsigned long num_phys_stacked;
+};
+
+struct vcpu_guest_context {
+#define VGCF_EXTRA_REGS (1UL << 1)	/* Set extra regs.  */
+#define VGCF_SET_CR_IRR (1UL << 2)	/* Set cr_irr[0:3]. */
+#define VGCF_online     (1UL << 3)  /* make this vcpu online */
+    unsigned long flags;       /* VGCF_* flags */
+
+    struct vcpu_guest_context_regs regs;
+
+    unsigned long event_callback_ip;
+
+    /* Xen doesn't share privregs pages with HVM domains, so this member
+     * is not meaningful for an HVM domain.
+     * ~0UL is already used for INVALID_P2M_ENTRY. */
+#define VGC_PRIVREGS_HVM       (~(-2UL))
+    unsigned long privregs_pfn;
+};
+typedef struct vcpu_guest_context vcpu_guest_context_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_guest_context_t);
+
+/* dom0 vp op */
+#define __HYPERVISOR_ia64_dom0vp_op     __HYPERVISOR_arch_0
+/*  Map io space in machine address to dom0 physical address space.
+    Currently physical assigned address equals to machine address.  */
+#define IA64_DOM0VP_ioremap             0
+
+/* Convert a pseudo physical page frame number to the corresponding
+   machine page frame number. If no page is assigned, INVALID_MFN or
+   GPFN_INV_MASK is returned depending on domain's non-vti/vti mode.  */
+#define IA64_DOM0VP_phystomach          1
+
+/* Convert a machine page frame number to the corresponding pseudo physical
+   page frame number of the caller domain.  */
+#define IA64_DOM0VP_machtophys          3
+
+/* Reserved for future use.  */
+#define IA64_DOM0VP_iounmap             4
+
+/* Unmap and free pages contained in the specified pseudo physical region.  */
+#define IA64_DOM0VP_zap_physmap         5
+
+/* Assign machine page frame to dom0's pseudo physical address space.  */
+#define IA64_DOM0VP_add_physmap         6
+
+/* expose the p2m table into domain */
+#define IA64_DOM0VP_expose_p2m          7
+
+/* xen perfmon */
+#define IA64_DOM0VP_perfmon             8
+
+/* gmfn version of IA64_DOM0VP_add_physmap */
+#define IA64_DOM0VP_add_physmap_with_gmfn       9
+
+/* get fpswa revision */
+#define IA64_DOM0VP_fpswa_revision      10
+
+/* Add an I/O port space range */
+#define IA64_DOM0VP_add_io_space        11
+
+/* expose the foreign domain's p2m table into privileged domain */
+#define IA64_DOM0VP_expose_foreign_p2m  12
+#define         IA64_DOM0VP_EFP_ALLOC_PTE       0x1 /* allocate p2m table */
+
+/* unexpose the foreign domain's p2m table into privileged domain */
+#define IA64_DOM0VP_unexpose_foreign_p2m        13
+
+// flags for page assignment to pseudo physical address space
+#define _ASSIGN_readonly                0
+#define ASSIGN_readonly                 (1UL << _ASSIGN_readonly)
+#define ASSIGN_writable                 (0UL << _ASSIGN_readonly) // dummy flag
+/* Internal only: memory attribute must be WC/UC/UCE.  */
+#define _ASSIGN_nocache                 1
+#define ASSIGN_nocache                  (1UL << _ASSIGN_nocache)
+// tlb tracking
+#define _ASSIGN_tlb_track               2
+#define ASSIGN_tlb_track                (1UL << _ASSIGN_tlb_track)
+/* Internal only: associated with PGC_allocated bit */
+#define _ASSIGN_pgc_allocated           3
+#define ASSIGN_pgc_allocated            (1UL << _ASSIGN_pgc_allocated)
+/* Page is an IO page.  */
+#define _ASSIGN_io                      4
+#define ASSIGN_io                       (1UL << _ASSIGN_io)
+
+/* This structure has the same layout of struct ia64_boot_param, defined in
+   <asm/system.h>.  It is redefined here to ease use.  */
+struct xen_ia64_boot_param {
+	unsigned long command_line;	/* physical address of cmd line args */
+	unsigned long efi_systab;	/* physical address of EFI system table */
+	unsigned long efi_memmap;	/* physical address of EFI memory map */
+	unsigned long efi_memmap_size;	/* size of EFI memory map */
+	unsigned long efi_memdesc_size;	/* size of an EFI memory map descriptor */
+	unsigned int  efi_memdesc_version;	/* memory descriptor version */
+	struct {
+		unsigned short num_cols;	/* number of columns on console.  */
+		unsigned short num_rows;	/* number of rows on console.  */
+		unsigned short orig_x;	/* cursor's x position */
+		unsigned short orig_y;	/* cursor's y position */
+	} console_info;
+	unsigned long fpswa;		/* physical address of the fpswa interface */
+	unsigned long initrd_start;
+	unsigned long initrd_size;
+	unsigned long domain_start;	/* va where the boot time domain begins */
+	unsigned long domain_size;	/* how big is the boot domain */
+};
+
+#endif /* !__ASSEMBLY__ */
+
+/* Size of the shared_info area (this is not related to page size).  */
+#define XSI_SHIFT			14
+#define XSI_SIZE			(1 << XSI_SHIFT)
+/* Log size of mapped_regs area (64 KB - only 4KB is used).  */
+#define XMAPPEDREGS_SHIFT		12
+#define XMAPPEDREGS_SIZE		(1 << XMAPPEDREGS_SHIFT)
+/* Offset of XASI (Xen arch shared info) wrt XSI_BASE.  */
+#define XMAPPEDREGS_OFS			XSI_SIZE
+
+/* Hyperprivops.  */
+#define HYPERPRIVOP_START		0x1
+#define HYPERPRIVOP_RFI			(HYPERPRIVOP_START + 0x0)
+#define HYPERPRIVOP_RSM_DT		(HYPERPRIVOP_START + 0x1)
+#define HYPERPRIVOP_SSM_DT		(HYPERPRIVOP_START + 0x2)
+#define HYPERPRIVOP_COVER		(HYPERPRIVOP_START + 0x3)
+#define HYPERPRIVOP_ITC_D		(HYPERPRIVOP_START + 0x4)
+#define HYPERPRIVOP_ITC_I		(HYPERPRIVOP_START + 0x5)
+#define HYPERPRIVOP_SSM_I		(HYPERPRIVOP_START + 0x6)
+#define HYPERPRIVOP_GET_IVR		(HYPERPRIVOP_START + 0x7)
+#define HYPERPRIVOP_GET_TPR		(HYPERPRIVOP_START + 0x8)
+#define HYPERPRIVOP_SET_TPR		(HYPERPRIVOP_START + 0x9)
+#define HYPERPRIVOP_EOI			(HYPERPRIVOP_START + 0xa)
+#define HYPERPRIVOP_SET_ITM		(HYPERPRIVOP_START + 0xb)
+#define HYPERPRIVOP_THASH		(HYPERPRIVOP_START + 0xc)
+#define HYPERPRIVOP_PTC_GA		(HYPERPRIVOP_START + 0xd)
+#define HYPERPRIVOP_ITR_D		(HYPERPRIVOP_START + 0xe)
+#define HYPERPRIVOP_GET_RR		(HYPERPRIVOP_START + 0xf)
+#define HYPERPRIVOP_SET_RR		(HYPERPRIVOP_START + 0x10)
+#define HYPERPRIVOP_SET_KR		(HYPERPRIVOP_START + 0x11)
+#define HYPERPRIVOP_FC			(HYPERPRIVOP_START + 0x12)
+#define HYPERPRIVOP_GET_CPUID		(HYPERPRIVOP_START + 0x13)
+#define HYPERPRIVOP_GET_PMD		(HYPERPRIVOP_START + 0x14)
+#define HYPERPRIVOP_GET_EFLAG		(HYPERPRIVOP_START + 0x15)
+#define HYPERPRIVOP_SET_EFLAG		(HYPERPRIVOP_START + 0x16)
+#define HYPERPRIVOP_RSM_BE		(HYPERPRIVOP_START + 0x17)
+#define HYPERPRIVOP_GET_PSR		(HYPERPRIVOP_START + 0x18)
+#define HYPERPRIVOP_SET_RR0_TO_RR4	(HYPERPRIVOP_START + 0x19)
+#define HYPERPRIVOP_MAX			(0x1a)
+
+/* Fast and light hypercalls.  */
+#define __HYPERVISOR_ia64_fast_eoi	__HYPERVISOR_arch_1
+
+/* Extra debug features.  */
+#define __HYPERVISOR_ia64_debug_op  __HYPERVISOR_arch_2
+
+/* Xencomm macros.  */
+#define XENCOMM_INLINE_MASK 0xf800000000000000UL
+#define XENCOMM_INLINE_FLAG 0x8000000000000000UL
+
+#ifndef __ASSEMBLY__
+
+/*
+ * Optimization features.
+ * The hypervisor may do some special optimizations for guests. This hypercall
+ * can be used to switch on/off these special optimizations.
+ */
+#define __HYPERVISOR_opt_feature	0x700UL
+
+#define XEN_IA64_OPTF_OFF	0x0
+#define XEN_IA64_OPTF_ON	0x1
+
+/*
+ * If this feature is switched on, the hypervisor inserts the
+ * tlb entries without calling the guests traphandler.
+ * This is useful in guests using region 7 for identity mapping
+ * like the linux kernel does.
+ */
+#define XEN_IA64_OPTF_IDENT_MAP_REG7    1
+
+/* Identity mapping of region 4 addresses in HVM. */
+#define XEN_IA64_OPTF_IDENT_MAP_REG4    2
+
+/* Identity mapping of region 5 addresses in HVM. */
+#define XEN_IA64_OPTF_IDENT_MAP_REG5    3
+
+#define XEN_IA64_OPTF_IDENT_MAP_NOT_SET  (0)
+
+struct xen_ia64_opt_feature {
+	unsigned long cmd;		/* Which feature */
+	unsigned char on;		/* Switch feature on/off */
+	union {
+		struct {
+				/* The page protection bit mask of the pte.
+			 	 * This will be or'ed with the pte. */
+			unsigned long pgprot;
+			unsigned long key;	/* A protection key for itir. */
+		};
+	};
+};
+
+#endif /* !__ASSEMBLY__ */
+
+/* xen perfmon */
+#ifdef XEN
+#ifndef __ASSEMBLY__
+#ifndef _ASM_IA64_PERFMON_H
+
+#include <xen/list.h>   // asm/perfmon.h requires struct list_head
+#include <asm/perfmon.h>
+// for PFM_xxx and pfarg_features_t, pfarg_context_t, pfarg_reg_t, pfarg_load_t
+
+#endif /* _ASM_IA64_PERFMON_H */
+
+DEFINE_XEN_GUEST_HANDLE(pfarg_features_t);
+DEFINE_XEN_GUEST_HANDLE(pfarg_context_t);
+DEFINE_XEN_GUEST_HANDLE(pfarg_reg_t);
+DEFINE_XEN_GUEST_HANDLE(pfarg_load_t);
+#endif /* !__ASSEMBLY__ */
+#endif /* XEN */
+
+#ifndef __ASSEMBLY__
+#include "arch-ia64/hvm/memmap.h"
+#endif
+
+#endif /* __HYPERVISOR_IF_IA64_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/arch-ia64.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/dom0_ops.h
===================================================================
--- xen/interface/dom0_ops.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/dom0_ops.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,120 @@
+/******************************************************************************
+ * dom0_ops.h
+ * 
+ * Process command requests from domain-0 guest OS.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2003, B Dragovic
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_DOM0_OPS_H__
+#define __XEN_PUBLIC_DOM0_OPS_H__
+
+#include "xen.h"
+#include "platform.h"
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030204
+#error "dom0_ops.h is a compatibility interface only"
+#endif
+
+#define DOM0_INTERFACE_VERSION XENPF_INTERFACE_VERSION
+
+#define DOM0_SETTIME          XENPF_settime
+#define dom0_settime          xenpf_settime
+#define dom0_settime_t        xenpf_settime_t
+
+#define DOM0_ADD_MEMTYPE      XENPF_add_memtype
+#define dom0_add_memtype      xenpf_add_memtype
+#define dom0_add_memtype_t    xenpf_add_memtype_t
+
+#define DOM0_DEL_MEMTYPE      XENPF_del_memtype
+#define dom0_del_memtype      xenpf_del_memtype
+#define dom0_del_memtype_t    xenpf_del_memtype_t
+
+#define DOM0_READ_MEMTYPE     XENPF_read_memtype
+#define dom0_read_memtype     xenpf_read_memtype
+#define dom0_read_memtype_t   xenpf_read_memtype_t
+
+#define DOM0_MICROCODE        XENPF_microcode_update
+#define dom0_microcode        xenpf_microcode_update
+#define dom0_microcode_t      xenpf_microcode_update_t
+
+#define DOM0_PLATFORM_QUIRK   XENPF_platform_quirk
+#define dom0_platform_quirk   xenpf_platform_quirk
+#define dom0_platform_quirk_t xenpf_platform_quirk_t
+
+typedef uint64_t cpumap_t;
+
+/* Unsupported legacy operation -- defined for API compatibility. */
+#define DOM0_MSR                 15
+struct dom0_msr {
+    /* IN variables. */
+    uint32_t write;
+    cpumap_t cpu_mask;
+    uint32_t msr;
+    uint32_t in1;
+    uint32_t in2;
+    /* OUT variables. */
+    uint32_t out1;
+    uint32_t out2;
+};
+typedef struct dom0_msr dom0_msr_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_msr_t);
+
+/* Unsupported legacy operation -- defined for API compatibility. */
+#define DOM0_PHYSICAL_MEMORY_MAP 40
+struct dom0_memory_map_entry {
+    uint64_t start, end;
+    uint32_t flags; /* reserved */
+    uint8_t  is_ram;
+};
+typedef struct dom0_memory_map_entry dom0_memory_map_entry_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_memory_map_entry_t);
+
+struct dom0_op {
+    uint32_t cmd;
+    uint32_t interface_version; /* DOM0_INTERFACE_VERSION */
+    union {
+        struct dom0_msr               msr;
+        struct dom0_settime           settime;
+        struct dom0_add_memtype       add_memtype;
+        struct dom0_del_memtype       del_memtype;
+        struct dom0_read_memtype      read_memtype;
+        struct dom0_microcode         microcode;
+        struct dom0_platform_quirk    platform_quirk;
+        struct dom0_memory_map_entry  physical_memory_map;
+        uint8_t                       pad[128];
+    } u;
+};
+typedef struct dom0_op dom0_op_t;
+DEFINE_XEN_GUEST_HANDLE(dom0_op_t);
+
+#endif /* __XEN_PUBLIC_DOM0_OPS_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/dom0_ops.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/acm_ops.h
===================================================================
--- xen/interface/acm_ops.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/acm_ops.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,159 @@
+/*
+ * acm_ops.h: Xen access control module hypervisor commands
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Reiner Sailer <sailer@watson.ibm.com>
+ * Copyright (c) 2005,2006 International Business Machines Corporation.
+ */
+
+#ifndef __XEN_PUBLIC_ACM_OPS_H__
+#define __XEN_PUBLIC_ACM_OPS_H__
+
+#include "xen.h"
+#include "acm.h"
+
+/*
+ * Make sure you increment the interface version whenever you modify this file!
+ * This makes sure that old versions of acm tools will stop working in a
+ * well-defined way (rather than crashing the machine, for instance).
+ */
+#define ACM_INTERFACE_VERSION   0xAAAA000A
+
+/************************************************************************/
+
+/*
+ * Prototype for this hypercall is:
+ *  int acm_op(int cmd, void *args)
+ * @cmd  == ACMOP_??? (access control module operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
+
+#define ACMOP_setpolicy         1
+struct acm_setpolicy {
+    /* IN */
+    XEN_GUEST_HANDLE_64(void) pushcache;
+    uint32_t pushcache_size;
+};
+
+
+#define ACMOP_getpolicy         2
+struct acm_getpolicy {
+    /* IN */
+    XEN_GUEST_HANDLE_64(void) pullcache;
+    uint32_t pullcache_size;
+};
+
+
+#define ACMOP_dumpstats         3
+struct acm_dumpstats {
+    /* IN */
+    XEN_GUEST_HANDLE_64(void) pullcache;
+    uint32_t pullcache_size;
+};
+
+
+#define ACMOP_getssid           4
+#define ACM_GETBY_ssidref  1
+#define ACM_GETBY_domainid 2
+struct acm_getssid {
+    /* IN */
+    uint32_t get_ssid_by; /* ACM_GETBY_* */
+    union {
+        domaintype_t domainid;
+        ssidref_t    ssidref;
+    } id;
+    XEN_GUEST_HANDLE_64(void) ssidbuf;
+    uint32_t ssidbuf_size;
+};
+
+#define ACMOP_getdecision      5
+struct acm_getdecision {
+    /* IN */
+    uint32_t get_decision_by1; /* ACM_GETBY_* */
+    uint32_t get_decision_by2; /* ACM_GETBY_* */
+    union {
+        domaintype_t domainid;
+        ssidref_t    ssidref;
+    } id1;
+    union {
+        domaintype_t domainid;
+        ssidref_t    ssidref;
+    } id2;
+    uint32_t hook;
+    /* OUT */
+    uint32_t acm_decision;
+};
+
+
+#define ACMOP_chgpolicy        6
+struct acm_change_policy {
+    /* IN */
+    XEN_GUEST_HANDLE_64(void) policy_pushcache;
+    uint32_t policy_pushcache_size;
+    XEN_GUEST_HANDLE_64(void) del_array;
+    uint32_t delarray_size;
+    XEN_GUEST_HANDLE_64(void) chg_array;
+    uint32_t chgarray_size;
+    /* OUT */
+    /* array with error code */
+    XEN_GUEST_HANDLE_64(void) err_array;
+    uint32_t errarray_size;
+};
+
+#define ACMOP_relabeldoms       7
+struct acm_relabel_doms {
+    /* IN */
+    XEN_GUEST_HANDLE_64(void) relabel_map;
+    uint32_t relabel_map_size;
+    /* OUT */
+    XEN_GUEST_HANDLE_64(void) err_array;
+    uint32_t errarray_size;
+};
+
+/* future interface to Xen */
+struct xen_acmctl {
+    uint32_t cmd;
+    uint32_t interface_version;
+    union {
+        struct acm_setpolicy     setpolicy;
+        struct acm_getpolicy     getpolicy;
+        struct acm_dumpstats     dumpstats;
+        struct acm_getssid       getssid;
+        struct acm_getdecision   getdecision;
+        struct acm_change_policy change_policy;
+        struct acm_relabel_doms  relabel_doms;
+    } u;
+};
+
+typedef struct xen_acmctl xen_acmctl_t;
+DEFINE_XEN_GUEST_HANDLE(xen_acmctl_t);
+
+#endif /* __XEN_PUBLIC_ACM_OPS_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/acm_ops.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/io/pciif.h
===================================================================
--- xen/interface/io/pciif.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/io/pciif.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,101 @@
+/*
+ * PCI Backend/Frontend Common Data Structures & Macros
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ *   Author: Ryan Wilson <hap9@epoch.ncsc.mil>
+ */
+#ifndef __XEN_PCI_COMMON_H__
+#define __XEN_PCI_COMMON_H__
+
+/* Be sure to bump this number if you change this file */
+#define XEN_PCI_MAGIC "7"
+
+/* xen_pci_sharedinfo flags (_XEN_PCIF_*: bit number, XEN_PCIF_*: bit mask) */
+#define _XEN_PCIF_active     (0)
+#define XEN_PCIF_active      (1<<_XEN_PCIF_active)
+
+/* xen_pci_op commands */
+#define XEN_PCI_OP_conf_read    (0)
+#define XEN_PCI_OP_conf_write   (1)
+#define XEN_PCI_OP_enable_msi   (2)
+#define XEN_PCI_OP_disable_msi  (3)
+#define XEN_PCI_OP_enable_msix  (4)
+#define XEN_PCI_OP_disable_msix (5)
+
+/* xen_pci_op error numbers */
+#define XEN_PCI_ERR_success          (0)
+#define XEN_PCI_ERR_dev_not_found   (-1)
+#define XEN_PCI_ERR_invalid_offset  (-2)
+#define XEN_PCI_ERR_access_denied   (-3)
+#define XEN_PCI_ERR_not_implemented (-4)
+/* XEN_PCI_ERR_op_failed - backend failed to complete the operation */
+#define XEN_PCI_ERR_op_failed       (-5)
+
+/*
+ * it should be ((PAGE_SIZE-sizeof(struct xen_pci_op))/sizeof(struct msix_entry))
+ * Should not exceed 128
+ */
+#define SH_INFO_MAX_VEC     128
+
+struct xen_msix_entry { /* element type of xen_pci_op.msix_entries[] */
+    uint16_t vector;
+    uint16_t entry;
+};
+struct xen_pci_op { /* one PCI request (IN fields) and its result (OUT fields) */
+    /* IN: what action to perform: XEN_PCI_OP_* */
+    uint32_t cmd;
+
+    /* OUT: will contain an error number (if any) from errno.h */
+    int32_t err;
+
+    /* IN: which device to touch */
+    uint32_t domain; /* PCI Domain/Segment */
+    uint32_t bus;
+    uint32_t devfn;
+
+    /* IN: which configuration registers to touch */
+    int32_t offset;
+    int32_t size;
+
+    /* IN/OUT: Contains the result after a READ or the value to WRITE */
+    uint32_t value;
+    /* IN: Contains extra info for this operation */
+    uint32_t info;
+    /* IN: param for msi-x */
+    struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC];
+};
+
+struct xen_pci_sharedinfo { /* flags word followed by one embedded op */
+    /* flags - XEN_PCIF_* */
+    uint32_t flags;
+    struct xen_pci_op op;
+};
+
+#endif /* __XEN_PCI_COMMON_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/io/pciif.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/io/kbdif.h
===================================================================
--- xen/interface/io/kbdif.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/io/kbdif.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,132 @@
+/*
+ * kbdif.h -- Xen virtual keyboard/mouse
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_KBDIF_H__
+#define __XEN_PUBLIC_IO_KBDIF_H__
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/* Pointer movement event */
+#define XENKBD_TYPE_MOTION  1
+/* Event type 2 currently not used */
+/* Key event (includes pointer buttons) */
+#define XENKBD_TYPE_KEY     3
+/*
+ * Pointer position event
+ * Capable backend sets feature-abs-pointer in xenstore.
+ * Frontend requests it instead of XENKBD_TYPE_MOTION by setting
+ * request-abs-update in xenstore.
+ */
+#define XENKBD_TYPE_POS     4
+
+struct xenkbd_motion
+{
+    uint8_t type;        /* XENKBD_TYPE_MOTION */
+    int32_t rel_x;       /* relative X motion */
+    int32_t rel_y;       /* relative Y motion */
+    int32_t rel_z;       /* relative Z motion (wheel) */
+};
+
+struct xenkbd_key
+{
+    uint8_t type;         /* XENKBD_TYPE_KEY */
+    uint8_t pressed;      /* 1 if pressed; 0 otherwise */
+    uint32_t keycode;     /* KEY_* from linux/input.h */
+};
+
+struct xenkbd_position
+{
+    uint8_t type;        /* XENKBD_TYPE_POS */
+    int32_t abs_x;       /* absolute X position (in FB pixels) */
+    int32_t abs_y;       /* absolute Y position (in FB pixels) */
+    int32_t rel_z;       /* relative Z motion (wheel) */
+};
+
+#define XENKBD_IN_EVENT_SIZE 40
+
+union xenkbd_in_event
+{
+    uint8_t type;                   /* every member struct also starts with type */
+    struct xenkbd_motion motion;
+    struct xenkbd_key key;
+    struct xenkbd_position pos;
+    char pad[XENKBD_IN_EVENT_SIZE]; /* pads the slot to XENKBD_IN_EVENT_SIZE */
+};
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ * No out events currently defined.
+ */
+
+#define XENKBD_OUT_EVENT_SIZE 40
+
+union xenkbd_out_event
+{
+    uint8_t type;
+    char pad[XENKBD_OUT_EVENT_SIZE]; /* pads the slot to XENKBD_OUT_EVENT_SIZE */
+};
+
+/* shared page: in ring at byte XENKBD_IN_RING_OFFS, out ring follows it. */
+/* *_RING_LEN (2048/40, 1024/40) is not a power of two, so _REF uses % not &. */
+#define XENKBD_IN_RING_SIZE 2048
+#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
+#define XENKBD_IN_RING_OFFS 1024
+#define XENKBD_IN_RING(page) \
+    ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
+#define XENKBD_IN_RING_REF(page, idx) \
+    (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
+
+#define XENKBD_OUT_RING_SIZE 1024
+#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
+#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
+#define XENKBD_OUT_RING(page) \
+    ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
+#define XENKBD_OUT_RING_REF(page, idx) \
+    (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
+
+struct xenkbd_page
+{
+    uint32_t in_cons, in_prod;   /* in ring indices; _REF reduces them % LEN */
+    uint32_t out_cons, out_prod; /* out ring indices; _REF reduces them % LEN */
+};
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/io/kbdif.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/io/ring.h
===================================================================
--- xen/interface/io/ring.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/io/ring.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,307 @@
+/******************************************************************************
+ * ring.h
+ * 
+ * Shared producer-consumer ring macros.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Tim Deegan and Andrew Warfield November 2004.
+ */
+
+#ifndef __XEN_PUBLIC_IO_RING_H__
+#define __XEN_PUBLIC_IO_RING_H__
+
+#include "../xen-compat.h"
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030208
+#define xen_mb()  mb()
+#define xen_rmb() rmb()
+#define xen_wmb() wmb()
+#endif
+
+typedef unsigned int RING_IDX;
+
+/* Round a 32-bit unsigned constant down to the nearest power of two. */
+#define __RD2(_x)  (((_x) & 0x00000002) ? 0x2                  : ((_x) & 0x1))
+#define __RD4(_x)  (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2    : __RD2(_x))
+#define __RD8(_x)  (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4    : __RD4(_x))
+#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8    : __RD8(_x))
+#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
+
+/*
+ * Calculate size of a shared ring, given the total available space for the
+ * ring and indexes (_sz), and the name tag of the request/response structure.
+ * A ring contains as many entries as will fit, rounded down to the nearest 
+ * power of two (so we can mask with (size-1) to loop around).
+ */
+#define __RING_SIZE(_s, _sz) \
+    (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
+
+/*
+ * Macros to make the correct C datatypes for a new kind of ring.
+ * 
+ * To make a new ring datatype, you need to have two message structures,
+ * let's say request_t, and response_t already defined.
+ *
+ * In a header where you want the ring datatype declared, you then do:
+ *
+ *     DEFINE_RING_TYPES(mytag, request_t, response_t);
+ *
+ * These expand out to give you a set of types, as you can see below.
+ * The most important of these are:
+ * 
+ *     mytag_sring_t      - The shared ring.
+ *     mytag_front_ring_t - The 'front' half of the ring.
+ *     mytag_back_ring_t  - The 'back' half of the ring.
+ *
+ * To initialize a ring in your code you need to know the location and size
+ * of the shared memory area (PAGE_SIZE, for instance). To initialise
+ * the front half:
+ *
+ *     mytag_front_ring_t front_ring;
+ *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
+ *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *
+ * Initializing the back follows similarly (note that only the front
+ * initializes the shared ring):
+ *
+ *     mytag_back_ring_t back_ring;
+ *     BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ */
+
+#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t)                     \
+                                                                        \
+/* Shared ring entry */                                                 \
+union __name##_sring_entry {                                            \
+    __req_t req;                                                        \
+    __rsp_t rsp;                                                        \
+};                                                                      \
+                                                                        \
+/* Shared ring page */                                                  \
+struct __name##_sring {                                                 \
+    RING_IDX req_prod, req_event;                                       \
+    RING_IDX rsp_prod, rsp_event;                                       \
+    uint8_t  pad[48];                                                   \
+    union __name##_sring_entry ring[1]; /* variable-length */           \
+};                                                                      \
+                                                                        \
+/* "Front" end's private variables */                                   \
+struct __name##_front_ring {                                            \
+    RING_IDX req_prod_pvt;                                              \
+    RING_IDX rsp_cons;                                                  \
+    unsigned int nr_ents;                                               \
+    struct __name##_sring *sring;                                       \
+};                                                                      \
+                                                                        \
+/* "Back" end's private variables */                                    \
+struct __name##_back_ring {                                             \
+    RING_IDX rsp_prod_pvt;                                              \
+    RING_IDX req_cons;                                                  \
+    unsigned int nr_ents;                                               \
+    struct __name##_sring *sring;                                       \
+};                                                                      \
+                                                                        \
+/* Syntactic sugar */                                                   \
+typedef struct __name##_sring __name##_sring_t;                         \
+typedef struct __name##_front_ring __name##_front_ring_t;               \
+typedef struct __name##_back_ring __name##_back_ring_t
+
+/*
+ * Macros for manipulating rings.
+ * 
+ * FRONT_RING_whatever works on the "front end" of a ring: here 
+ * requests are pushed on to the ring and responses taken off it.
+ * 
+ * BACK_RING_whatever works on the "back end" of a ring: here 
+ * requests are taken off the ring and responses put on.
+ * 
+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. 
+ * This is OK in 1-for-1 request-response situations where the 
+ * requestor (front end) never has more than RING_SIZE()-1
+ * outstanding requests.
+ */
+
+/* Initialising empty rings */
+#define SHARED_RING_INIT(_s) do {                                       \
+    (_s)->req_prod  = (_s)->rsp_prod  = 0;                              \
+    (_s)->req_event = (_s)->rsp_event = 1;                              \
+    (void)memset((_s)->pad, 0, sizeof((_s)->pad));                      \
+} while(0)
+
+#define FRONT_RING_INIT(_r, _s, __size) do {                            \
+    (_r)->req_prod_pvt = 0;                                             \
+    (_r)->rsp_cons = 0;                                                 \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+    (_r)->sring = (_s);                                                 \
+} while (0)
+
+#define BACK_RING_INIT(_r, _s, __size) do {                             \
+    (_r)->rsp_prod_pvt = 0;                                             \
+    (_r)->req_cons = 0;                                                 \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+    (_r)->sring = (_s);                                                 \
+} while (0)
+
+/* Initialize to existing shared indexes -- for recovery */
+#define FRONT_RING_ATTACH(_r, _s, __size) do {                          \
+    (_r)->sring = (_s);                                                 \
+    (_r)->req_prod_pvt = (_s)->req_prod;                                \
+    (_r)->rsp_cons = (_s)->rsp_prod;                                    \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+} while (0)
+
+#define BACK_RING_ATTACH(_r, _s, __size) do {                           \
+    (_r)->sring = (_s);                                                 \
+    (_r)->rsp_prod_pvt = (_s)->rsp_prod;                                \
+    (_r)->req_cons = (_s)->req_prod;                                    \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+} while (0)
+
+/* How big is this ring? */
+#define RING_SIZE(_r)                                                   \
+    ((_r)->nr_ents)
+
+/* Number of free requests (for use on front side only). */
+#define RING_FREE_REQUESTS(_r)                                          \
+    (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
+/* Test if there is NO empty slot available on the front ring, i.e. it is full.
+ * (This is only meaningful from the front. )
+ */
+#define RING_FULL(_r)                                                   \
+    (RING_FREE_REQUESTS(_r) == 0)
+
+/* Test if there are outstanding messages to be processed on a ring. */
+#define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
+    ((_r)->sring->rsp_prod - (_r)->rsp_cons)
+
+#ifdef __GNUC__
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                             \
+    unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;          \
+    unsigned int rsp = RING_SIZE(_r) -                                  \
+        ((_r)->req_cons - (_r)->rsp_prod_pvt);                          \
+    req < rsp ? req : rsp;                                              \
+})
+#else
+/* Same as above, but without the nice GCC ({ ... }) syntax. */
+#define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
+    ((((_r)->sring->req_prod - (_r)->req_cons) <                        \
+      (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ?        \
+     ((_r)->sring->req_prod - (_r)->req_cons) :                         \
+     (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
+#endif
+
+/* Direct access to individual ring elements, by index. */
+#define RING_GET_REQUEST(_r, _idx)                                      \
+    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
+
+#define RING_GET_RESPONSE(_r, _idx)                                     \
+    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
+
+/* Loop termination condition: Would the specified index overflow the ring? */
+#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                           \
+    (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+
+#define RING_PUSH_REQUESTS(_r) do {                                     \
+    xen_wmb(); /* back sees requests /before/ updated producer index */ \
+    (_r)->sring->req_prod = (_r)->req_prod_pvt;                         \
+} while (0)
+
+#define RING_PUSH_RESPONSES(_r) do {                                    \
+    xen_wmb(); /* front sees resps /before/ updated producer index */   \
+    (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                         \
+} while (0)
+
+/*
+ * Notification hold-off (req_event and rsp_event):
+ * 
+ * When queueing requests or responses on a shared ring, it may not always be
+ * necessary to notify the remote end. For example, if requests are in flight
+ * in a backend, the front may be able to queue further requests without
+ * notifying the back (if the back checks for new requests when it queues
+ * responses).
+ * 
+ * When enqueuing requests or responses:
+ * 
+ *  Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
+ *  is a boolean return value. True indicates that the receiver requires an
+ *  asynchronous notification.
+ * 
+ * After dequeuing requests or responses (before sleeping the connection):
+ * 
+ *  Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
+ *  The second argument is a boolean return value. True indicates that there
+ *  are pending messages on the ring (i.e., the connection should not be put
+ *  to sleep).
+ * 
+ *  These macros will set the req_event/rsp_event field to trigger a
+ *  notification on the very next message that is enqueued. If you want to
+ *  create batches of work (i.e., only receive a notification after several
+ *  messages have been enqueued) then you will need to create a customised
+ *  version of the FINAL_CHECK macro in your own code, which sets the event
+ *  field appropriately.
+ */
+
+#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {           \
+    RING_IDX __old = (_r)->sring->req_prod;                             \
+    RING_IDX __new = (_r)->req_prod_pvt;                                \
+    xen_wmb(); /* back sees requests /before/ updated producer index */ \
+    (_r)->sring->req_prod = __new;                                      \
+    xen_mb(); /* back sees new requests /before/ we check req_event */  \
+    (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <           \
+                 (RING_IDX)(__new - __old));                            \
+} while (0)
+
+#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {          \
+    RING_IDX __old = (_r)->sring->rsp_prod;                             \
+    RING_IDX __new = (_r)->rsp_prod_pvt;                                \
+    xen_wmb(); /* front sees resps /before/ updated producer index */   \
+    (_r)->sring->rsp_prod = __new;                                      \
+    xen_mb(); /* front sees new resps /before/ we check rsp_event */    \
+    (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <           \
+                 (RING_IDX)(__new - __old));                            \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {             \
+    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
+    if (_work_to_do) break;                                             \
+    (_r)->sring->req_event = (_r)->req_cons + 1;                        \
+    xen_mb();                                                           \
+    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {            \
+    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
+    if (_work_to_do) break;                                             \
+    (_r)->sring->rsp_event = (_r)->rsp_cons + 1;                        \
+    xen_mb();                                                           \
+    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
+} while (0)
+
+#endif /* __XEN_PUBLIC_IO_RING_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/io/ring.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/io/tpmif.h
===================================================================
--- xen/interface/io/tpmif.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/io/tpmif.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,77 @@
+/******************************************************************************
+ * tpmif.h
+ *
+ * TPM I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, IBM Corporation
+ *
+ * Author: Stefan Berger, stefanb@us.ibm.com
+ * Grant table support: Mahadevan Gomathisankaran
+ *
+ * This code has been derived from tools/libxc/xen/io/netif.h
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_TPMIF_H__
+#define __XEN_PUBLIC_IO_TPMIF_H__
+
+#include "../grant_table.h"
+
+struct tpmif_tx_request {
+    unsigned long addr;   /* Machine address of packet.   */
+    grant_ref_t ref;      /* grant table access reference */
+    uint16_t unused;
+    uint16_t size;        /* Packet size in bytes.        */
+};
+typedef struct tpmif_tx_request tpmif_tx_request_t;
+
+/*
+ * The TPMIF_TX_RING_SIZE defines the number of pages the
+ * front-end and backend can exchange (= size of array).
+ */
+typedef uint32_t TPMIF_RING_IDX;
+
+#define TPMIF_TX_RING_SIZE 1
+
+/* This structure must fit in a memory page. */
+
+struct tpmif_ring {
+    struct tpmif_tx_request req;
+};
+typedef struct tpmif_ring tpmif_ring_t;
+
+struct tpmif_tx_interface {
+    struct tpmif_ring ring[TPMIF_TX_RING_SIZE];
+};
+typedef struct tpmif_tx_interface tpmif_tx_interface_t;
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/io/tpmif.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/io/xs_wire.h
===================================================================
--- xen/interface/io/xs_wire.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/io/xs_wire.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,130 @@
+/*
+ * Details of the "wire" protocol between Xen Store Daemon and client
+ * library or guest kernel.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Rusty Russell IBM Corporation
+ */
+
+#ifndef _XS_WIRE_H
+#define _XS_WIRE_H
+
+enum xsd_sockmsg_type
+{
+    XS_DEBUG,
+    XS_DIRECTORY,
+    XS_READ,
+    XS_GET_PERMS,
+    XS_WATCH,
+    XS_UNWATCH,
+    XS_TRANSACTION_START,
+    XS_TRANSACTION_END,
+    XS_INTRODUCE,
+    XS_RELEASE,
+    XS_GET_DOMAIN_PATH,
+    XS_WRITE,
+    XS_MKDIR,
+    XS_RM,
+    XS_SET_PERMS,
+    XS_WATCH_EVENT,
+    XS_ERROR,
+    XS_IS_DOMAIN_INTRODUCED,
+    XS_RESUME,
+    XS_SET_TARGET
+};
+
+#define XS_WRITE_NONE "NONE"
+#define XS_WRITE_CREATE "CREATE"
+#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
+
+/* We hand errors as strings, for portability. */
+struct xsd_errors
+{
+    int errnum;
+    const char *errstring;
+};
+#define XSD_ERROR(x) { x, #x }
+/* LINTED: static unused */
+static struct xsd_errors xsd_errors[]
+#if defined(__GNUC__)
+__attribute__((unused))
+#endif
+    = {
+    XSD_ERROR(EINVAL),
+    XSD_ERROR(EACCES),
+    XSD_ERROR(EEXIST),
+    XSD_ERROR(EISDIR),
+    XSD_ERROR(ENOENT),
+    XSD_ERROR(ENOMEM),
+    XSD_ERROR(ENOSPC),
+    XSD_ERROR(EIO),
+    XSD_ERROR(ENOTEMPTY),
+    XSD_ERROR(ENOSYS),
+    XSD_ERROR(EROFS),
+    XSD_ERROR(EBUSY),
+    XSD_ERROR(EAGAIN),
+    XSD_ERROR(EISCONN)
+};
+
+struct xsd_sockmsg
+{
+    uint32_t type;  /* XS_??? */
+    uint32_t req_id;/* Request identifier, echoed in daemon's response.  */
+    uint32_t tx_id; /* Transaction id (0 if not related to a transaction). */
+    uint32_t len;   /* Length of data following this. */
+
+    /* Generally followed by nul-terminated string(s). */
+};
+
+enum xs_watch_type
+{
+    XS_WATCH_PATH = 0,
+    XS_WATCH_TOKEN
+};
+
+/* Inter-domain shared memory communications. */
+#define XENSTORE_RING_SIZE 1024
+typedef uint32_t XENSTORE_RING_IDX;
+#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE-1))
+struct xenstore_domain_interface {
+    char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
+    char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
+    XENSTORE_RING_IDX req_cons, req_prod;
+    XENSTORE_RING_IDX rsp_cons, rsp_prod;
+};
+
+/* Violating this is very bad.  See docs/misc/xenstore.txt. */
+#define XENSTORE_PAYLOAD_MAX 4096
+
+/* Violating these just gets you an error back */
+#define XENSTORE_ABS_PATH_MAX 3072
+#define XENSTORE_REL_PATH_MAX 2048
+
+#endif /* _XS_WIRE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/io/xs_wire.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/io/console.h
===================================================================
--- xen/interface/io/console.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/io/console.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,51 @@
+/******************************************************************************
+ * console.h
+ * 
+ * Console I/O interface for Xen guest OSes.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
+#define __XEN_PUBLIC_IO_CONSOLE_H__
+
+typedef uint32_t XENCONS_RING_IDX;
+
+#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1))
+
+struct xencons_interface {
+    char in[1024];    /* sizeof must be 2^n for MASK_XENCONS_IDX */
+    char out[2048];   /* sizeof must be 2^n for MASK_XENCONS_IDX */
+    XENCONS_RING_IDX in_cons, in_prod;    /* presumably indices into in[] */
+    XENCONS_RING_IDX out_cons, out_prod;  /* presumably indices into out[] */
+};
+
+#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/io/console.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/io/protocols.h
===================================================================
--- xen/interface/io/protocols.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/io/protocols.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,40 @@
+/******************************************************************************
+ * protocols.h
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PROTOCOLS_H__
+#define __XEN_PROTOCOLS_H__
+
+#define XEN_IO_PROTO_ABI_X86_32     "x86_32-abi"
+#define XEN_IO_PROTO_ABI_X86_64     "x86_64-abi"
+#define XEN_IO_PROTO_ABI_IA64       "ia64-abi"
+
+#if defined(__i386__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
+#elif defined(__x86_64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
+#elif defined(__ia64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64
+#else
+# error arch fixup needed here
+#endif
+
+#endif

Property changes on: xen/interface/io/protocols.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/io/xenbus.h
===================================================================
--- xen/interface/io/xenbus.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/io/xenbus.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,80 @@
+/*****************************************************************************
+ * xenbus.h
+ *
+ * Xenbus protocol details.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 XenSource Ltd.
+ */
+
+#ifndef _XEN_PUBLIC_IO_XENBUS_H
+#define _XEN_PUBLIC_IO_XENBUS_H
+
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus.  States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+    XenbusStateUnknown       = 0,
+
+    XenbusStateInitialising  = 1,
+
+    /*
+     * InitWait: Finished early initialisation but waiting for information
+     * from the peer or hotplug scripts.
+     */
+    XenbusStateInitWait      = 2,
+
+    /*
+     * Initialised: Waiting for a connection from the peer.
+     */
+    XenbusStateInitialised   = 3,
+
+    XenbusStateConnected     = 4,
+
+    /*
+     * Closing: The device is being closed due to an error or an unplug event.
+     */
+    XenbusStateClosing       = 5,
+
+    XenbusStateClosed        = 6,
+
+    /*
+     * Reconfiguring: The device is being reconfigured.
+     */
+    XenbusStateReconfiguring = 7,
+
+    XenbusStateReconfigured  = 8
+};
+typedef enum xenbus_state XenbusState;
+
+#endif /* _XEN_PUBLIC_IO_XENBUS_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/io/xenbus.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/io/netif.h
===================================================================
--- xen/interface/io/netif.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/io/netif.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,205 @@
+/******************************************************************************
+ * netif.h
+ * 
+ * Unified network-device I/O interface for Xen guest OSes.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_NETIF_H__
+#define __XEN_PUBLIC_IO_NETIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Notifications after enqueuing any type of message should be conditional on
+ * the appropriate req_event or rsp_event field in the shared ring.
+ * If the client sends notification for rx requests then it should specify
+ * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
+ * that it cannot safely queue packets (as it may not be kicked to send them).
+ */
+
+/*
+ * This is the 'wire' format for packets:
+ *  Request 1: netif_tx_request -- NETTXF_* (any flags)
+ * [Request 2: netif_extra_info] (only if request 1 has NETTXF_extra_info)
+ * [Request 3: netif_extra_info] (only if request 2 has XEN_NETIF_EXTRA_FLAG_MORE)
+ *  Request 4: netif_tx_request -- NETTXF_more_data
+ *  Request 5: netif_tx_request -- NETTXF_more_data
+ *  ...
+ *  Request N: netif_tx_request -- 0
+ */
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETTXF_csum_blank     (0)
+#define  NETTXF_csum_blank     (1U<<_NETTXF_csum_blank)
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETTXF_data_validated (1)
+#define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
+
+/* Packet continues in the next request descriptor. */
+#define _NETTXF_more_data      (2)
+#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETTXF_extra_info     (3)
+#define  NETTXF_extra_info     (1U<<_NETTXF_extra_info)
+
+struct netif_tx_request {
+    grant_ref_t gref;      /* Reference to buffer page */
+    uint16_t offset;       /* Offset within buffer page */
+    uint16_t flags;        /* NETTXF_* */
+    uint16_t id;           /* Echoed in response message. */
+    uint16_t size;         /* Packet size in bytes.       */
+};
+typedef struct netif_tx_request netif_tx_request_t;
+
+/* Types of netif_extra_info descriptors. */
+#define XEN_NETIF_EXTRA_TYPE_NONE      (0)  /* Never used - invalid */
+#define XEN_NETIF_EXTRA_TYPE_GSO       (1)  /* u.gso */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2)  /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3)  /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MAX       (4)
+
+/* netif_extra_info flags. */
+#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
+#define XEN_NETIF_EXTRA_FLAG_MORE  (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
+
+/* GSO types - only TCPv4 currently supported. */
+#define XEN_NETIF_GSO_TYPE_TCPV4        (1)
+
+/*
+ * This structure needs to fit within both netif_tx_request and
+ * netif_rx_response for compatibility.
+ */
+struct netif_extra_info {
+    uint8_t type;  /* XEN_NETIF_EXTRA_TYPE_* */
+    uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */
+
+    union {
+        /*
+         * XEN_NETIF_EXTRA_TYPE_GSO:
+         */
+        struct {
+            /*
+             * Maximum payload size of each segment. For example, for TCP this
+             * is just the path MSS.
+             */
+            uint16_t size;
+
+            /*
+             * GSO type. This determines the protocol of the packet and any
+             * extra features required to segment the packet properly.
+             */
+            uint8_t type; /* XEN_NETIF_GSO_TYPE_* */
+
+            /* Future expansion. */
+            uint8_t pad;
+
+            /*
+             * GSO features. This specifies any extra GSO features required
+             * to process this packet, such as ECN support for TCPv4.
+             */
+            uint16_t features; /* XEN_NETIF_GSO_FEAT_* */
+        } gso;
+
+        /*
+         * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
+         * Backend advertises availability via 'feature-multicast-control'
+         * xenbus node containing value '1'.
+         * Frontend requests this feature by advertising
+         * 'request-multicast-control' xenbus node containing value '1'.
+         * If multicast control is requested then multicast flooding is
+         * disabled and the frontend must explicitly register its interest
+         * in multicast groups using dummy transmit requests containing
+         * MCAST_{ADD,DEL} extra-info fragments.
+         */
+        struct {
+            uint8_t addr[6]; /* Address to add/remove. */
+        } mcast;
+
+        uint16_t pad[3];
+    } u;
+};
+typedef struct netif_extra_info netif_extra_info_t;
+
+struct netif_tx_response {
+    uint16_t id;
+    int16_t  status;       /* NETIF_RSP_* */
+};
+typedef struct netif_tx_response netif_tx_response_t;
+
+struct netif_rx_request {
+    uint16_t    id;        /* Echoed in response message.        */
+    grant_ref_t gref;      /* Reference to incoming granted frame */
+};
+typedef struct netif_rx_request netif_rx_request_t;
+
+/* Packet data has been validated against protocol checksum. */
+#define _NETRXF_data_validated (0)
+#define  NETRXF_data_validated (1U<<_NETRXF_data_validated)
+
+/* Protocol checksum field is blank in the packet (hardware offload)? */
+#define _NETRXF_csum_blank     (1)
+#define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
+
+/* Packet continues in the next request descriptor. */
+#define _NETRXF_more_data      (2)
+#define  NETRXF_more_data      (1U<<_NETRXF_more_data)
+
+/* Packet to be followed by extra descriptor(s). */
+#define _NETRXF_extra_info     (3)
+#define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
+
+struct netif_rx_response {
+    uint16_t id;
+    uint16_t offset;       /* Offset in page of start of received packet  */
+    uint16_t flags;        /* NETRXF_* */
+    int16_t  status;       /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */
+};
+typedef struct netif_rx_response netif_rx_response_t;
+
+/*
+ * Generate netif ring structures and types.
+ */
+
+DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
+DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
+
+#define NETIF_RSP_DROPPED         -2
+#define NETIF_RSP_ERROR           -1
+#define NETIF_RSP_OKAY             0
+/* No response: used for auxiliary requests (e.g., netif_extra_info). */
+#define NETIF_RSP_NULL             1
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/io/netif.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/io/fbif.h
===================================================================
--- xen/interface/io/fbif.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/io/fbif.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,176 @@
+/*
+ * fbif.h -- Xen virtual frame buffer device
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
+ * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
+ */
+
+#ifndef __XEN_PUBLIC_IO_FBIF_H__
+#define __XEN_PUBLIC_IO_FBIF_H__
+
+/* Out events (frontend -> backend) */
+
+/*
+ * Out events may be sent only when requested by backend, and receipt
+ * of an unknown out event is an error.
+ */
+
+/* Event type 1 currently not used */
+/*
+ * Framebuffer update notification event
+ * Capable frontend sets feature-update in xenstore.
+ * Backend requests it by setting request-update in xenstore.
+ */
+#define XENFB_TYPE_UPDATE 2
+
+struct xenfb_update
+{
+    uint8_t type;    /* XENFB_TYPE_UPDATE */
+    int32_t x;      /* source x */
+    int32_t y;      /* source y */
+    int32_t width;  /* rect width */
+    int32_t height; /* rect height */
+};
+
+/*
+ * Framebuffer resize notification event
+ * Capable backend sets feature-resize in xenstore.
+ */
+#define XENFB_TYPE_RESIZE 3
+
+struct xenfb_resize
+{
+    uint8_t type;    /* XENFB_TYPE_RESIZE */
+    int32_t width;   /* width in pixels */
+    int32_t height;  /* height in pixels */
+    int32_t stride;  /* stride in bytes */
+    int32_t depth;   /* depth in bits */
+    int32_t offset;  /* offset of the framebuffer in bytes */
+};
+
+#define XENFB_OUT_EVENT_SIZE 40
+
+union xenfb_out_event
+{
+    uint8_t type;
+    struct xenfb_update update;
+    struct xenfb_resize resize;
+    char pad[XENFB_OUT_EVENT_SIZE];
+};
+
+/* In events (backend -> frontend) */
+
+/*
+ * Frontends should ignore unknown in events.
+ */
+
+/*
+ * Framebuffer refresh period advice
+ * Backend sends it to advise the frontend of its preferred refresh
+ * period.  Frontends that keep the framebuffer constantly up-to-date
+ * just ignore it.  Frontends that use the advice should immediately
+ * refresh the framebuffer (and send an update notification event if
+ * those have been requested), then use the advised period to guide
+ * their periodic refreshes.
+ */
+#define XENFB_TYPE_REFRESH_PERIOD 1
+#define XENFB_NO_REFRESH 0
+
+struct xenfb_refresh_period
+{
+    uint8_t type;    /* XENFB_TYPE_REFRESH_PERIOD */
+    uint32_t period; /* period of refresh, in ms,
+                      * XENFB_NO_REFRESH if no refresh is needed */
+};
+
+#define XENFB_IN_EVENT_SIZE 40
+
+union xenfb_in_event
+{
+    uint8_t type;
+    struct xenfb_refresh_period refresh_period;
+    char pad[XENFB_IN_EVENT_SIZE];
+};
+
+/* shared page */
+
+#define XENFB_IN_RING_SIZE 1024
+#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
+#define XENFB_IN_RING_OFFS 1024
+#define XENFB_IN_RING(page) \
+    ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
+#define XENFB_IN_RING_REF(page, idx) \
+    (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
+
+#define XENFB_OUT_RING_SIZE 2048
+#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
+#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
+#define XENFB_OUT_RING(page) \
+    ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
+#define XENFB_OUT_RING_REF(page, idx) \
+    (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
+
+struct xenfb_page
+{
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
+
+    int32_t width;          /* the width of the framebuffer (in pixels) */
+    int32_t height;         /* the height of the framebuffer (in pixels) */
+    uint32_t line_length;   /* the length of a row of pixels (in bytes) */
+    uint32_t mem_length;    /* the length of the framebuffer (in bytes) */
+    uint8_t depth;          /* the depth of a pixel (in bits) */
+
+    /*
+     * Framebuffer page directory
+     *
+     * Each directory page holds PAGE_SIZE / sizeof(*pd)
+     * framebuffer pages, and can thus map up to PAGE_SIZE *
+     * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
+     * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs
+     * 64 bit.  256 directories give enough room for a 512 Meg
+     * framebuffer with a max resolution of 12,800x10,240.  Should
+     * be enough for a while with room leftover for expansion.
+     */
+    unsigned long pd[256];
+};
+
+/*
+ * Wart: xenkbd needs to know default resolution.  Put it here until a
+ * better solution is found, but don't leak it to the backend.
+ */
+#ifdef __KERNEL__
+#define XENFB_WIDTH 800
+#define XENFB_HEIGHT 600
+#define XENFB_DEPTH 32
+#endif
+
+#endif
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/io/fbif.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/io/blkif.h
===================================================================
--- xen/interface/io/blkif.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/io/blkif.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,141 @@
+/******************************************************************************
+ * blkif.h
+ * 
+ * Unified block-device I/O interface for Xen guest OSes.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_BLKIF_H__
+#define __XEN_PUBLIC_IO_BLKIF_H__
+
+#include <xen/interface/io/ring.h>
+#include <xen/interface/grant_table.h>
+
+/*
+ * Front->back notifications: When enqueuing a new request, sending a
+ * notification can be made conditional on req_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Backends must set
+ * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
+ * 
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
+
+#ifndef blkif_vdev_t
+#define blkif_vdev_t   uint16_t
+#endif
+#define blkif_sector_t uint64_t
+
+/*
+ * REQUEST CODES.
+ */
+#define BLKIF_OP_READ              0
+#define BLKIF_OP_WRITE             1
+/*
+ * Recognised only if "feature-barrier" is present in backend xenbus info.
+ * The "feature-barrier" node contains a boolean indicating whether barrier
+ * requests are likely to succeed or fail. Either way, a barrier request
+ * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
+ * the underlying block-device hardware. The boolean simply indicates whether
+ * or not it is worthwhile for the frontend to attempt barrier requests.
+ * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not*
+ * create the "feature-barrier" node!
+ */
+#define BLKIF_OP_WRITE_BARRIER     2
+/*
+ * Recognised if "feature-flush-cache" is present in backend xenbus
+ * info.  A flush will ask the underlying storage hardware to flush its
+ * non-volatile caches as appropriate.  The "feature-flush-cache" node
+ * contains a boolean indicating whether flush requests are likely to
+ * succeed or fail. Either way, a flush request may fail at any time
+ * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
+ * block-device hardware. The boolean simply indicates whether or not it
+ * is worthwhile for the frontend to attempt flushes.  If a backend does
+ * not recognise BLKIF_OP_FLUSH_DISKCACHE, it should *not* create the
+ * "feature-flush-cache" node!
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE   3
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+struct blkif_request_segment {
+    grant_ref_t gref;        /* reference to I/O buffer frame        */
+    /* @first_sect: first sector in frame to transfer (inclusive).   */
+    /* @last_sect: last sector in frame to transfer (inclusive).     */
+    uint8_t     first_sect, last_sect;
+};
+
+struct blkif_request {
+    uint8_t        operation;    /* BLKIF_OP_???                         */
+    uint8_t        nr_segments;  /* number of segments                   */
+    blkif_vdev_t   handle;       /* only for read/write requests         */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct blkif_request blkif_request_t;
+
+struct blkif_response {
+    uint64_t        id;              /* copied from request */
+    uint8_t         operation;       /* copied from request */
+    int16_t         status;          /* BLKIF_RSP_???       */
+};
+typedef struct blkif_response blkif_response_t;
+
+/*
+ * STATUS RETURN CODES.
+ */
+ /* Operation not supported (barrier writes and cache flushes only). */
+#define BLKIF_RSP_EOPNOTSUPP  -2
+ /* Operation failed for some unspecified reason (-EIO). */
+#define BLKIF_RSP_ERROR       -1
+ /* Operation completed successfully. */
+#define BLKIF_RSP_OKAY         0
+
+/*
+ * Generate blkif ring structures and types.
+ */
+
+DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
+
+#define VDISK_CDROM        0x1
+#define VDISK_REMOVABLE    0x2
+#define VDISK_READONLY     0x4
+
+#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/io/blkif.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true


Property changes on: xen/interface/io
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/sysctl.h
===================================================================
--- xen/interface/sysctl.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/sysctl.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,308 @@
+/******************************************************************************
+ * sysctl.h
+ * 
+ * System management operations. For use by node control stack.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_SYSCTL_H__
+#define __XEN_PUBLIC_SYSCTL_H__
+
+#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
+#error "sysctl operations are intended for use by node control tools only"
+#endif
+
+#include "xen.h"
+#include "domctl.h"
+
+#define XEN_SYSCTL_INTERFACE_VERSION 0x00000006
+
+/*
+ * Read console content from Xen buffer ring.
+ */
+#define XEN_SYSCTL_readconsole       1
+struct xen_sysctl_readconsole {
+    /* IN: Non-zero -> clear after reading. */
+    uint8_t clear;
+    /* IN: Non-zero -> start index specified by @index field. */
+    uint8_t incremental;
+    uint8_t pad0, pad1;
+    /*
+     * IN:  Start index for consuming from ring buffer (if @incremental);
+     * OUT: End index after consuming from ring buffer.
+     */
+    uint32_t index; 
+    /* IN: Virtual address to write console data. */
+    XEN_GUEST_HANDLE_64(char) buffer;
+    /* IN: Size of buffer; OUT: Bytes written to buffer. */
+    uint32_t count;
+};
+typedef struct xen_sysctl_readconsole xen_sysctl_readconsole_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t);
+
+/* Get trace buffers machine base address */
+#define XEN_SYSCTL_tbuf_op           2
+struct xen_sysctl_tbuf_op {
+    /* IN variables */
+#define XEN_SYSCTL_TBUFOP_get_info     0
+#define XEN_SYSCTL_TBUFOP_set_cpu_mask 1
+#define XEN_SYSCTL_TBUFOP_set_evt_mask 2
+#define XEN_SYSCTL_TBUFOP_set_size     3
+#define XEN_SYSCTL_TBUFOP_enable       4
+#define XEN_SYSCTL_TBUFOP_disable      5
+    uint32_t cmd;
+    /* IN/OUT variables */
+    struct xenctl_cpumap cpu_mask;
+    uint32_t             evt_mask;
+    /* OUT variables */
+    uint64_aligned_t buffer_mfn;
+    uint32_t size;
+};
+typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t);
+
+/*
+ * Get physical information about the host machine
+ */
+#define XEN_SYSCTL_physinfo          3
+ /* (x86) The platform supports HVM guests. */
+#define _XEN_SYSCTL_PHYSCAP_hvm          0
+#define XEN_SYSCTL_PHYSCAP_hvm           (1u<<_XEN_SYSCTL_PHYSCAP_hvm)
+ /* (x86) The platform supports HVM-guest direct access to I/O devices. */
+#define _XEN_SYSCTL_PHYSCAP_hvm_directio 1
+#define XEN_SYSCTL_PHYSCAP_hvm_directio  (1u<<_XEN_SYSCTL_PHYSCAP_hvm_directio)
+struct xen_sysctl_physinfo {
+    uint32_t threads_per_core;
+    uint32_t cores_per_socket;
+    uint32_t nr_cpus;
+    uint32_t nr_nodes;
+    uint32_t cpu_khz;
+    uint64_aligned_t total_pages;
+    uint64_aligned_t free_pages;
+    uint64_aligned_t scrub_pages;
+    uint32_t hw_cap[8];
+
+    /*
+     * IN: maximum addressable entry in the caller-provided cpu_to_node array.
+     * OUT: largest cpu identifier in the system.
+     * If OUT is greater than IN then the cpu_to_node array is truncated!
+     */
+    uint32_t max_cpu_id;
+    /*
+     * If not NULL, this array is filled with node identifier for each cpu.
+     * If a cpu has no node information (e.g., cpu not present) then the
+     * sentinel value ~0u is written.
+     * The size of this array is specified by the caller in @max_cpu_id.
+     * If the actual @max_cpu_id is smaller than the array then the trailing
+     * elements of the array will not be written by the sysctl.
+     */
+    XEN_GUEST_HANDLE_64(uint32) cpu_to_node;
+
+    /* XEN_SYSCTL_PHYSCAP_??? */
+    uint32_t capabilities;
+};
+typedef struct xen_sysctl_physinfo xen_sysctl_physinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_physinfo_t);
+
+/*
+ * Get the ID of the current scheduler.
+ */
+#define XEN_SYSCTL_sched_id          4
+struct xen_sysctl_sched_id {
+    /* OUT variable */
+    uint32_t sched_id;
+};
+typedef struct xen_sysctl_sched_id xen_sysctl_sched_id_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t);
+
+/* Interface for controlling Xen software performance counters. */
+#define XEN_SYSCTL_perfc_op          5
+/* Sub-operations: */
+#define XEN_SYSCTL_PERFCOP_reset 1   /* Reset all counters to zero. */
+#define XEN_SYSCTL_PERFCOP_query 2   /* Get perfctr information. */
+struct xen_sysctl_perfc_desc {
+    char         name[80];             /* name of perf counter */
+    uint32_t     nr_vals;              /* number of values for this counter */
+};
+typedef struct xen_sysctl_perfc_desc xen_sysctl_perfc_desc_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_desc_t);
+typedef uint32_t xen_sysctl_perfc_val_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_val_t);
+
+struct xen_sysctl_perfc_op {
+    /* IN variables. */
+    uint32_t       cmd;                /*  XEN_SYSCTL_PERFCOP_??? */
+    /* OUT variables. */
+    uint32_t       nr_counters;       /*  number of counter descriptions  */
+    uint32_t       nr_vals;           /*  number of values  */
+    /* counter information (or NULL) */
+    XEN_GUEST_HANDLE_64(xen_sysctl_perfc_desc_t) desc;
+    /* counter values (or NULL) */
+    XEN_GUEST_HANDLE_64(xen_sysctl_perfc_val_t) val;
+};
+typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t);
+
+#define XEN_SYSCTL_getdomaininfolist 6
+struct xen_sysctl_getdomaininfolist {
+    /* IN variables. */
+    domid_t               first_domain;
+    uint32_t              max_domains;
+    XEN_GUEST_HANDLE_64(xen_domctl_getdomaininfo_t) buffer;
+    /* OUT variables. */
+    uint32_t              num_domains;
+};
+typedef struct xen_sysctl_getdomaininfolist xen_sysctl_getdomaininfolist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t);
+
+/* Inject debug keys into Xen. */
+#define XEN_SYSCTL_debug_keys        7
+struct xen_sysctl_debug_keys {
+    /* IN variables. */
+    XEN_GUEST_HANDLE_64(char) keys;
+    uint32_t nr_keys;
+};
+typedef struct xen_sysctl_debug_keys xen_sysctl_debug_keys_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t);
+
+/* Get physical CPU information. */
+#define XEN_SYSCTL_getcpuinfo        8
+struct xen_sysctl_cpuinfo {
+    uint64_aligned_t idletime;
+};
+typedef struct xen_sysctl_cpuinfo xen_sysctl_cpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpuinfo_t); 
+struct xen_sysctl_getcpuinfo {
+    /* IN variables. */
+    uint32_t max_cpus;
+    XEN_GUEST_HANDLE_64(xen_sysctl_cpuinfo_t) info;
+    /* OUT variables. */
+    uint32_t nr_cpus;
+}; 
+typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); 
+
+#define XEN_SYSCTL_availheap         9
+struct xen_sysctl_availheap {
+    /* IN variables. */
+    uint32_t min_bitwidth;  /* Smallest address width (zero if don't care). */
+    uint32_t max_bitwidth;  /* Largest address width (zero if don't care). */
+    int32_t  node;          /* NUMA node of interest (-1 for all nodes). */
+    /* OUT variables. */
+    uint64_aligned_t avail_bytes;/* Bytes available in the specified region. */
+};
+typedef struct xen_sysctl_availheap xen_sysctl_availheap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t);
+
+#define XEN_SYSCTL_get_pmstat        10
+struct pm_px_val {
+    uint64_aligned_t freq;        /* Px core frequency */
+    uint64_aligned_t residency;   /* Px residency time */
+    uint64_aligned_t count;       /* Px transition count */
+};
+typedef struct pm_px_val pm_px_val_t;
+DEFINE_XEN_GUEST_HANDLE(pm_px_val_t);
+
+struct pm_px_stat {
+    uint8_t total;        /* total Px states */
+    uint8_t usable;       /* usable Px states */
+    uint8_t last;         /* last Px state */
+    uint8_t cur;          /* current Px state */
+    XEN_GUEST_HANDLE_64(uint64) trans_pt;   /* Px transition table */
+    XEN_GUEST_HANDLE_64(pm_px_val_t) pt;
+};
+typedef struct pm_px_stat pm_px_stat_t;
+DEFINE_XEN_GUEST_HANDLE(pm_px_stat_t);
+
+struct pm_cx_stat {
+    uint32_t nr;    /* entry nr in triggers & residencies, including C0 */
+    uint32_t last;  /* last Cx state */
+    uint64_aligned_t idle_time;                 /* idle time from boot */
+    XEN_GUEST_HANDLE_64(uint64) triggers;    /* Cx trigger counts */
+    XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */
+};
+
+struct xen_sysctl_get_pmstat {
+#define PMSTAT_CATEGORY_MASK 0xf0
+#define PMSTAT_PX            0x10
+#define PMSTAT_CX            0x20
+#define PMSTAT_get_max_px    (PMSTAT_PX | 0x1)
+#define PMSTAT_get_pxstat    (PMSTAT_PX | 0x2)
+#define PMSTAT_reset_pxstat  (PMSTAT_PX | 0x3)
+#define PMSTAT_get_max_cx    (PMSTAT_CX | 0x1)
+#define PMSTAT_get_cxstat    (PMSTAT_CX | 0x2)
+#define PMSTAT_reset_cxstat  (PMSTAT_CX | 0x3)
+    uint32_t type;
+    uint32_t cpuid;
+    union {
+        struct pm_px_stat getpx;
+        struct pm_cx_stat getcx;
+        /* other struct for tx, etc */
+    } u;
+};
+typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t);
+
+#define XEN_SYSCTL_cpu_hotplug       11
+struct xen_sysctl_cpu_hotplug {
+    /* IN variables */
+    uint32_t cpu;   /* Physical cpu. */
+#define XEN_SYSCTL_CPU_HOTPLUG_ONLINE  0
+#define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1
+    uint32_t op;    /* hotplug opcode */
+};
+typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t);
+
+
+struct xen_sysctl {
+    uint32_t cmd;
+    uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
+    union {
+        struct xen_sysctl_readconsole       readconsole;
+        struct xen_sysctl_tbuf_op           tbuf_op;
+        struct xen_sysctl_physinfo          physinfo;
+        struct xen_sysctl_sched_id          sched_id;
+        struct xen_sysctl_perfc_op          perfc_op;
+        struct xen_sysctl_getdomaininfolist getdomaininfolist;
+        struct xen_sysctl_debug_keys        debug_keys;
+        struct xen_sysctl_getcpuinfo        getcpuinfo;
+        struct xen_sysctl_availheap         availheap;
+        struct xen_sysctl_get_pmstat        get_pmstat;
+        struct xen_sysctl_cpu_hotplug       cpu_hotplug;
+        uint8_t                             pad[128];
+    } u;
+};
+typedef struct xen_sysctl xen_sysctl_t;
+DEFINE_XEN_GUEST_HANDLE(xen_sysctl_t);
+
+#endif /* __XEN_PUBLIC_SYSCTL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/sysctl.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/domctl.h
===================================================================
--- xen/interface/domctl.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/domctl.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,677 @@
+/******************************************************************************
+ * domctl.h
+ * 
+ * Domain management operations. For use by node control stack.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2002-2003, B Dragovic
+ * Copyright (c) 2002-2006, K Fraser
+ */
+
+#ifndef __XEN_PUBLIC_DOMCTL_H__
+#define __XEN_PUBLIC_DOMCTL_H__
+
+#if !defined(__XEN__) && !defined(__XEN_TOOLS__)
+#error "domctl operations are intended for use by node control tools only"
+#endif
+
+#include "xen.h"
+
+#define XEN_DOMCTL_INTERFACE_VERSION 0x00000005
+
+struct xenctl_cpumap {
+    XEN_GUEST_HANDLE_64(uint8_t) bitmap;
+    uint32_t nr_cpus;
+};
+
+/*
+ * NB. xen_domctl.domain is an IN/OUT parameter for this operation.
+ * If it is specified as zero, an id is auto-allocated and returned.
+ */
+#define XEN_DOMCTL_createdomain       1
+struct xen_domctl_createdomain {
+    /* IN parameters */
+    uint32_t ssidref;
+    xen_domain_handle_t handle;
+ /* Is this an HVM guest (as opposed to a PV guest)? */
+#define _XEN_DOMCTL_CDF_hvm_guest 0
+#define XEN_DOMCTL_CDF_hvm_guest  (1U<<_XEN_DOMCTL_CDF_hvm_guest)
+ /* Use hardware-assisted paging if available? */
+#define _XEN_DOMCTL_CDF_hap       1
+#define XEN_DOMCTL_CDF_hap        (1U<<_XEN_DOMCTL_CDF_hap)
+    uint32_t flags;
+};
+typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t);
+
+#define XEN_DOMCTL_destroydomain      2
+#define XEN_DOMCTL_pausedomain        3
+#define XEN_DOMCTL_unpausedomain      4
+#define XEN_DOMCTL_resumedomain      27
+
+#define XEN_DOMCTL_getdomaininfo      5
+struct xen_domctl_getdomaininfo {
+    /* OUT variables. */
+    domid_t  domain;              /* Also echoed in domctl.domain */
+ /* Domain is scheduled to die. */
+#define _XEN_DOMINF_dying     0
+#define XEN_DOMINF_dying      (1U<<_XEN_DOMINF_dying)
+ /* Domain is an HVM guest (as opposed to a PV guest). */
+#define _XEN_DOMINF_hvm_guest 1
+#define XEN_DOMINF_hvm_guest  (1U<<_XEN_DOMINF_hvm_guest)
+ /* The guest OS has shut down. */
+#define _XEN_DOMINF_shutdown  2
+#define XEN_DOMINF_shutdown   (1U<<_XEN_DOMINF_shutdown)
+ /* Currently paused by control software. */
+#define _XEN_DOMINF_paused    3
+#define XEN_DOMINF_paused     (1U<<_XEN_DOMINF_paused)
+ /* Currently blocked pending an event.     */
+#define _XEN_DOMINF_blocked   4
+#define XEN_DOMINF_blocked    (1U<<_XEN_DOMINF_blocked)
+ /* Domain is currently running.            */
+#define _XEN_DOMINF_running   5
+#define XEN_DOMINF_running    (1U<<_XEN_DOMINF_running)
+ /* Being debugged.  */
+#define _XEN_DOMINF_debugged  6
+#define XEN_DOMINF_debugged   (1U<<_XEN_DOMINF_debugged)
+ /* CPU to which this domain is bound.      */
+#define XEN_DOMINF_cpumask      255
+#define XEN_DOMINF_cpushift       8
+ /* XEN_DOMINF_shutdown guest-supplied code.  */
+#define XEN_DOMINF_shutdownmask 255
+#define XEN_DOMINF_shutdownshift 16
+    uint32_t flags;              /* XEN_DOMINF_* */
+    uint64_aligned_t tot_pages;
+    uint64_aligned_t max_pages;
+    uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */
+    uint64_aligned_t cpu_time;
+    uint32_t nr_online_vcpus;    /* Number of VCPUs currently online. */
+    uint32_t max_vcpu_id;        /* Maximum VCPUID in use by this domain. */
+    uint32_t ssidref;
+    xen_domain_handle_t handle;
+};
+typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t);
+
+
+#define XEN_DOMCTL_getmemlist         6
+struct xen_domctl_getmemlist {
+    /* IN variables. */
+    /* Max entries to write to output buffer. */
+    uint64_aligned_t max_pfns;
+    /* Start index in guest's page list. */
+    uint64_aligned_t start_pfn;
+    XEN_GUEST_HANDLE_64(uint64_t) buffer;
+    /* OUT variables. */
+    uint64_aligned_t num_pfns;
+};
+typedef struct xen_domctl_getmemlist xen_domctl_getmemlist_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t);
+
+
+#define XEN_DOMCTL_getpageframeinfo   7
+
+#define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28
+#define XEN_DOMCTL_PFINFO_NOTAB   (0x0U<<28)
+#define XEN_DOMCTL_PFINFO_L1TAB   (0x1U<<28)
+#define XEN_DOMCTL_PFINFO_L2TAB   (0x2U<<28)
+#define XEN_DOMCTL_PFINFO_L3TAB   (0x3U<<28)
+#define XEN_DOMCTL_PFINFO_L4TAB   (0x4U<<28)
+#define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28)
+#define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31)
+#define XEN_DOMCTL_PFINFO_XTAB    (0xfU<<28) /* invalid page */
+#define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28)
+
+struct xen_domctl_getpageframeinfo {
+    /* IN variables. */
+    uint64_aligned_t gmfn; /* GMFN to query */
+    /* OUT variables. */
+    /* Is the page PINNED to a type? */
+    uint32_t type;         /* see above type defs */
+};
+typedef struct xen_domctl_getpageframeinfo xen_domctl_getpageframeinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t);
+
+
+#define XEN_DOMCTL_getpageframeinfo2  8
+struct xen_domctl_getpageframeinfo2 {
+    /* IN variables. */
+    uint64_aligned_t num;
+    /* IN/OUT variables. */
+    XEN_GUEST_HANDLE_64(uint32_t) array;
+};
+typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t);
+
+
+/*
+ * Control shadow pagetables operation
+ */
+#define XEN_DOMCTL_shadow_op         10
+
+/* Disable shadow mode. */
+#define XEN_DOMCTL_SHADOW_OP_OFF         0
+
+/* Enable shadow mode (mode contains ORed XEN_DOMCTL_SHADOW_ENABLE_* flags). */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE      32
+
+/* Log-dirty bitmap operations. */
+ /* Return the bitmap and clean internal copy for next round. */
+#define XEN_DOMCTL_SHADOW_OP_CLEAN       11
+ /* Return the bitmap but do not modify internal copy. */
+#define XEN_DOMCTL_SHADOW_OP_PEEK        12
+
+/* Memory allocation accessors. */
+#define XEN_DOMCTL_SHADOW_OP_GET_ALLOCATION   30
+#define XEN_DOMCTL_SHADOW_OP_SET_ALLOCATION   31
+
+/* Legacy enable operations. */
+ /* Equiv. to ENABLE with no mode flags. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_TEST       1
+ /* Equiv. to ENABLE with mode flag ENABLE_LOG_DIRTY. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY   2
+ /* Equiv. to ENABLE with mode flags ENABLE_REFCOUNT and ENABLE_TRANSLATE. */
+#define XEN_DOMCTL_SHADOW_OP_ENABLE_TRANSLATE  3
+
+/* Mode flags for XEN_DOMCTL_SHADOW_OP_ENABLE. */
+ /*
+  * Shadow pagetables are refcounted: guest does not use explicit mmu
+  * operations nor write-protect its pagetables.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_REFCOUNT  (1 << 1)
+ /*
+  * Log pages in a bitmap as they are dirtied.
+  * Used for live relocation to determine which pages must be re-sent.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY (1 << 2)
+ /*
+  * Automatically translate GPFNs into MFNs.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_TRANSLATE (1 << 3)
+ /*
+  * Xen does not steal virtual address space from the guest.
+  * Requires HVM support.
+  */
+#define XEN_DOMCTL_SHADOW_ENABLE_EXTERNAL  (1 << 4)
+
+struct xen_domctl_shadow_op_stats {
+    uint32_t fault_count;
+    uint32_t dirty_count;
+};
+typedef struct xen_domctl_shadow_op_stats xen_domctl_shadow_op_stats_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_stats_t);
+
+struct xen_domctl_shadow_op {
+    /* IN variables. */
+    uint32_t       op;       /* XEN_DOMCTL_SHADOW_OP_* */
+
+    /* OP_ENABLE */
+    uint32_t       mode;     /* XEN_DOMCTL_SHADOW_ENABLE_* */
+
+    /* OP_GET_ALLOCATION / OP_SET_ALLOCATION */
+    uint32_t       mb;       /* Shadow memory allocation in MB */
+
+    /* OP_PEEK / OP_CLEAN */
+    XEN_GUEST_HANDLE_64(uint8_t) dirty_bitmap;
+    uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */
+    struct xen_domctl_shadow_op_stats stats;
+};
+typedef struct xen_domctl_shadow_op xen_domctl_shadow_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t);
+
+
+#define XEN_DOMCTL_max_mem           11
+struct xen_domctl_max_mem {
+    /* IN variables. */
+    uint64_aligned_t max_memkb;
+};
+typedef struct xen_domctl_max_mem xen_domctl_max_mem_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t);
+
+
+#define XEN_DOMCTL_setvcpucontext    12
+#define XEN_DOMCTL_getvcpucontext    13
+struct xen_domctl_vcpucontext {
+    uint32_t              vcpu;                  /* IN */
+    XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpucontext xen_domctl_vcpucontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t);
+
+
+#define XEN_DOMCTL_getvcpuinfo       14
+struct xen_domctl_getvcpuinfo {
+    /* IN variables. */
+    uint32_t vcpu;
+    /* OUT variables. */
+    uint8_t  online;                  /* currently online (not hotplugged)? */
+    uint8_t  blocked;                 /* blocked waiting for an event? */
+    uint8_t  running;                 /* currently scheduled on its CPU? */
+    uint64_aligned_t cpu_time;        /* total cpu time consumed (ns) */
+    uint32_t cpu;                     /* current mapping   */
+};
+typedef struct xen_domctl_getvcpuinfo xen_domctl_getvcpuinfo_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_getvcpuinfo_t);
+
+
+/* Get/set which physical cpus a vcpu can execute on. */
+#define XEN_DOMCTL_setvcpuaffinity    9
+#define XEN_DOMCTL_getvcpuaffinity   25
+struct xen_domctl_vcpuaffinity {
+    uint32_t  vcpu;              /* IN */
+    struct xenctl_cpumap cpumap; /* IN/OUT */
+};
+typedef struct xen_domctl_vcpuaffinity xen_domctl_vcpuaffinity_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t);
+
+
+#define XEN_DOMCTL_max_vcpus         15
+struct xen_domctl_max_vcpus {
+    uint32_t max;           /* maximum number of vcpus */
+};
+typedef struct xen_domctl_max_vcpus xen_domctl_max_vcpus_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t);
+
+
+#define XEN_DOMCTL_scheduler_op      16
+/* Scheduler types. */
+#define XEN_SCHEDULER_SEDF     4
+#define XEN_SCHEDULER_CREDIT   5
+/* Set or get info? */
+#define XEN_DOMCTL_SCHEDOP_putinfo 0
+#define XEN_DOMCTL_SCHEDOP_getinfo 1
+struct xen_domctl_scheduler_op {
+    uint32_t sched_id;  /* XEN_SCHEDULER_* */
+    uint32_t cmd;       /* XEN_DOMCTL_SCHEDOP_* */
+    union {
+        struct xen_domctl_sched_sedf {
+            uint64_aligned_t period;
+            uint64_aligned_t slice;
+            uint64_aligned_t latency;
+            uint32_t extratime;
+            uint32_t weight;
+        } sedf;
+        struct xen_domctl_sched_credit {
+            uint16_t weight;
+            uint16_t cap;
+        } credit;
+    } u;
+};
+typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t);
+
+
+#define XEN_DOMCTL_setdomainhandle   17
+struct xen_domctl_setdomainhandle {
+    xen_domain_handle_t handle;
+};
+typedef struct xen_domctl_setdomainhandle xen_domctl_setdomainhandle_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t);
+
+
+#define XEN_DOMCTL_setdebugging      18
+struct xen_domctl_setdebugging {
+    uint8_t enable;
+};
+typedef struct xen_domctl_setdebugging xen_domctl_setdebugging_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t);
+
+
+#define XEN_DOMCTL_irq_permission    19
+struct xen_domctl_irq_permission {
+    uint8_t pirq;
+    uint8_t allow_access;    /* flag to specify enable/disable of IRQ access */
+};
+typedef struct xen_domctl_irq_permission xen_domctl_irq_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t);
+
+
+#define XEN_DOMCTL_iomem_permission  20
+struct xen_domctl_iomem_permission {
+    uint64_aligned_t first_mfn;/* first page (physical page number) in range */
+    uint64_aligned_t nr_mfns;  /* number of pages in range (>0) */
+    uint8_t  allow_access;     /* allow (!0) or deny (0) access to range? */
+};
+typedef struct xen_domctl_iomem_permission xen_domctl_iomem_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t);
+
+
+#define XEN_DOMCTL_ioport_permission 21
+struct xen_domctl_ioport_permission {
+    uint32_t first_port;              /* first port in range */
+    uint32_t nr_ports;                /* size of port range */
+    uint8_t  allow_access;            /* allow or deny access to range? */
+};
+typedef struct xen_domctl_ioport_permission xen_domctl_ioport_permission_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t);
+
+
+#define XEN_DOMCTL_hypercall_init    22
+struct xen_domctl_hypercall_init {
+    uint64_aligned_t  gmfn;           /* GMFN to be initialised */
+};
+typedef struct xen_domctl_hypercall_init xen_domctl_hypercall_init_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t);
+
+
+#define XEN_DOMCTL_arch_setup        23
+#define _XEN_DOMAINSETUP_hvm_guest 0
+#define XEN_DOMAINSETUP_hvm_guest  (1UL<<_XEN_DOMAINSETUP_hvm_guest)
+#define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save)  */
+#define XEN_DOMAINSETUP_query  (1UL<<_XEN_DOMAINSETUP_query)
+#define _XEN_DOMAINSETUP_sioemu_guest 2
+#define XEN_DOMAINSETUP_sioemu_guest  (1UL<<_XEN_DOMAINSETUP_sioemu_guest)
+typedef struct xen_domctl_arch_setup {
+    uint64_aligned_t flags;  /* XEN_DOMAINSETUP_* */
+#ifdef __ia64__
+    uint64_aligned_t bp;     /* mpaddr of boot param area */
+    uint64_aligned_t maxmem; /* Highest memory address for MDT.  */
+    uint64_aligned_t xsi_va; /* Xen shared_info area virtual address.  */
+    uint32_t hypercall_imm;  /* Break imm for Xen hypercalls.  */
+    int8_t vhpt_size_log2;   /* Log2 of VHPT size. */
+#endif
+} xen_domctl_arch_setup_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t);
+
+
+#define XEN_DOMCTL_settimeoffset     24
+struct xen_domctl_settimeoffset {
+    int32_t  time_offset_seconds; /* applied to domain wallclock time */
+};
+typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t);
+
+ 
+#define XEN_DOMCTL_gethvmcontext     33
+#define XEN_DOMCTL_sethvmcontext     34
+typedef struct xen_domctl_hvmcontext {
+    uint32_t size; /* IN/OUT: size of buffer / bytes filled */
+    XEN_GUEST_HANDLE_64(uint8_t) buffer; /* IN/OUT: data, or call
+                                          * gethvmcontext with NULL
+                                          * buffer to get size
+                                          * req'd */
+} xen_domctl_hvmcontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t);
+
+
+#define XEN_DOMCTL_set_address_size  35
+#define XEN_DOMCTL_get_address_size  36
+typedef struct xen_domctl_address_size {
+    uint32_t size;
+} xen_domctl_address_size_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t);
+
+
+#define XEN_DOMCTL_real_mode_area    26
+struct xen_domctl_real_mode_area {
+    uint32_t log; /* log2 of Real Mode Area size */
+};
+typedef struct xen_domctl_real_mode_area xen_domctl_real_mode_area_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t);
+
+
+#define XEN_DOMCTL_sendtrigger       28
+#define XEN_DOMCTL_SENDTRIGGER_NMI    0
+#define XEN_DOMCTL_SENDTRIGGER_RESET  1
+#define XEN_DOMCTL_SENDTRIGGER_INIT   2
+struct xen_domctl_sendtrigger {
+    uint32_t  trigger;  /* IN */
+    uint32_t  vcpu;     /* IN */
+};
+typedef struct xen_domctl_sendtrigger xen_domctl_sendtrigger_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_sendtrigger_t);
+
+
+/* Assign PCI device to HVM guest. Sets up IOMMU structures. */
+#define XEN_DOMCTL_assign_device      37
+#define XEN_DOMCTL_test_assign_device 45
+#define XEN_DOMCTL_deassign_device 47
+struct xen_domctl_assign_device {
+    uint32_t  machine_bdf;   /* machine PCI ID of assigned device */
+};
+typedef struct xen_domctl_assign_device xen_domctl_assign_device_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t);
+
+/* Retrieve information about the sibling devices of machine_bdf */
+#define XEN_DOMCTL_get_device_group 50
+struct xen_domctl_get_device_group {
+    uint32_t  machine_bdf;      /* IN */
+    uint32_t  max_sdevs;        /* IN */
+    uint32_t  num_sdevs;        /* OUT */
+    XEN_GUEST_HANDLE_64(uint32)  sdev_array;   /* OUT */
+};
+typedef struct xen_domctl_get_device_group xen_domctl_get_device_group_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t);
+
+/* Pass-through interrupts: bind real irq -> hvm devfn. */
+#define XEN_DOMCTL_bind_pt_irq       38
+#define XEN_DOMCTL_unbind_pt_irq     48
+typedef enum pt_irq_type_e {
+    PT_IRQ_TYPE_PCI,
+    PT_IRQ_TYPE_ISA,
+    PT_IRQ_TYPE_MSI,
+} pt_irq_type_t;
+struct xen_domctl_bind_pt_irq {
+    uint32_t machine_irq;
+    pt_irq_type_t irq_type;
+    uint32_t hvm_domid;
+
+    union {
+        struct {
+            uint8_t isa_irq;
+        } isa;
+        struct {
+            uint8_t bus;
+            uint8_t device;
+            uint8_t intx;
+        } pci;
+        struct {
+            uint8_t gvec;
+            uint32_t gflags;
+        } msi;
+    } u;
+};
+typedef struct xen_domctl_bind_pt_irq xen_domctl_bind_pt_irq_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_bind_pt_irq_t);
+
+
+/* Bind machine I/O address range -> HVM address range. */
+#define XEN_DOMCTL_memory_mapping    39
+#define DPCI_ADD_MAPPING         1
+#define DPCI_REMOVE_MAPPING      0
+struct xen_domctl_memory_mapping {
+    uint64_aligned_t first_gfn; /* first page (hvm guest phys page) in range */
+    uint64_aligned_t first_mfn; /* first page (machine page) in range */
+    uint64_aligned_t nr_mfns;   /* number of pages in range (>0) */
+    uint32_t add_mapping;       /* add or remove mapping */
+    uint32_t padding;           /* padding for 64-bit aligned structure */
+};
+typedef struct xen_domctl_memory_mapping xen_domctl_memory_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_memory_mapping_t);
+
+
+/* Bind machine I/O port range -> HVM I/O port range. */
+#define XEN_DOMCTL_ioport_mapping    40
+struct xen_domctl_ioport_mapping {
+    uint32_t first_gport;     /* first guest IO port */
+    uint32_t first_mport;     /* first machine IO port */
+    uint32_t nr_ports;        /* size of port range */
+    uint32_t add_mapping;     /* add or remove mapping */
+};
+typedef struct xen_domctl_ioport_mapping xen_domctl_ioport_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_mapping_t);
+
+
+/*
+ * Pin caching type of RAM space for x86 HVM domU.
+ */
+#define XEN_DOMCTL_pin_mem_cacheattr 41
+/* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */
+#define XEN_DOMCTL_MEM_CACHEATTR_UC  0
+#define XEN_DOMCTL_MEM_CACHEATTR_WC  1
+#define XEN_DOMCTL_MEM_CACHEATTR_WT  4
+#define XEN_DOMCTL_MEM_CACHEATTR_WP  5
+#define XEN_DOMCTL_MEM_CACHEATTR_WB  6
+#define XEN_DOMCTL_MEM_CACHEATTR_UCM 7
+struct xen_domctl_pin_mem_cacheattr {
+    uint64_aligned_t start, end;
+    unsigned int type; /* XEN_DOMCTL_MEM_CACHEATTR_* */
+};
+typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t);
+
+
+/* The set (42) and get (43) commands share the argument layout below. */
+#define XEN_DOMCTL_set_ext_vcpucontext 42
+#define XEN_DOMCTL_get_ext_vcpucontext 43
+struct xen_domctl_ext_vcpucontext {
+    /* IN: VCPU that this call applies to. */
+    uint32_t         vcpu;
+    /*
+     * SET: Size of struct (IN)
+     * GET: Size of struct (OUT)
+     */
+    uint32_t         size;
+    /* Everything from here to the #endif exists on x86 builds only. */
+#if defined(__i386__) || defined(__x86_64__)
+    /* SYSCALL from 32-bit mode and SYSENTER callback information. */
+    /* NB. SYSCALL from 64-bit mode is contained in vcpu_guest_context_t */
+    uint64_aligned_t syscall32_callback_eip;
+    uint64_aligned_t sysenter_callback_eip;
+    uint16_t         syscall32_callback_cs;
+    uint16_t         sysenter_callback_cs;
+    uint8_t          syscall32_disables_events;
+    uint8_t          sysenter_disables_events;
+#endif
+};
+typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_ext_vcpucontext_t);
+
+/*
+ * Set optimization features for a domain
+ */
+#define XEN_DOMCTL_set_opt_feature    44
+struct xen_domctl_set_opt_feature {
+    /* Only ia64 carries a real payload; other builds get a placeholder. */
+#if defined(__ia64__)
+    struct xen_ia64_opt_feature optf;
+#else
+    /* Make struct non-empty: do not depend on this field name! */
+    uint64_t dummy;
+#endif
+};
+typedef struct xen_domctl_set_opt_feature xen_domctl_set_opt_feature_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_opt_feature_t);
+
+/*
+ * Set the target domain for a domain
+ */
+#define XEN_DOMCTL_set_target    46
+struct xen_domctl_set_target {
+    domid_t target;
+};
+typedef struct xen_domctl_set_target xen_domctl_set_target_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_target_t);
+
+/* CPUID override argument; only declared on x86 builds. */
+#if defined(__i386__) || defined(__x86_64__)
+# define XEN_CPUID_INPUT_UNUSED  0xFFFFFFFF
+# define XEN_DOMCTL_set_cpuid 49
+struct xen_domctl_cpuid {
+  /*
+   * NOTE(review): presumably the CPUID leaf and sub-leaf being described,
+   * with XEN_CPUID_INPUT_UNUSED marking an input that does not apply --
+   * confirm against the hypervisor implementation.
+   */
+  unsigned int  input[2];
+  /* Register values associated with the inputs above. */
+  unsigned int  eax;
+  unsigned int  ebx;
+  unsigned int  ecx;
+  unsigned int  edx;
+};
+typedef struct xen_domctl_cpuid xen_domctl_cpuid_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t);
+#endif
+
+#define XEN_DOMCTL_subscribe          29
+struct xen_domctl_subscribe {
+    uint32_t port; /* IN */
+};
+typedef struct xen_domctl_subscribe xen_domctl_subscribe_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_subscribe_t);
+
+/*
+ * Define the maximum machine address size which should be allocated
+ * to a guest.
+ */
+#define XEN_DOMCTL_set_machine_address_size  51
+#define XEN_DOMCTL_get_machine_address_size  52
+
+
+/*
+ * Top-level domctl argument: a fixed header followed by a union holding
+ * the argument structure of the individual command.
+ */
+struct xen_domctl {
+    uint32_t cmd;               /* presumably one of the XEN_DOMCTL_* numbers */
+    uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
+    domid_t  domain;
+    union {
+        struct xen_domctl_createdomain      createdomain;
+        struct xen_domctl_getdomaininfo     getdomaininfo;
+        struct xen_domctl_getmemlist        getmemlist;
+        struct xen_domctl_getpageframeinfo  getpageframeinfo;
+        struct xen_domctl_getpageframeinfo2 getpageframeinfo2;
+        struct xen_domctl_vcpuaffinity      vcpuaffinity;
+        struct xen_domctl_shadow_op         shadow_op;
+        struct xen_domctl_max_mem           max_mem;
+        struct xen_domctl_vcpucontext       vcpucontext;
+        struct xen_domctl_getvcpuinfo       getvcpuinfo;
+        struct xen_domctl_max_vcpus         max_vcpus;
+        struct xen_domctl_scheduler_op      scheduler_op;
+        struct xen_domctl_setdomainhandle   setdomainhandle;
+        struct xen_domctl_setdebugging      setdebugging;
+        struct xen_domctl_irq_permission    irq_permission;
+        struct xen_domctl_iomem_permission  iomem_permission;
+        struct xen_domctl_ioport_permission ioport_permission;
+        struct xen_domctl_hypercall_init    hypercall_init;
+        struct xen_domctl_arch_setup        arch_setup;
+        struct xen_domctl_settimeoffset     settimeoffset;
+        struct xen_domctl_real_mode_area    real_mode_area;
+        struct xen_domctl_hvmcontext        hvmcontext;
+        struct xen_domctl_address_size      address_size;
+        struct xen_domctl_sendtrigger       sendtrigger;
+        struct xen_domctl_get_device_group  get_device_group;
+        struct xen_domctl_assign_device     assign_device;
+        struct xen_domctl_bind_pt_irq       bind_pt_irq;
+        struct xen_domctl_memory_mapping    memory_mapping;
+        struct xen_domctl_ioport_mapping    ioport_mapping;
+        struct xen_domctl_pin_mem_cacheattr pin_mem_cacheattr;
+        struct xen_domctl_ext_vcpucontext   ext_vcpucontext;
+        struct xen_domctl_set_opt_feature   set_opt_feature;
+        struct xen_domctl_set_target        set_target;
+        struct xen_domctl_subscribe         subscribe;
+#if defined(__i386__) || defined(__x86_64__)
+        struct xen_domctl_cpuid             cpuid;
+#endif
+        /* Keeps the union at least 128 bytes whatever its largest member is. */
+        uint8_t                             pad[128];
+    } u;
+};
+typedef struct xen_domctl xen_domctl_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_t);
+
+#endif /* __XEN_PUBLIC_DOMCTL_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/domctl.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/nmi.h
===================================================================
--- xen/interface/nmi.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/nmi.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,78 @@
+/******************************************************************************
+ * nmi.h
+ * 
+ * NMI callback registration and reason codes.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_NMI_H__
+#define __XEN_PUBLIC_NMI_H__
+
+/*
+ * NMI reason codes:
+ * Currently these are x86-specific, stored in arch_shared_info.nmi_reason.
+ */
+ /* I/O-check error reported via ISA port 0x61, bit 6. */
+#define _XEN_NMIREASON_io_error     0
+#define XEN_NMIREASON_io_error      (1UL << _XEN_NMIREASON_io_error)
+ /* Parity error reported via ISA port 0x61, bit 7. */
+#define _XEN_NMIREASON_parity_error 1
+#define XEN_NMIREASON_parity_error  (1UL << _XEN_NMIREASON_parity_error)
+ /* Unknown hardware-generated NMI. */
+#define _XEN_NMIREASON_unknown      2
+#define XEN_NMIREASON_unknown       (1UL << _XEN_NMIREASON_unknown)
+
+/*
+ * long nmi_op(unsigned int cmd, void *arg)
+ * NB. All ops return zero on success, else a negative error code.
+ */
+
+/*
+ * Register NMI callback for this (calling) VCPU. Currently this only makes
+ * sense for domain 0, vcpu 0. All other callers will be returned EINVAL.
+ * arg == pointer to xennmi_callback structure.
+ */
+#define XENNMI_register_callback   0
+struct xennmi_callback {
+    /* Address of the handler to register (width follows 'unsigned long'). */
+    unsigned long handler_address;
+    /* NOTE(review): no documented use here; presumably reserved -- confirm. */
+    unsigned long pad;
+};
+typedef struct xennmi_callback xennmi_callback_t;
+DEFINE_XEN_GUEST_HANDLE(xennmi_callback_t);
+
+/*
+ * Deregister NMI callback for this (calling) VCPU.
+ * arg == NULL.
+ */
+#define XENNMI_unregister_callback 1
+
+#endif /* __XEN_PUBLIC_NMI_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/nmi.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/interface/version.h
===================================================================
--- xen/interface/version.h	(.../stable/6/sys)	(revision 0)
+++ xen/interface/version.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * version.h
+ * 
+ * Xen version, type, and compile information.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Nguyen Anh Quynh <aquynh@gmail.com>
+ * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_VERSION_H__
+#define __XEN_PUBLIC_VERSION_H__
+
+/* NB. All ops return zero on success, except XENVER_{version,pagesize} */
+
+/* arg == NULL; returns major:minor (16:16). */
+#define XENVER_version      0
+
+/* arg == xen_extraversion_t. */
+#define XENVER_extraversion 1
+typedef char xen_extraversion_t[16];
+#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t))
+
+/* arg == xen_compile_info_t. */
+#define XENVER_compile_info 2
+struct xen_compile_info {
+    char compiler[64];
+    char compile_by[16];
+    char compile_domain[32];
+    char compile_date[32];
+};
+typedef struct xen_compile_info xen_compile_info_t;
+
+/*
+ * NOTE(review): the three commands below carry no "arg ==" comment,
+ * unlike those above; presumably each takes the type declared right
+ * after it -- confirm against the hypervisor implementation.
+ */
+#define XENVER_capabilities 3
+typedef char xen_capabilities_info_t[1024];
+#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t))
+
+#define XENVER_changeset 4
+typedef char xen_changeset_info_t[64];
+#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))
+
+#define XENVER_platform_parameters 5
+struct xen_platform_parameters {
+    /* Declared 'unsigned long', so its width follows the build's ABI. */
+    unsigned long virt_start;
+};
+typedef struct xen_platform_parameters xen_platform_parameters_t;
+
+#define XENVER_get_features 6
+struct xen_feature_info {
+    unsigned int submap_idx;    /* IN: which 32-bit submap to return */
+    uint32_t     submap;        /* OUT: 32-bit submap */
+};
+typedef struct xen_feature_info xen_feature_info_t;
+
+/* Declares the features reported by XENVER_get_features. */
+#include "features.h"
+
+/* arg == NULL; returns host memory page size. */
+#define XENVER_pagesize 7
+
+/* arg == xen_domain_handle_t. */
+#define XENVER_guest_handle 8
+
+#endif /* __XEN_PUBLIC_VERSION_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

Property changes on: xen/interface/version.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true


Property changes on: xen/interface
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/features.h
===================================================================
--- xen/features.h	(.../stable/6/sys)	(revision 0)
+++ xen/features.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,20 @@
+/******************************************************************************
+ * features.h
+ *
+ * Query the features reported by Xen.
+ *
+ * Copyright (c) 2006, Ian Campbell
+ */
+
+#ifndef __ASM_XEN_FEATURES_H__
+#define __ASM_XEN_FEATURES_H__
+
+#include <xen/interface/version.h>
+
+/* NOTE(review): defined elsewhere; presumably fills xen_features[] -- confirm. */
+extern void setup_xen_features(void);
+
+/* One byte per feature flag: 32 entries for each of XENFEAT_NR_SUBMAPS submaps. */
+extern uint8_t xen_features[XENFEAT_NR_SUBMAPS * 32];
+
+/* Index xen_features[] directly by flag number; no bounds check is made. */
+#define xen_feature(flag)	(xen_features[flag])
+
+#endif /* __ASM_XEN_FEATURES_H__ */

Property changes on: xen/features.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:mergeinfo
   Merged /stable/7/sys/i386/include/xen/features.h:r172506,172810,175956,179044,179776,180149,182402
   Merged /head/sys/i386/include/xen/features.h:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628
Added: svn:eol-style
   + native

Index: xen/xenbus/xenbus_xs.c
===================================================================
--- xen/xenbus/xenbus_xs.c	(.../stable/6/sys)	(revision 0)
+++ xen/xenbus/xenbus_xs.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,929 @@
+/******************************************************************************
+ * xenbus_xs.c
+ *
+ * This is the kernel equivalent of the "xs" library.  We don't need everything
+ * and we use xenbus_comms for communication.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/uio.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sx.h>
+#include <sys/syslog.h>
+#include <sys/malloc.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/kthread.h>
+#include <sys/unistd.h>
+
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <machine/stdarg.h>
+
+#include <xen/xenbus/xenbusvar.h>
+#include <xen/xenbus/xenbus_comms.h>
+#include <xen/interface/hvm/params.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+static int xs_process_msg(enum xsd_sockmsg_type *type);
+
+int xenwatch_running = 0;
+int xenbus_running = 0;
+int xen_store_evtchn;
+
+/*
+ * A message received from the store.  Instances are linked onto either
+ * the reply list (xs_state.reply_list) or the watch event list
+ * (watch_events), both of which are TAILQs of this type.
+ */
+struct xs_stored_msg {
+	TAILQ_ENTRY(xs_stored_msg) list;
+
+	/* Message header; hdr.type selects which arm of 'u' is in use. */
+	struct xsd_sockmsg hdr;
+
+	union {
+		/* Queued replies. */
+		struct {
+			char *body;
+		} reply;
+
+		/* Queued watch events. */
+		struct {
+			/* Registered watch whose callback receives the event. */
+			struct xenbus_watch *handle;
+			/* String vector built by split(); released with one free(). */
+			char **vec;
+			unsigned int vec_size;
+		} watch;
+	} u;
+};
+
+/* Shared state for talking to the store; the single instance is xs_state. */
+struct xs_handle {
+	/* A list of replies. Currently only one will ever be outstanding. */
+	TAILQ_HEAD(xs_handle_list, xs_stored_msg) reply_list;
+	/* Held while reply_list is examined or modified. */
+	struct mtx reply_lock;
+	/* Only its address is used, as the sleep channel for reply waiters. */
+	int reply_waitq;
+
+	/* One request at a time. */
+	struct sx request_mutex;
+
+	/* Protect transactions against save/restore. */
+	struct sx suspend_mutex;
+};
+
+static struct xs_handle xs_state;
+
+/* List of registered watches, and a lock to protect it. */
+static LIST_HEAD(watch_list_head, xenbus_watch) watches;
+static struct mtx watches_lock;
+/* List of pending watch callback events, and a lock to protect it. */
+static TAILQ_HEAD(event_list_head, xs_stored_msg) watch_events;
+static struct mtx watch_events_lock;
+
+/*
+ * Details of the xenwatch callback kernel thread. The thread waits on the
+ * watch_events_waitq for work to do (queued on watch_events list). When it
+ * wakes up it acquires the xenwatch_mutex before reading the list and
+ * carrying out work.
+ */
+static pid_t xenwatch_pid;
+struct sx xenwatch_mutex;
+static int watch_events_waitq;
+
+#define xsd_error_count	(sizeof(xsd_errors) / sizeof(xsd_errors[0]))
+
+/*
+ * Translate an error string returned by the store into an errno value
+ * by looking it up in the xsd_errors table.  A string that is not in
+ * the table is logged and reported as EINVAL.
+ */
+static int
+xs_get_error(const char *errorstring)
+{
+	unsigned int idx = 0;
+
+	while (idx < xsd_error_count) {
+		if (strcmp(errorstring, xsd_errors[idx].errstring) == 0)
+			return (xsd_errors[idx].errnum);
+		idx++;
+	}
+
+	log(LOG_WARNING, "XENBUS xen store gave: unknown error %s",
+	    errorstring);
+	return (EINVAL);
+}
+
+extern void kdb_backtrace(void);
+
+/*
+ * Wait for a reply to appear on xs_state.reply_list, dequeue it and
+ * hand its contents to the caller.
+ *
+ * The wait re-checks the list every hz/10 ticks (EWOULDBLOCK from the
+ * timed sleep is not treated as an error).  Any other sleep error is
+ * returned to the caller with nothing dequeued.
+ *
+ * On success *type receives the reply type, *len (when len is not NULL)
+ * the reply length and *result the reply body; the queue entry itself
+ * is freed here.  Returns zero on success.
+ */
+static int
+xs_read_reply(enum xsd_sockmsg_type *type, unsigned int *len, void **result)
+{
+	struct xs_stored_msg *reply;
+	int error;
+
+	mtx_lock(&xs_state.reply_lock);
+	while (TAILQ_EMPTY(&xs_state.reply_list)) {
+		error = mtx_sleep(&xs_state.reply_waitq, &xs_state.reply_lock,
+		    PCATCH, "xswait", hz/10);
+		if (error != 0 && error != EWOULDBLOCK) {
+			mtx_unlock(&xs_state.reply_lock);
+			return (error);
+		}
+	}
+	reply = TAILQ_FIRST(&xs_state.reply_list);
+	TAILQ_REMOVE(&xs_state.reply_list, reply, list);
+	mtx_unlock(&xs_state.reply_lock);
+
+	/* Copy everything out of the entry before releasing it. */
+	*type = reply->hdr.type;
+	if (len != NULL)
+		*len = reply->hdr.len;
+	*result = reply->u.reply.body;
+	free(reply, M_DEVBUF);
+
+	return (0);
+}
+
+#if 0
+/*
+ * Compiled out.  Note that the xb_write() calls below pass two
+ * arguments, whereas the live callers in this file pass a third (lock
+ * object) argument, so this would need updating before being enabled.
+ */
+/* Emergency write. UNUSED*/
+void xenbus_debug_write(const char *str, unsigned int count)
+{
+	struct xsd_sockmsg msg = { 0 };
+
+	msg.type = XS_DEBUG;
+	msg.len = sizeof("print") + count + 1;
+
+	sx_xlock(&xs_state.request_mutex);
+	xb_write(&msg, sizeof(msg));
+	xb_write("print", sizeof("print"));
+	xb_write(str, count);
+	xb_write("", 1);
+	sx_xunlock(&xs_state.request_mutex);
+}
+
+#endif
+
+/*
+ * Forward a caller-built request to the store and wait for its reply.
+ *
+ * The request is sent with a single xb_write() of sizeof(*msg) +
+ * msg->len bytes starting at msg, i.e. the payload is taken from the
+ * memory directly following the header.  If the write fails msg->type
+ * is set to XS_ERROR; otherwise msg->type and msg->len are overwritten
+ * with those of the reply and *result receives the reply body.
+ *
+ * suspend_mutex is taken shared when the request is
+ * XS_TRANSACTION_START.  It is dropped when the resulting msg->type is
+ * XS_TRANSACTION_END, or when a start request ends up as XS_ERROR.
+ *
+ * NOTE(review): when xs_read_reply() itself fails, msg->type still
+ * holds the request type, so a failed XS_TRANSACTION_START appears to
+ * leave the shared lock held -- confirm whether that is intended.
+ *
+ * Returns zero on success or the error from the write or the reply wait.
+ */
+int
+xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
+{
+	/* Snapshot of the request header; msg is overwritten by the reply. */
+	struct xsd_sockmsg req_msg = *msg;
+	int error;
+
+	if (req_msg.type == XS_TRANSACTION_START)
+		sx_slock(&xs_state.suspend_mutex);
+
+	sx_xlock(&xs_state.request_mutex);
+
+	error = xb_write(msg, sizeof(*msg) + msg->len,
+	    &xs_state.request_mutex.lock_object);
+	if (error) {
+		msg->type = XS_ERROR;
+	} else {
+		error = xs_read_reply(&msg->type, &msg->len, result);
+	}
+
+	sx_xunlock(&xs_state.request_mutex);
+
+	if ((msg->type == XS_TRANSACTION_END) ||
+	    ((req_msg.type == XS_TRANSACTION_START) &&
+		(msg->type == XS_ERROR)))
+		sx_sunlock(&xs_state.suspend_mutex);
+
+	return (error);
+}
+
+/*
+ * Send a request made up of a header plus num_vecs payload pieces to
+ * xs and wait for the reply.  The header's length field is the sum of
+ * the iovec lengths.
+ *
+ * On success the reply body is returned in *result and should be
+ * freed with free(*result, M_DEVBUF); if result is NULL the body is
+ * freed here.  A reply of type XS_ERROR is converted to an errno value
+ * with xs_get_error().  Return zero on success or an error code on
+ * failure.
+ */
+static int
+xs_talkv(struct xenbus_transaction t, enum xsd_sockmsg_type type,
+    const struct iovec *iovec, unsigned int num_vecs,
+    unsigned int *len, void **result)
+{
+	struct xsd_sockmsg msg;
+	void *reply = NULL;
+	unsigned int v;
+	int error;
+
+	msg.tx_id = t.id;
+	msg.req_id = 0;
+	msg.type = type;
+	msg.len = 0;
+	for (v = 0; v < num_vecs; v++)
+		msg.len += iovec[v].iov_len;
+
+	sx_xlock(&xs_state.request_mutex);
+
+	/* Header first, then each payload piece; stop at the first failure. */
+	error = xb_write(&msg, sizeof(msg),
+	    &xs_state.request_mutex.lock_object);
+	for (v = 0; error == 0 && v < num_vecs; v++) {
+		error = xb_write(iovec[v].iov_base, iovec[v].iov_len,
+		    &xs_state.request_mutex.lock_object);
+	}
+	if (error != 0) {
+		sx_xunlock(&xs_state.request_mutex);
+		printf("xs_talkv failed %d\n", error);
+		return (error);
+	}
+
+	error = xs_read_reply(&msg.type, len, &reply);
+	sx_xunlock(&xs_state.request_mutex);
+	if (error != 0)
+		return (error);
+
+	if (msg.type == XS_ERROR) {
+		error = xs_get_error(reply);
+		free(reply, M_DEVBUF);
+		return (error);
+	}
+
+#if 0
+	if ((xenwatch_running == 0) && (xenwatch_inline == 0)) {
+		xenwatch_inline = 1;
+		while (!TAILQ_EMPTY(&watch_events) 
+		    && xenwatch_running == 0) {
+						
+			struct xs_stored_msg *wmsg = TAILQ_FIRST(&watch_events);
+			TAILQ_REMOVE(&watch_events, wmsg, list);
+						
+			wmsg->u.watch.handle->callback(
+				wmsg->u.watch.handle,
+				(const char **)wmsg->u.watch.vec,
+				wmsg->u.watch.vec_size);
+			free(wmsg->u.watch.vec, M_DEVBUF);
+			free(wmsg, M_DEVBUF);
+		}
+		xenwatch_inline = 0;
+	}
+#endif
+	KASSERT(msg.type == type, ("bad xenstore message type"));
+
+	if (result != NULL)
+		*result = reply;
+	else
+		free(reply, M_DEVBUF);
+
+	return (0);
+}
+
+/*
+ * Convenience wrapper around xs_talkv() for requests whose payload is
+ * one string, sent including its terminating NUL.
+ */
+static int
+xs_single(struct xenbus_transaction t, enum xsd_sockmsg_type type,
+    const char *string, unsigned int *len, void **result)
+{
+	struct iovec payload;
+
+	payload.iov_len = strlen(string) + 1;
+	payload.iov_base = (void *)(uintptr_t) string;
+
+	return (xs_talkv(t, type, &payload, 1, len, result));
+}
+
+/*
+ * Count the NUL-terminated strings packed back to back in the first
+ * len bytes of strings.
+ */
+static unsigned int
+count_strings(const char *strings, unsigned int len)
+{
+	const char *cur = strings;
+	const char *end = strings + len;
+	unsigned int count = 0;
+
+	while (cur < end) {
+		count++;
+		cur += strlen(cur) + 1;
+	}
+
+	return (count);
+}
+
+/*
+ * Return the path to dir with /name appended, or just a copy of dir
+ * when name is the empty string.  The buffer must be released with
+ * free(buffer, M_DEVBUF).
+ */
+static char *
+join(const char *dir, const char *name)
+{
+	size_t size;
+	char *path;
+
+	size = strlen(dir) + strlen("/") + strlen(name) + 1;
+	path = malloc(size, M_DEVBUF, M_WAITOK);
+
+	strcpy(path, dir);
+	if (name[0] != '\0') {
+		strcat(path, "/");
+		strcat(path, name);
+	}
+
+	return (path);
+}
+
+/*
+ * Turn a buffer of back-to-back NUL-terminated strings into a vector
+ * of pointers to those strings.
+ *
+ * The result is one allocation: the pointer vector first, followed by
+ * a copy of the len bytes of string data.  The input buffer 'strings'
+ * is freed here, so the caller gives up ownership of it.  *num is set
+ * to the number of strings, and the slot after the last string
+ * (ret[*num]) points just past the end of the copied data.
+ *
+ * NOTE(review): the scan uses strlen(), so the data presumably must end
+ * with a NUL inside the first len bytes -- confirm for all callers.
+ */
+static char **
+split(char *strings, unsigned int len, unsigned int *num)
+{
+	char *p, **ret;
+
+	/* Count the strings. */
+	/* One extra slot is reserved for the end marker stored below. */
+	*num = count_strings(strings, len) + 1;
+
+	/* Transfer to one big alloc for easy freeing. */
+	ret = malloc(*num * sizeof(char *) + len, M_DEVBUF, M_WAITOK);
+	memcpy(&ret[*num], strings, len);
+	free(strings, M_DEVBUF);
+
+	/* From here on 'strings' refers to the copy inside the new block. */
+	strings = (char *)&ret[*num];
+	for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
+		ret[(*num)++] = p;
+
+	ret[*num] = strings + len;
+		
+	return ret;
+}
+
+/*
+ * List the entries of the directory dir/node.  On success *result
+ * holds a vector of *num entry names which should be freed with
+ * free(*result, M_DEVBUF).  Returns zero on success or an error code.
+ */
+int
+xenbus_directory(struct xenbus_transaction t, const char *dir,
+    const char *node, unsigned int *num, char ***result)
+{
+	unsigned int len = 0;
+	char *path, *strings;
+	int error;
+
+	path = join(dir, node);
+	error = xs_single(t, XS_DIRECTORY, path, &len, (void **) &strings);
+	free(path, M_DEVBUF);
+
+	/* split() takes over (and frees) the raw reply buffer. */
+	if (error == 0)
+		*result = split(strings, len, num);
+
+	return (error);
+}
+
+/*
+ * Check if a path exists. Return 1 if it does, and 0 if the directory
+ * request for it fails for any reason.
+ */
+int
+xenbus_exists(struct xenbus_transaction t, const char *dir, const char *node)
+{
+	char **d;
+	/* Must be unsigned: xenbus_directory() takes an unsigned int *. */
+	unsigned int dir_n;
+	int error;
+
+	error = xenbus_directory(t, dir, node, &dir_n, &d);
+	if (error)
+		return (0);
+	/* Only the outcome matters; the listing itself is discarded. */
+	free(d, M_DEVBUF);
+	return (1);
+}
+
+/*
+ * Read the value stored at dir/node.  On success the contents are
+ * returned in *result, which should be freed with
+ * free(*result, M_DEVBUF) after use, and the length of the value in
+ * bytes is returned in *len.  On failure *result is left untouched
+ * and the error code is returned.
+ */
+int
+xenbus_read(struct xenbus_transaction t, const char *dir, const char *node,
+    unsigned int *len, void **result)
+{
+	void *value;
+	char *path;
+	int error;
+
+	path = join(dir, node);
+	error = xs_single(t, XS_READ, path, len, &value);
+	free(path, M_DEVBUF);
+
+	if (error == 0)
+		*result = value;
+
+	return (error);
+}
+
+/*
+ * Store string as the value of dir/node.  Returns zero on success or
+ * an error code on failure.
+ */
+int
+xenbus_write(struct xenbus_transaction t, const char *dir, const char *node,
+    const char *string)
+{
+	struct iovec iov[2];
+	char *path;
+	int error;
+
+	path = join(dir, node);
+
+	/* The path is sent with its NUL terminator, the value without. */
+	iov[0].iov_len = strlen(path) + 1;
+	iov[0].iov_base = (void *)(uintptr_t) path;
+	iov[1].iov_len = strlen(string);
+	iov[1].iov_base = (void *)(uintptr_t) string;
+
+	error = xs_talkv(t, XS_WRITE, iov, 2, NULL, NULL);
+	free(path, M_DEVBUF);
+
+	return (error);
+}
+
+/*
+ * Create the directory dir/node.  Returns zero on success or an error
+ * code on failure.
+ */
+int
+xenbus_mkdir(struct xenbus_transaction t, const char *dir, const char *node)
+{
+	char *path = join(dir, node);
+	int error;
+
+	error = xs_single(t, XS_MKDIR, path, NULL, NULL);
+	free(path, M_DEVBUF);
+
+	return (error);
+}
+
+/*
+ * Remove the file or directory dir/node (directories must be empty).
+ * Returns zero on success or an error code on failure.
+ */
+int
+xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node)
+{
+	char *path = join(dir, node);
+	int error;
+
+	error = xs_single(t, XS_RM, path, NULL, NULL);
+	free(path, M_DEVBUF);
+
+	return (error);
+}
+
+/*
+ * Start a transaction: changes by others will not be seen during this
+ * transaction, and changes will not be visible to others until end.
+ *
+ * On success the new transaction id is stored in t->id and the shared
+ * hold on suspend_mutex taken here is kept; on failure that hold is
+ * dropped again before the error is returned.
+ */
+int
+xenbus_transaction_start(struct xenbus_transaction *t)
+{
+	char *id_str;
+	int error;
+
+	sx_slock(&xs_state.suspend_mutex);
+
+	error = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL,
+	    (void **) &id_str);
+	if (error == 0) {
+		/* The reply body is the transaction id in text form. */
+		t->id = strtoul(id_str, NULL, 0);
+		free(id_str, M_DEVBUF);
+	} else {
+		sx_sunlock(&xs_state.suspend_mutex);
+	}
+
+	return (error);
+}
+
+/*
+ * End a transaction.  If abort is true, transaction is discarded
+ * instead of committed.  The shared hold on suspend_mutex is released
+ * whether or not the request succeeds.
+ */
+int
+xenbus_transaction_end(struct xenbus_transaction t, int abort)
+{
+	/* "F" asks the store to discard the transaction, "T" to commit it. */
+	const char *verdict = abort ? "F" : "T";
+	int error;
+
+	error = xs_single(t, XS_TRANSACTION_END, verdict, NULL, NULL);
+
+	sx_sunlock(&xs_state.suspend_mutex);
+
+	return (error);
+}
+
+/*
+ * Single read and scanf: returns zero or errno.
+ *
+ * Reads the value at dir/node and parses it with fmt.  If nothing
+ * could be converted ERANGE is returned; otherwise the number of
+ * conversions is stored in *scancountp when scancountp is not NULL.
+ */
+int
+xenbus_scanf(struct xenbus_transaction t,
+    const char *dir, const char *node, int *scancountp, const char *fmt, ...)
+{
+	va_list ap;
+	int error, ns;
+	char *val;
+
+	error = xenbus_read(t, dir, node, NULL, (void **) &val);
+	if (error)
+		return (error);
+
+	va_start(ap, fmt);
+	ns = vsscanf(val, fmt, ap);
+	va_end(ap);
+	free(val, M_DEVBUF);
+	/*
+	 * Distinctive errno.  A scanf-family function may report an input
+	 * failure as a negative count rather than zero; either way nothing
+	 * was stored through the caller's pointers, so do not report success.
+	 */
+	if (ns <= 0)
+		return (ERANGE);
+	if (scancountp)
+		*scancountp = ns;
+	return (0);
+}
+
+/*
+ * Format a value printf-style and write it to dir/node.  Returns zero
+ * on success or an error code.  The formatted text is limited to
+ * PRINTF_BUFFER_SIZE - 1 characters; longer output trips the KASSERT.
+ */
+int
+xenbus_printf(struct xenbus_transaction t,
+    const char *dir, const char *node, const char *fmt, ...)
+{
+#define PRINTF_BUFFER_SIZE 4096
+	va_list ap;
+	char *buf;
+	int error, written;
+
+	buf = malloc(PRINTF_BUFFER_SIZE, M_DEVBUF, M_WAITOK);
+
+	va_start(ap, fmt);
+	written = vsnprintf(buf, PRINTF_BUFFER_SIZE, fmt, ap);
+	va_end(ap);
+	KASSERT(written < PRINTF_BUFFER_SIZE, ("xenbus_printf: message too large"));
+
+	error = xenbus_write(t, dir, node, buf);
+	free(buf, M_DEVBUF);
+
+	return (error);
+}
+
+/*
+ * Read several nodes under dir in one call.
+ *
+ * Takes tuples of names, scanf-style args, and void **, NULL terminated.
+ * Each tuple is (name, fmt, result); the list ends at the first NULL name.
+ *  - fmt != NULL: the value is parsed with sscanf(value, fmt, result), so
+ *    fmt should contain a single conversion; zero conversions yields EINVAL.
+ *  - fmt == NULL: result is treated as a char ** and receives the raw
+ *    value buffer, which the caller then owns and must free with
+ *    free(..., M_DEVBUF).
+ *
+ * Processing stops at the first error, which is returned; values
+ * already handed out for earlier tuples are not undone.
+ */
+int
+xenbus_gather(struct xenbus_transaction t, const char *dir, ...)
+{
+	va_list ap;
+	const char *name;
+	int error, i;
+
+	/*
+	 * NOTE(review): the purpose of these 10000 yields is not evident
+	 * from this file; presumably a timing workaround -- confirm before
+	 * changing.
+	 */
+	for (i = 0; i < 10000; i++)
+		HYPERVISOR_yield();
+		
+	va_start(ap, dir);
+	error = 0;
+	while (error == 0 && (name = va_arg(ap, char *)) != NULL) {
+		const char *fmt = va_arg(ap, char *);
+		void *result = va_arg(ap, void *);
+		char *p;
+
+		error = xenbus_read(t, dir, name, NULL, (void **) &p);
+		if (error)
+			break;
+
+		if (fmt) {
+			if (sscanf(p, fmt, result) == 0)
+				error = EINVAL;
+			free(p, M_DEVBUF);
+		} else
+			*(char **)result = p;
+	}
+	va_end(ap);
+
+	return (error);
+}
+
+/*
+ * Ask the store to start watching path, identified by token.  Both
+ * strings are sent with their NUL terminators.
+ */
+static int
+xs_watch(const char *path, const char *token)
+{
+	struct iovec vec[2];
+
+	vec[0].iov_len = strlen(path) + 1;
+	vec[0].iov_base = (void *)(uintptr_t) path;
+	vec[1].iov_len = strlen(token) + 1;
+	vec[1].iov_base = (void *)(uintptr_t) token;
+
+	return (xs_talkv(XBT_NIL, XS_WATCH, vec, 2, NULL, NULL));
+}
+
+/*
+ * Ask the store to stop watching path for the watch identified by
+ * token.  Both strings are sent with their NUL terminators.
+ */
+static int
+xs_unwatch(const char *path, const char *token)
+{
+	struct iovec vec[2];
+
+	vec[0].iov_len = strlen(path) + 1;
+	vec[0].iov_base = (void *)(uintptr_t) path;
+	vec[1].iov_len = strlen(token) + 1;
+	vec[1].iov_base = (void *)(uintptr_t) token;
+
+	return (xs_talkv(XBT_NIL, XS_UNWATCH, vec, 2, NULL, NULL));
+}
+
+/*
+ * Look up a registered watch by its token.  The token is the watch
+ * pointer written in hexadecimal; it is only returned if that pointer
+ * is currently on the watches list, otherwise NULL is returned.
+ */
+static struct xenbus_watch *
+find_watch(const char *token)
+{
+	struct xenbus_watch *wanted, *cur;
+
+	wanted = (void *)strtoul(token, NULL, 16);
+
+	/* cur is NULL if the loop runs off the end of the list. */
+	LIST_FOREACH(cur, &watches, list) {
+		if (cur == wanted)
+			break;
+	}
+
+	return (cur);
+}
+
+/*
+ * Register callback to watch this node.
+ *
+ * The watch is linked onto the local list before the store is asked to
+ * start watching.  A reply of EEXIST (the watch is already registered
+ * with the store) is treated as success.  On any other failure the
+ * watch is unlinked again, so that no entry the caller believes is
+ * unregistered stays on the list, and the error is returned.
+ */
+int
+register_xenbus_watch(struct xenbus_watch *watch)
+{
+	/* Pointer in ascii is the token. */
+	char token[sizeof(watch) * 2 + 1];
+	int error;
+
+	sprintf(token, "%lX", (long)watch);
+
+	sx_slock(&xs_state.suspend_mutex);
+
+	mtx_lock(&watches_lock);
+	KASSERT(find_watch(token) == NULL, ("watch already registered"));
+	LIST_INSERT_HEAD(&watches, watch, list);
+	mtx_unlock(&watches_lock);
+
+	error = xs_watch(watch->node, token);
+
+	/* Ignore errors due to multiple registration. */
+	if (error == EEXIST)
+		error = 0;
+
+	/* Undo the insertion above when registration really failed. */
+	if (error != 0) {
+		mtx_lock(&watches_lock);
+		LIST_REMOVE(watch, list);
+		mtx_unlock(&watches_lock);
+	}
+
+	sx_sunlock(&xs_state.suspend_mutex);
+
+	return (error);
+}
+
+/*
+ * Remove a previously registered watch.
+ *
+ * The watch is unlinked from the local list, the store is asked to stop
+ * watching (a failure is only logged), and any events already queued
+ * for this watch are discarded.  Unless called from the xenwatch
+ * thread itself, the function then waits for a callback that may be
+ * running by acquiring and releasing xenwatch_mutex.
+ */
+void
+unregister_xenbus_watch(struct xenbus_watch *watch)
+{
+	struct xs_stored_msg *msg, *tmp;
+	/* Same pointer-as-hex token that was used at registration time. */
+	char token[sizeof(watch) * 2 + 1];
+	int error;
+
+	sprintf(token, "%lX", (long)watch);
+		
+	sx_slock(&xs_state.suspend_mutex);
+
+	mtx_lock(&watches_lock);
+	KASSERT(find_watch(token), ("watch not registered"));
+	LIST_REMOVE(watch, list);
+	mtx_unlock(&watches_lock);
+
+	error = xs_unwatch(watch->node, token);
+	if (error)
+		log(LOG_WARNING, "XENBUS Failed to release watch %s: %i\n",
+		    watch->node, error);
+
+	sx_sunlock(&xs_state.suspend_mutex);
+
+	/* Cancel pending watch events. */
+	mtx_lock(&watch_events_lock);
+	TAILQ_FOREACH_SAFE(msg, &watch_events, list, tmp) {
+		if (msg->u.watch.handle != watch)
+			continue;
+		TAILQ_REMOVE(&watch_events, msg, list);
+		free(msg->u.watch.vec, M_DEVBUF);
+		free(msg, M_DEVBUF);
+	}
+	mtx_unlock(&watch_events_lock);
+
+	/* Flush any currently-executing callback, unless we are it. :-) */
+	/* Taking the lock here from the xenwatch thread would self-deadlock. */
+	if (curproc->p_pid != xenwatch_pid) {
+		sx_xlock(&xenwatch_mutex);
+		sx_xunlock(&xenwatch_mutex);
+	}
+}
+
+/*
+ * Acquire suspend_mutex and then request_mutex exclusively and return
+ * with both still held; xs_resume() is the function that releases them.
+ */
+void
+xs_suspend(void)
+{	
+
+	sx_xlock(&xs_state.suspend_mutex);
+	sx_xlock(&xs_state.request_mutex);
+}
+
+/*
+ * Release the locks taken by xs_suspend() and ask the store to watch
+ * every locally registered watch again.
+ */
+void
+xs_resume(void)
+{
+	struct xenbus_watch *watch;
+	char token[sizeof(watch) * 2 + 1];
+
+	/* Dropped first: xs_watch() below takes request_mutex via xs_talkv(). */
+	sx_xunlock(&xs_state.request_mutex);
+
+	/* No need for watches_lock: the suspend_mutex is sufficient. */
+	LIST_FOREACH(watch, &watches, list) {
+		sprintf(token, "%lX", (long)watch);
+		/* The result is ignored; a failed re-registration goes unreported. */
+		xs_watch(watch->node, token);
+	}
+
+	sx_xunlock(&xs_state.suspend_mutex);
+}
+
+static void
+xenwatch_thread(void *unused)
+{
+	struct xs_stored_msg *msg;
+	/* Loop forever: take one queued watch event and run its callback. */
+	for (;;) {
+		/* Sleep until an event is queued, re-checking every hz/10 ticks. */
+		mtx_lock(&watch_events_lock);
+		while (TAILQ_EMPTY(&watch_events))
+			mtx_sleep(&watch_events_waitq,
+			    &watch_events_lock,
+			    PWAIT | PCATCH, "waitev", hz/10);
+
+		mtx_unlock(&watch_events_lock);
+		sx_xlock(&xenwatch_mutex);
+		/* The queue lock was dropped, so the list may be empty again. */
+		mtx_lock(&watch_events_lock);
+		msg = TAILQ_FIRST(&watch_events);
+		if (msg)
+			TAILQ_REMOVE(&watch_events, msg, list);
+		mtx_unlock(&watch_events_lock);
+		/* The callback runs with xenwatch_mutex held exclusively. */
+		if (msg != NULL) {
+			msg->u.watch.handle->callback(
+				msg->u.watch.handle,
+				(const char **)msg->u.watch.vec,
+				msg->u.watch.vec_size);
+			free(msg->u.watch.vec, M_DEVBUF);
+			free(msg, M_DEVBUF);
+		}
+
+		sx_xunlock(&xenwatch_mutex);
+	}
+}
+
+static int
+xs_process_msg(enum xsd_sockmsg_type *type)
+{
+	struct xs_stored_msg *msg;
+	char *body;
+	int error;
+	/* Read one message; queue it as a watch event or as a reply. */
+	msg = malloc(sizeof(*msg), M_DEVBUF, M_WAITOK);
+	mtx_lock(&xs_state.reply_lock);
+	error = xb_read(&msg->hdr, sizeof(msg->hdr),
+	    &xs_state.reply_lock.mtx_object);
+	mtx_unlock(&xs_state.reply_lock);
+	if (error) {
+		free(msg, M_DEVBUF);
+		return (error);
+	}
+	/* The header gives the body length; one extra byte holds a NUL. */
+	body = malloc(msg->hdr.len + 1, M_DEVBUF, M_WAITOK);
+	mtx_lock(&xs_state.reply_lock);
+	error = xb_read(body, msg->hdr.len,
+	    &xs_state.reply_lock.mtx_object);
+	mtx_unlock(&xs_state.reply_lock);
+	if (error) {
+		free(body, M_DEVBUF);
+		free(msg, M_DEVBUF);
+		return (error);
+	}
+	body[msg->hdr.len] = '\0';
+	/* Report the message type to the caller. */
+	*type = msg->hdr.type;
+	if (msg->hdr.type == XS_WATCH_EVENT) {
+		msg->u.watch.vec = split(body, msg->hdr.len,
+		    &msg->u.watch.vec_size);
+		/* Only events whose token matches a registered watch are kept. */
+		mtx_lock(&watches_lock);
+		msg->u.watch.handle = find_watch(
+			msg->u.watch.vec[XS_WATCH_TOKEN]);
+		if (msg->u.watch.handle != NULL) {
+			mtx_lock(&watch_events_lock);
+			TAILQ_INSERT_TAIL(&watch_events, msg, list);
+			wakeup(&watch_events_waitq);
+			mtx_unlock(&watch_events_lock);
+		} else {
+			free(msg->u.watch.vec, M_DEVBUF);
+			free(msg, M_DEVBUF);
+		}
+		mtx_unlock(&watches_lock);
+	} else {
+		msg->u.reply.body = body;	/* body now belongs to the reply */
+		mtx_lock(&xs_state.reply_lock);
+		TAILQ_INSERT_TAIL(&xs_state.reply_list, msg, list);
+		wakeup(&xs_state.reply_waitq);
+		mtx_unlock(&xs_state.reply_lock);
+	}
+	/* Read errors were returned above; reaching here means success. */
+	return 0;
+}
+
+static void
+xenbus_thread(void *unused)
+{
+	int error;
+	enum xsd_sockmsg_type type;
+	/* Receive loop: process messages forever, printing any read error. */
+	xenbus_running = 1;
+
+	for (;;) {
+		error = xs_process_msg(&type);	/* type is not examined here */
+		if (error) 
+			printf("XENBUS error %d while reading message\n",
+			    error);
+	}
+}
+
+#ifdef XENHVM
+static unsigned long xen_store_mfn;
+char *xen_store;
+
+static inline unsigned long
+hvm_get_parameter(int index)
+{
+	struct xen_hvm_param param;
+	int rc;
+
+	/* Ask the hypervisor for HVM parameter 'index' of this domain. */
+	param.domid = DOMID_SELF;
+	param.index = index;
+	rc = HYPERVISOR_hvm_op(HVMOP_get_param, &param);
+	if (rc == 0)
+		return (param.value);
+	/* Failure is logged and reported to the caller as 0. */
+	printf("hvm_get_parameter: failed to get %d, error %d\n", index, rc);
+	return (0);
+}
+
+#endif
+
+int
+xs_init(void)
+{
+	int error;
+	struct proc *p;
+	/* Set up xenstore state, the ring interrupt and both kernel threads. */
+#ifdef XENHVM	/* HVM: query the hypervisor, then map the store page */
+	xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
+	xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
+	xen_store = pmap_mapdev(xen_store_mfn * PAGE_SIZE, PAGE_SIZE);
+#else
+	xen_store_evtchn = xen_start_info->store_evtchn;
+#endif
+
+	TAILQ_INIT(&xs_state.reply_list);
+	TAILQ_INIT(&watch_events);
+	sx_init(&xenwatch_mutex, "xenwatch");
+
+	/* Locks used by the reply queue and by xs_suspend()/xs_resume(). */
+	mtx_init(&xs_state.reply_lock, "state reply", NULL, MTX_DEF);
+	sx_init(&xs_state.request_mutex, "xenstore request");
+	sx_init(&xs_state.suspend_mutex, "xenstore suspend");
+
+	/* Alternative mutex/semaphore initialisation, compiled out. */
+#if 0
+	mtx_init(&xs_state.suspend_mutex, "xenstore suspend", NULL, MTX_DEF);
+	sema_init(&xs_state.request_mutex, 1, "xenstore request");
+	sema_init(&xenwatch_mutex, 1, "xenwatch");
+#endif
+	mtx_init(&watches_lock, "watches", NULL, MTX_DEF);
+	mtx_init(&watch_events_lock, "watch events", NULL, MTX_DEF);
+
+	/* Initialize the shared memory rings to talk to xenstored */
+	error = xb_init_comms();
+	if (error)
+		return (error);
+	/* xenwatch_pid lets unregister_xenbus_watch() recognise this thread. */
+	xenwatch_running = 1;
+	error = kthread_create(xenwatch_thread, NULL, &p,
+	    RFHIGHPID, 0, "xenwatch");
+	if (error)
+		return (error);
+	xenwatch_pid = p->p_pid;
+
+	error = kthread_create(xenbus_thread, NULL, NULL, 
+	    RFHIGHPID, 0, "xenbus");
+	/* A failure here is returned with the xenwatch thread still running. */
+	return (error);
+}

Property changes on: xen/xenbus/xenbus_xs.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: xen/xenbus/init.txt
===================================================================
--- xen/xenbus/init.txt	(.../stable/6/sys)	(revision 0)
+++ xen/xenbus/init.txt	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,14 @@
+
+
+- frontend driver initializes static xenbus_driver with _ids, _probe, _remove, 
+_resume, _otherend_changed
+
+    - initialization calls xenbus_register_frontend(xenbus_driver)
+
+        - xenbus_register_frontend sets read_otherend_details to read_backend_details
+	then calls xenbus_register_driver_common(xenbus_driver, xenbus_frontend)
+
+	     - xenbus_register_driver_common sets the underlying driver's name to the
+	     xenbus_driver name, its bus to xenbus_frontend's bus, its probe to
+	     xenbus_dev_probe and its remove to xenbus_dev_remove, then calls driver_register
+

Property changes on: xen/xenbus/init.txt
___________________________________________________________________
Added: fbsd:nokeyword
   + true
Added: fbsd:nokeywords
   + true

Index: xen/xenbus/xenbus_client.c
===================================================================
--- xen/xenbus/xenbus_client.c	(.../stable/6/sys)	(revision 0)
+++ xen/xenbus/xenbus_client.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,251 @@
+/******************************************************************************
+ * Client-facing interface for the Xenbus driver.  In other words, the
+ * interface between the Xenbus and the device-specific code, be it the
+ * frontend or the backend of that driver.
+ *
+ * Copyright (C) 2005 XenSource Ltd
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#if 0	/* change to 1 for tracing; NOTE(review): confirm printk exists here */
+#define DPRINTK(fmt, args...) \
+    printk("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
+#else	/* default: DPRINTK expands to a no-op expression */
+#define DPRINTK(fmt, args...) ((void)0)
+#endif
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/cdefs.h>
+#include <sys/types.h>
+#include <sys/malloc.h>
+#include <sys/libkern.h>
+
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/evtchn.h>
+#include <xen/gnttab.h>
+#include <xen/xenbus/xenbusvar.h>
+#include <machine/stdarg.h>
+
+const char *
+xenbus_strstate(XenbusState state)
+{
+	static const char *const state_names[] = {
+		[XenbusStateUnknown]		= "Unknown",
+		[XenbusStateInitialising]	= "Initialising",
+		[XenbusStateInitWait]		= "InitWait",
+		[XenbusStateInitialised]	= "Initialised",
+		[XenbusStateConnected]		= "Connected",
+		[XenbusStateClosing]		= "Closing",
+		[XenbusStateClosed]		= "Closed",
+	};
+	/* Anything past XenbusStateClosed has no entry in the table. */
+	return ((state >= (XenbusStateClosed + 1)) ? "INVALID" : state_names[state]);
+}
+
+int
+xenbus_watch_path(device_t dev, char *path, struct xenbus_watch *watch,
+    void (*callback)(struct xenbus_watch *, const char **, unsigned int))
+{
+	int rc;
+
+	/* Fill in the watch, then hand it to register_xenbus_watch(). */
+	watch->callback = callback;
+	watch->node = path;
+	rc = register_xenbus_watch(watch);
+	if (rc == 0)
+		return (0);
+
+	/* Registration failed: clear the watch and flag the device. */
+	watch->callback = NULL;
+	watch->node = NULL;
+	xenbus_dev_fatal(dev, rc, "adding watch on %s", path);
+	return (rc);
+}
+
+int
+xenbus_watch_path2(device_t dev, const char *path,
+    const char *path2, struct xenbus_watch *watch, 
+    void (*callback)(struct xenbus_watch *, const char **, unsigned int))
+{
+	int error;
+	char *state = malloc(strlen(path) + 1 + strlen(path2) + 1,
+	    M_DEVBUF, M_WAITOK);
+	/* Watch "path/path2"; the joined string is allocated above. */
+	strcpy(state, path);
+	strcat(state, "/");
+	strcat(state, path2);
+	/* On success the string stays referenced through watch->node. */
+	error = xenbus_watch_path(dev, state, watch, callback);
+	if (error) {
+		free(state, M_DEVBUF);	/* not retained on failure */
+	}
+
+	return (error);
+}
+
+/**
+ * Return the path to the error node for the given device: "error/" followed
+ * by the device's node. The buffer comes from malloc(M_DEVBUF, M_WAITOK) and
+ * it is the caller's to release with free(..., M_DEVBUF).
+ */
+static char *
+error_path(device_t dev)
+{
+	char *path_buffer = malloc(strlen("error/")
+	    + strlen(xenbus_get_node(dev)) + 1, M_DEVBUF, M_WAITOK);
+	/* Sized above for both parts plus the terminating NUL. */
+	strcpy(path_buffer, "error/");
+	strcpy(path_buffer + strlen("error/"), xenbus_get_node(dev));
+
+	return (path_buffer);
+}
+
+
+static void
+_dev_error(device_t dev, int err, const char *fmt, va_list ap)
+{
+	int ret;
+	unsigned int len;
+	char *printf_buffer = NULL, *path_buffer = NULL;
+	/* Format "<err> <message>" and write it under the device's error path. */
+#define PRINTF_BUFFER_SIZE 4096
+	printf_buffer = malloc(PRINTF_BUFFER_SIZE, M_DEVBUF, M_WAITOK);
+
+	len = sprintf(printf_buffer, "%i ", err);
+	ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
+	/* len + ret is the untruncated length; it must fit in the buffer. */
+	KASSERT(len + ret <= PRINTF_BUFFER_SIZE-1, ("xenbus error message too big"));
+#if 0	
+	dev_err(&dev->dev, "%s\n", printf_buffer);
+#endif		
+	path_buffer = error_path(dev);
+
+	if (path_buffer == NULL) {
+		printf("xenbus: failed to write error node for %s (%s)\n",
+		       xenbus_get_node(dev), printf_buffer);
+		goto fail;
+	}
+	/* The text is written as entry "error" below path_buffer. */
+	if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
+		printf("xenbus: failed to write error node for %s (%s)\n",
+		       xenbus_get_node(dev), printf_buffer);
+		goto fail;
+	}
+	/* Reached on success as well: both buffers are always released. */
+ fail:
+	if (printf_buffer)
+		free(printf_buffer, M_DEVBUF);
+	if (path_buffer)
+		free(path_buffer, M_DEVBUF);
+}
+
+void
+xenbus_dev_error(device_t dev, int err, const char *fmt, ...)
+{
+	va_list ap;
+	/* printf-style front end to _dev_error(); device state is untouched. */
+	va_start(ap, fmt);
+	_dev_error(dev, err, fmt, ap);
+	va_end(ap);
+}
+
+void
+xenbus_dev_fatal(device_t dev, int err, const char *fmt, ...)
+{
+	va_list ap;
+	/* Report the error exactly as xenbus_dev_error() does... */
+	va_start(ap, fmt);
+	_dev_error(dev, err, fmt, ap);
+	va_end(ap);
+	/* ...and additionally move the device to the Closing state. */
+	xenbus_set_state(dev, XenbusStateClosing);
+}
+
+int
+xenbus_grant_ring(device_t dev, unsigned long ring_mfn, int *refp)
+{
+	int error;
+	grant_ref_t ref;
+	/* Grant the other end access to ring_mfn; *refp gets the grant ref. */
+	error = gnttab_grant_foreign_access(
+		xenbus_get_otherend_id(dev), ring_mfn, 0, &ref);
+	if (error) {
+		xenbus_dev_fatal(dev, error, "granting access to ring page");
+		return (error);
+	}
+	/* *refp is written only on success. */
+	*refp = ref;
+	return (0);
+}
+
+int
+xenbus_alloc_evtchn(device_t dev, int *port)
+{
+	struct evtchn_alloc_unbound op;
+	int rc;
+
+	/* Request an unbound channel in this domain for the other end. */
+	op.dom = DOMID_SELF;
+	op.remote_dom = xenbus_get_otherend_id(dev);
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
+	if (rc == 0) {
+		*port = op.port;
+		return (0);
+	}
+
+	/* The hypercall result is negated before it is reported. */
+	xenbus_dev_fatal(dev, -rc, "allocating event channel");
+	return (-rc);
+}
+
+int
+xenbus_free_evtchn(device_t dev, int port)
+{
+	struct evtchn_close op;
+	int rc;
+
+	/* Close the given port; a failure is recorded on the device. */
+	op.port = port;
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_close, &op);
+	if (rc == 0)
+		return (0);
+
+	xenbus_dev_error(dev, -rc, "freeing event channel %d", port);
+	return (-rc);
+}
+
+XenbusState
+xenbus_read_driver_state(const char *path)
+{
+	XenbusState state;
+	int rc;
+
+	/* Gather the "state" entry for path; any failure reads as Closed. */
+	rc = xenbus_gather(XBT_NIL, path, "state", "%d", &state, NULL);
+	if (rc != 0)
+		return (XenbusStateClosed);
+	return (state);
+}

Property changes on: xen/xenbus/xenbus_client.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: xen/xenbus/xenbus_comms.c
===================================================================
--- xen/xenbus/xenbus_comms.c	(.../stable/6/sys)	(revision 0)
+++ xen/xenbus/xenbus_comms.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,226 @@
+/******************************************************************************
+ * xenbus_comms.c
+ *
+ * Low level code to talks to Xen Store: ringbuffer and event channel.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/sx.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/syslog.h>
+
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+
+#include <xen/xen_intr.h>
+#include <xen/evtchn.h>
+#include <xen/interface/io/xs_wire.h>
+#include <xen/xenbus/xenbus_comms.h>
+
+static unsigned int xenstore_irq;
+
+static inline struct xenstore_domain_interface *
+xenstore_domain_interface(void)
+{
+	/* View the xen_store pointer as the shared ring interface struct. */
+	return (struct xenstore_domain_interface *)xen_store;
+}
+
+static void
+xb_intr(void * arg __attribute__((unused)))
+{
+	/* Wake threads sleeping on xen_store (xb_read()/xb_write() do so). */
+	wakeup(xen_store);
+}
+
+static int
+xb_check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
+{
+	/* Non-zero when at most one full ring of data is outstanding. */
+	return ((prod - cons) <= XENSTORE_RING_SIZE);
+}
+
+static void *
+xb_get_output_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
+    char *buf, uint32_t *len)
+{
+	uint32_t to_wrap = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
+	uint32_t room = XENSTORE_RING_SIZE - (prod - cons);
+	/* Contiguous writable bytes: up to the wrap point or the free space. */
+	*len = (room < to_wrap) ? room : to_wrap;
+	return (buf + MASK_XENSTORE_IDX(prod));
+}
+
+static const void *
+xb_get_input_chunk(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod,
+    const char *buf, uint32_t *len)
+{
+	uint32_t to_wrap = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
+	uint32_t pending = prod - cons;
+	/* Contiguous readable bytes: up to the wrap point or what is queued. */
+	*len = (pending < to_wrap) ? pending : to_wrap;
+	return (buf + MASK_XENSTORE_IDX(cons));
+}
+
+int
+xb_write(const void *tdata, unsigned len, struct lock_object *lock)
+{
+	struct xenstore_domain_interface *intf = xenstore_domain_interface();
+	XENSTORE_RING_IDX cons, prod;
+	const char *data = (const char *)tdata;
+	int error;
+	/* Copy len bytes into the request ring; 'lock' is handed to _sleep(). */
+	while (len != 0) {
+		void *dst;
+		unsigned int avail;
+		/* Ring full: sleep on intf, re-checking every hz/10 ticks. */
+		while ((intf->req_prod - intf->req_cons)
+		    == XENSTORE_RING_SIZE) {
+			error = _sleep(intf,
+			    lock,
+			    PCATCH, "xbwrite", hz/10);
+			if (error && error != EWOULDBLOCK)	/* timeouts re-poll */
+				return (error);
+		}
+
+		/* Read indexes, then verify. */
+		cons = intf->req_cons;
+		prod = intf->req_prod;
+		mb();
+		if (!xb_check_indexes(cons, prod)) {
+			intf->req_cons = intf->req_prod = 0;	/* reset the ring */
+			return (EIO);
+		}
+		/* Copy at most one contiguous chunk per iteration. */
+		dst = xb_get_output_chunk(cons, prod, intf->req, &avail);
+		if (avail == 0)
+			continue;
+		if (avail > len)
+			avail = len;
+		mb();
+
+		memcpy(dst, data, avail);
+		data += avail;
+		len -= avail;
+
+		/* Other side must not see new header until data is there. */
+		wmb();
+		intf->req_prod += avail;
+
+		/* This implies mb() before other side sees interrupt. */
+		notify_remote_via_evtchn(xen_store_evtchn);
+	}
+
+	return (0);
+}
+
+int
+xb_read(void *tdata, unsigned len, struct lock_object *lock)
+{
+	struct xenstore_domain_interface *intf = xenstore_domain_interface();
+	XENSTORE_RING_IDX cons, prod;
+	char *data = (char *)tdata;
+	int error;
+	/* Copy len bytes out of the response ring; 'lock' goes to _sleep(). */
+	while (len != 0) {
+		unsigned int avail;
+		const char *src;
+		/* Ring empty: sleep on intf, re-checking every hz/10 ticks. */
+		while (intf->rsp_cons == intf->rsp_prod) {
+			error = _sleep(intf, lock,
+			    PCATCH, "xbread", hz/10);
+			if (error && error != EWOULDBLOCK)	/* timeouts re-poll */
+				return (error);
+		}
+
+		/* Read indexes, then verify. */
+		cons = intf->rsp_cons;
+		prod = intf->rsp_prod;
+		if (!xb_check_indexes(cons, prod)) {
+			intf->rsp_cons = intf->rsp_prod = 0;	/* reset the ring */
+			return (EIO);
+		}
+		/* Copy at most one contiguous chunk per iteration. */
+		src = xb_get_input_chunk(cons, prod, intf->rsp, &avail);
+		if (avail == 0)
+			continue;
+		if (avail > len)
+			avail = len;
+
+		/* We must read header before we read data. */
+		rmb();
+
+		memcpy(data, src, avail);
+		data += avail;
+		len -= avail;
+
+		/* Other side must not see free space until we've copied out */
+		mb();
+		intf->rsp_cons += avail;
+
+		/* Implies mb(): they will see new header. */
+		notify_remote_via_evtchn(xen_store_evtchn);
+	}
+
+	return (0);
+}
+
+/* Set up interrupt handler off store event channel; returns 0 or an error. */
+int
+xb_init_comms(void)
+{
+	struct xenstore_domain_interface *intf = xenstore_domain_interface();
+	int error;
+	/* Discard any responses already sitting in the ring. */
+	if (intf->rsp_prod != intf->rsp_cons) {
+		log(LOG_WARNING, "XENBUS response ring is not quiescent "
+		    "(%08x:%08x): fixing up\n",
+		    intf->rsp_cons, intf->rsp_prod);
+		intf->rsp_cons = intf->rsp_prod;
+	}
+	/* Drop a binding left by an earlier call before binding again. */
+	if (xenstore_irq)
+		unbind_from_irqhandler(xenstore_irq);
+
+	error = bind_caller_port_to_irqhandler(
+		xen_store_evtchn, "xenbus",
+		    xb_intr, NULL, INTR_TYPE_NET, &xenstore_irq);
+	if (error) {
+		log(LOG_WARNING, "XENBUS request irq failed %i\n", error);
+		return (error);
+	}
+
+	return (0);
+}

Property changes on: xen/xenbus/xenbus_comms.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: xen/xenbus/xenbus_dev.c
===================================================================
--- xen/xenbus/xenbus_dev.c	(.../stable/6/sys)	(revision 0)
+++ xen/xenbus/xenbus_dev.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,229 @@
+/*
+ * xenbus_dev.c
+ * 
+ * Driver giving user-space access to the kernel's xenbus connection
+ * to xenstore.
+ * 
+ * Copyright (c) 2005, Christian Limpach
+ * Copyright (c) 2005, Rusty Russell, IBM Corporation
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/cdefs.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/xenbus/xenbusvar.h>
+#include <xen/xenbus/xenbus_comms.h>
+
+struct xenbus_dev_transaction {	/* one open transaction of a device user */
+	LIST_ENTRY(xenbus_dev_transaction) list;	/* link in 'transactions' */
+	struct xenbus_transaction handle;	/* id is matched against tx_id */
+};
+
+struct xenbus_dev_data {
+	/* In-progress transactions started through this device. */
+	LIST_HEAD(xdd_list_head, xenbus_dev_transaction) transactions;
+
+	/* Partial request: 'len' bytes accumulated so far in u.buffer. */
+	unsigned int len;
+	union {
+		struct xsd_sockmsg msg;	/* header view of the same bytes */
+		char buffer[PAGE_SIZE];
+	} u;
+
+	/* Response queue: a PAGE_SIZE ring; indexes are masked on access. */
+#define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1))
+	char read_buffer[PAGE_SIZE];
+	unsigned int read_cons, read_prod;
+};
+
+static int 
+xenbus_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	int error;
+	struct xenbus_dev_data *u = dev->si_drv1;
+	/* Block until reply bytes are queued, then copy out what is there. */
+	while (u->read_prod == u->read_cons) {
+		error = tsleep(u, PCATCH, "xbdread", hz/10);
+		if (error && error != EWOULDBLOCK)	/* timeouts re-poll */
+			return (error);
+	}
+	/* Bytes move one at a time so the ring index can wrap per byte. */
+	while (uio->uio_resid > 0) {
+		if (u->read_cons == u->read_prod)
+			break;
+		error = uiomove(&u->read_buffer[MASK_READ_IDX(u->read_cons)],
+		    1, uio);
+		if (error)
+			return (error);
+		u->read_cons++;
+	}
+	return (0);
+}
+
+static void
+queue_reply(struct xenbus_dev_data *u, char *data, unsigned int len)
+{
+	int i;
+	/* Append len bytes to the read ring and wake sleepers on u. */
+	for (i = 0; i < len; i++, u->read_prod++)
+		u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i];
+	/* Checked only after the copy; an overrun has already wrapped. */
+	KASSERT((u->read_prod - u->read_cons) <= sizeof(u->read_buffer),
+	    ("xenstore reply too big"));
+
+	wakeup(u);
+}
+
+static int
+xenbus_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	int error;
+	struct xenbus_dev_data *u = dev->si_drv1;
+	struct xenbus_dev_transaction *trans;
+	void *reply;
+	int len = uio->uio_resid;
+
+	/* Accumulate a request; once complete, forward it and queue the reply. */
+	if ((len + u->len) > sizeof(u->u.buffer))
+		return (EINVAL);
+	error = uiomove(u->u.buffer + u->len, len, uio);
+	if (error)
+		return (error);
+	u->len += len;
+	/* msg.len is valid only after a full header; bound it to the buffer. */
+	if (u->len < sizeof(u->u.msg))
+		return (0);
+	if (u->u.msg.len > sizeof(u->u.buffer) - sizeof(u->u.msg))
+		return (EINVAL);
+	if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
+		return (0);
+	switch (u->u.msg.type) {
+	case XS_TRANSACTION_START:
+	case XS_TRANSACTION_END:
+	case XS_DIRECTORY:
+	case XS_READ:
+	case XS_GET_PERMS:
+	case XS_RELEASE:
+	case XS_GET_DOMAIN_PATH:
+	case XS_WRITE:
+	case XS_MKDIR:
+	case XS_RM:
+	case XS_SET_PERMS:
+		error = xenbus_dev_request_and_reply(&u->u.msg, &reply);
+		if (!error) {
+			if (u->u.msg.type == XS_TRANSACTION_START) {
+				trans = malloc(sizeof(*trans), M_DEVBUF,
+				    M_WAITOK);
+				trans->handle.id = strtoul(reply, NULL, 0);
+				LIST_INSERT_HEAD(&u->transactions, trans, list);
+			} else if (u->u.msg.type == XS_TRANSACTION_END) {
+				LIST_FOREACH(trans, &u->transactions, list)
+					if (trans->handle.id == u->u.msg.tx_id)
+						break;
+				/* trans is NULL when no entry matched tx_id. */
+				if (trans != NULL) {
+					LIST_REMOVE(trans, list);
+					free(trans, M_DEVBUF);
+				}
+			}
+			queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
+			queue_reply(u, (char *)reply, u->u.msg.len);
+			free(reply, M_DEVBUF);
+		}
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	if (error == 0)
+		u->len = 0;
+	return (error);
+}
+
+static int
+xenbus_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
+{
+	struct xenbus_dev_data *u;
+	/* Refuse the open while no store event channel is known. */
+	if (xen_store_evtchn == 0)
+		return (ENOENT);
+#if 0 /* XXX figure out if equiv needed */
+	nonseekable_open(inode, filp);
+#endif
+	u = malloc(sizeof(*u), M_DEVBUF, M_WAITOK|M_ZERO);
+	LIST_INIT(&u->transactions);
+        dev->si_drv1 = u;
+	/* NOTE(review): si_drv1 is per cdev; a second open overwrites it. */
+	return (0);
+}
+
+static int
+xenbus_dev_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
+{
+	struct xenbus_dev_data *u = dev->si_drv1;
+	struct xenbus_dev_transaction *trans, *tmp;
+	/* End every transaction still open (flag 1: presumably abort). */
+	LIST_FOREACH_SAFE(trans, &u->transactions, list, tmp) {
+		xenbus_transaction_end(trans->handle, 1);
+		LIST_REMOVE(trans, list);
+		free(trans, M_DEVBUF);
+	}
+	/* si_drv1 is left pointing at the freed state. */
+	free(u, M_DEVBUF);
+	return (0);
+}
+
+static struct cdevsw xenbus_dev_cdevsw = {	/* entry points for the device */
+	.d_version = D_VERSION,	
+	.d_read = xenbus_dev_read,	/* returns queued replies */
+	.d_write = xenbus_dev_write,	/* accepts requests */
+	.d_open = xenbus_dev_open,
+	.d_close = xenbus_dev_close,
+	.d_name = "xenbus_dev",
+};
+
+static int
+xenbus_dev_sysinit(void)
+{
+	make_dev(&xenbus_dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0400,
+	    "xen/xenbus");
+	/* Node is root:wheel, mode 0400; make_dev()'s result is not kept. */
+	return (0);
+}
+SYSINIT(xenbus_dev_sysinit, SI_SUB_DRIVERS, SI_ORDER_MIDDLE,
+    xenbus_dev_sysinit, NULL);

Property changes on: xen/xenbus/xenbus_dev.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: xen/xenbus/xenbus_probe_backend.c
===================================================================
--- xen/xenbus/xenbus_probe_backend.c	(.../stable/6/sys)	(revision 0)
+++ xen/xenbus/xenbus_probe_backend.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,308 @@
+/******************************************************************************
+ * Talks to Xen Store to figure out what devices we have (backend half).
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
+ * Copyright (C) 2005, 2006 XenSource Ltd
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#if 0	/* change to 1 to enable DPRINTK tracing through printf */
+#define DPRINTK(fmt, args...) \
+    printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
+#else	/* default: DPRINTK expands to a no-op expression */
+#define DPRINTK(fmt, args...) ((void)0)
+#endif
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/cdefs.h>
+#include <sys/time.h>
+#include <sys/sema.h>
+#include <sys/eventhandler.h>
+#include <sys/errno.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/conf.h>
+#include <sys/systm.h>
+#include <sys/syslog.h>
+#include <sys/proc.h>
+#include <sys/bus.h>
+#include <sys/sx.h>
+
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <machine/xen/xenbus.h>
+#include <machine/stdarg.h>
+
+#include <xen/evtchn.h>
+#include <xen/xenbus/xenbus_comms.h>
+
+#define BUG_ON        PANIC_IF	/* Linux-style names mapped to local ones */
+#define semaphore     sema
+#define rw_semaphore  sema
+#define DEFINE_SPINLOCK(lock) struct mtx lock
+#define DECLARE_MUTEX(lock) struct sema lock
+#define u32           uint32_t
+#define list_del(head, ent)      TAILQ_REMOVE(head, ent, list)
+#define simple_strtoul strtoul
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))	/* arrays only */
+#define list_empty    TAILQ_EMPTY
+
+extern struct xendev_list_head xenbus_device_backend_list;
+#if 0
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+				 int num_envp, char *buffer, int buffer_size);
+#endif
+static int xenbus_probe_backend(const char *type, const char *domid);
+
+static int read_frontend_details(struct xenbus_device *xendev)
+{	/* For a backend, the other end is described by the frontend keys. */
+	return read_otherend_details(xendev, "frontend-id", "frontend");
+}
+
+/* backend/<type>/<fe-uuid>/<id> => <type>-<fe-domid>-<id> */
+static int backend_bus_id(char bus_id[BUS_ID_SIZE], const char *nodename)
+{
+	int domid, err;
+	const char *devid, *type, *frontend;
+	unsigned int typelen;
+	/* Fills bus_id from nodename; returns 0 or a negative errno value. */
+	type = strchr(nodename, '/');
+	if (!type)
+		return -EINVAL;
+	type++;
+	typelen = strcspn(type, "/");
+	if (!typelen || type[typelen] != '/')
+		return -EINVAL;
+	/* devid is the last path component (a '/' was found above). */
+	devid = strrchr(nodename, '/') + 1;
+	/* The frontend path must be non-empty and pass xenbus_exists(). */
+	err = xenbus_gather(XBT_NIL, nodename, "frontend-id", "%i", &domid,
+			    "frontend", NULL, &frontend,
+			    NULL);
+	if (err)
+		return err;
+	if (strlen(frontend) == 0)
+		err = -ERANGE;
+	if (!err && !xenbus_exists(XBT_NIL, frontend, ""))
+		err = -ENOENT;
+	kfree(frontend);	/* gathered string is released on every path */
+
+	if (err)
+		return err;
+
+	if (snprintf(bus_id, BUS_ID_SIZE,
+		     "%.*s-%i-%s", typelen, type, domid, devid) >= BUS_ID_SIZE)
+		return -ENOSPC;	/* the id would have been truncated */
+	return 0;
+}
+
+static struct xen_bus_type xenbus_backend = {
+	.root = "backend",	/* top-level directory that is probed */
+	.levels = 3, 		/* backend/type/<frontend>/<id> */
+	.get_bus_id = backend_bus_id,
+	.probe = xenbus_probe_backend,
+	.bus = &xenbus_device_backend_list,
+	/* The initialisers below are compiled out. */
+#if 0
+	.error = -ENODEV,
+	.bus = {
+		.name     = "xen-backend",
+		.match    = xenbus_match,
+		.probe    = xenbus_dev_probe,
+		.remove   = xenbus_dev_remove,
+//		.shutdown = xenbus_dev_shutdown,
+		.uevent   = xenbus_uevent_backend,
+	},
+	.dev = {
+		.bus_id = "xen-backend",
+	},
+#endif	
+};
+
+#if 0	/* uevent hook, compiled out */
+static int xenbus_uevent_backend(struct device *dev, char **envp,
+				 int num_envp, char *buffer, int buffer_size)
+{
+	struct xenbus_device *xdev;
+	struct xenbus_driver *drv;
+	int i = 0;
+	int length = 0;
+
+	DPRINTK("");
+
+	if (dev == NULL)
+		return -ENODEV;
+
+	xdev = to_xenbus_device(dev);
+	if (xdev == NULL)
+		return -ENODEV;
+
+	/* stuff we want to pass to /sbin/hotplug */
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "XENBUS_TYPE=%s", xdev->devicetype);
+
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "XENBUS_PATH=%s", xdev->nodename);
+
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "XENBUS_BASE_PATH=%s", xenbus_backend.root);
+
+	/* terminate, set to next free slot, shrink available space */
+	envp[i] = NULL;
+	envp = &envp[i];
+	num_envp -= i;
+	buffer = &buffer[length];
+	buffer_size -= length;
+
+	if (dev->driver) {
+		drv = to_xenbus_driver(dev->driver);
+		if (drv && drv->uevent)
+			return drv->uevent(xdev, envp, num_envp, buffer,
+					   buffer_size);
+	}
+
+	return 0;
+}
+#endif
+
+int xenbus_register_backend(struct xenbus_driver *drv)
+{
+	drv->read_otherend_details = read_frontend_details;
+	/* Install the backend hook above, then do the common registration. */
+	return xenbus_register_driver_common(drv, &xenbus_backend);
+}
+
+/* backend/<typename>/<frontend-uuid>/<name> */
+static int xenbus_probe_backend_unit(const char *dir,
+				     const char *type,
+				     const char *name)
+{
+	char *nodename;
+	int err;
+	/* Probe the node "dir/name"; returns 0 or a negative errno value. */
+	nodename = kasprintf("%s/%s", dir, name);
+	if (!nodename)
+		return -ENOMEM;
+
+	DPRINTK("%s\n", nodename);
+
+	err = xenbus_probe_node(&xenbus_backend, type, nodename);
+	kfree(nodename);	/* released whatever the probe result */
+	return err;
+}
+
+/* backend/<typename>/<frontend-domid> */
+static int xenbus_probe_backend(const char *type, const char *domid)
+{
+	char *nodename;
+	int err = 0;
+	char **dir;
+	unsigned int i, dir_n = 0;
+	/* Probe every entry listed under "<root>/<type>/<domid>". */
+	DPRINTK("");
+
+	nodename = kasprintf("%s/%s/%s", xenbus_backend.root, type, domid);
+	if (!nodename)
+		return -ENOMEM;
+
+	dir = xenbus_directory(XBT_NIL, nodename, "", &dir_n);
+	if (IS_ERR(dir)) {
+		kfree(nodename);
+		return PTR_ERR(dir);
+	}
+	/* Stop at the first unit that fails and return its error. */
+	for (i = 0; i < dir_n; i++) {
+		err = xenbus_probe_backend_unit(nodename, type, dir[i]);
+		if (err)
+			break;
+	}
+	kfree(dir);
+	kfree(nodename);
+	return err;
+}
+
+static void backend_changed(struct xenbus_watch *watch,
+			    const char **vec, unsigned int len)
+{
+	DPRINTK("");
+	/* Watch callback: pass the changed path on to dev_changed(). */
+	dev_changed(vec[XS_WATCH_PATH], &xenbus_backend);
+}
+
+static struct xenbus_watch be_watch = {	/* watch on the backend root */
+	.node = "backend",
+	.callback = backend_changed,
+};
+#if 0	/* suspend/resume iteration over backend devices, compiled out */
+void xenbus_backend_suspend(int (*fn)(struct device *, void *))
+{
+	DPRINTK("");
+	if (!xenbus_backend.error)	/* skipped if an error is recorded */
+		bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
+}
+
+void xenbus_backend_resume(int (*fn)(struct device *, void *))
+{
+	DPRINTK("");
+	if (!xenbus_backend.error)	/* skipped if an error is recorded */
+		bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn);
+}
+#endif
+void xenbus_backend_probe_and_watch(void)
+{	/* Probe what exists now, then watch the backend root for changes. */
+	xenbus_probe_devices(&xenbus_backend);
+	register_xenbus_watch(&be_watch);	/* return value is ignored */
+}
+
+#if 0	/* bus and device registration, compiled out */
+void xenbus_backend_bus_register(void)
+{
+	xenbus_backend.error = bus_register(&xenbus_backend.bus);
+	if (xenbus_backend.error)
+		log(LOG_WARNING,
+		       "XENBUS: Error registering backend bus: %i\n",
+		       xenbus_backend.error);
+}
+
+void xenbus_backend_device_register(void)
+{
+	if (xenbus_backend.error)
+		return;
+	/* A failed device registration also unregisters the bus. */
+	xenbus_backend.error = device_register(&xenbus_backend.dev);
+	if (xenbus_backend.error) {
+		bus_unregister(&xenbus_backend.bus);
+		log(LOG_WARNING,
+		       "XENBUS: Error registering backend device: %i\n",
+		       xenbus_backend.error);
+	}
+}
+#endif

Property changes on: xen/xenbus/xenbus_probe_backend.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: xen/xenbus/xenbusvar.h
===================================================================
--- xen/xenbus/xenbusvar.h	(.../stable/6/sys)	(revision 0)
+++ xen/xenbus/xenbusvar.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,259 @@
+/******************************************************************************
+ * xenbusvar.h
+ *
+ * Talks to Xen Store to figure out what devices we have.
+ *
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 XenSource Ltd.
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _XEN_XENBUS_XENBUSVAR_H
+#define _XEN_XENBUS_XENBUSVAR_H
+
+#include <sys/queue.h>
+#include <sys/bus.h>
+#include <sys/eventhandler.h>
+#include <machine/xen/xen-os.h>
+#include <xen/interface/io/xenbus.h>
+#include <xen/interface/io/xs_wire.h>
+
+#include "xenbus_if.h"
+
+enum {
+	/*
+	 * Path of this device node.
+	 */
+	XENBUS_IVAR_NODE,
+
+	/*
+	 * The device type (e.g. vif, vbd).
+	 */
+	XENBUS_IVAR_TYPE,
+
+	/*
+	 * The state of this device (not the otherend's state).
+	 */
+	XENBUS_IVAR_STATE,
+
+	/*
+	 * Domain ID of the other end device.
+	 */
+	XENBUS_IVAR_OTHEREND_ID,
+
+	/*
+	 * Path of the other end device.
+	 */
+	XENBUS_IVAR_OTHEREND_PATH
+};
+
+/*
+ * Simplified accessors for xenbus devices
+ */
+#define	XENBUS_ACCESSOR(var, ivar, type) \
+	__BUS_ACCESSOR(xenbus, var, XENBUS, ivar, type)
+
+XENBUS_ACCESSOR(node,		NODE,			const char *)
+XENBUS_ACCESSOR(type,		TYPE,			const char *)
+XENBUS_ACCESSOR(state,		STATE,			enum xenbus_state)
+XENBUS_ACCESSOR(otherend_id,	OTHEREND_ID,		int)
+XENBUS_ACCESSOR(otherend_path,	OTHEREND_PATH,		const char *)
+
+/* Register callback to watch this node. */
+struct xenbus_watch
+{
+	LIST_ENTRY(xenbus_watch) list;
+
+	/* Path being watched. */
+	char *node;
+
+	/* Callback (executed in a process context with no locks held). */
+	void (*callback)(struct xenbus_watch *,
+			 const char **vec, unsigned int len);
+};
+
+typedef int (*xenstore_event_handler_t)(void *);
+
+struct xenbus_transaction
+{
+		uint32_t id;
+};
+
+#define XBT_NIL ((struct xenbus_transaction) { 0 })
+
+int xenbus_directory(struct xenbus_transaction t, const char *dir,
+    const char *node, unsigned int *num, char ***result);
+int xenbus_read(struct xenbus_transaction t, const char *dir,
+    const char *node, unsigned int *len, void **result);
+int xenbus_write(struct xenbus_transaction t, const char *dir,
+    const char *node, const char *string);
+int xenbus_mkdir(struct xenbus_transaction t, const char *dir,
+    const char *node);
+int xenbus_exists(struct xenbus_transaction t, const char *dir,
+    const char *node);
+int xenbus_rm(struct xenbus_transaction t, const char *dir, const char *node);
+int xenbus_transaction_start(struct xenbus_transaction *t);
+int xenbus_transaction_end(struct xenbus_transaction t, int abort);
+
+/*
+ * Single read and scanf: returns errno or zero. If scancountp is
+ * non-null, then number of items scanned is returned in *scancountp.
+ */
+int xenbus_scanf(struct xenbus_transaction t,
+    const char *dir, const char *node, int *scancountp, const char *fmt, ...)
+	__attribute__((format(scanf, 5, 6)));
+
+/* Single printf and write: returns errno or 0. */
+int xenbus_printf(struct xenbus_transaction t,
+		  const char *dir, const char *node, const char *fmt, ...)
+	__attribute__((format(printf, 4, 5)));
+
+/*
+ * Generic read function: NULL-terminated triples of name,
+ * scanf-style format string, and pointer. Returns 0 or errno.
+ */
+int xenbus_gather(struct xenbus_transaction t, const char *dir, ...);
+
+/* Notifier routines for when the xenstore comes up. */
+int register_xenstore_notifier(xenstore_event_handler_t func, void *arg, int priority);
+#if 0
+void unregister_xenstore_notifier();
+#endif
+int register_xenbus_watch(struct xenbus_watch *watch);
+void unregister_xenbus_watch(struct xenbus_watch *watch);
+void xs_suspend(void);
+void xs_resume(void);
+
+/* Used by xenbus_dev to borrow kernel's store connection. */
+int xenbus_dev_request_and_reply(struct xsd_sockmsg *msg, void **result);
+
+#if 0
+
+#define XENBUS_IS_ERR_READ(str) ({			\
+	if (!IS_ERR(str) && strlen(str) == 0) {		\
+		free(str, M_DEVBUF);				\
+		str = ERR_PTR(-ERANGE);			\
+	}						\
+	IS_ERR(str);					\
+})
+
+#endif
+
+#define XENBUS_EXIST_ERR(err) ((err) == ENOENT || (err) == ERANGE)
+
+
+/**
+ * Register a watch on the given path, using the given xenbus_watch structure
+ * for storage, and the given callback function as the callback.  Return 0 on
+ * success, or errno on error.  On success, the given path will be saved as
+ * watch->node, and remains the caller's to free.  On error, watch->node will
+ * be NULL, the device will switch to XenbusStateClosing, and the error will
+ * be saved in the store.
+ */
+int xenbus_watch_path(device_t dev, char *path,
+		      struct xenbus_watch *watch, 
+		      void (*callback)(struct xenbus_watch *,
+				       const char **, unsigned int));
+
+
+/**
+ * Register a watch on the given path/path2, using the given xenbus_watch
+ * structure for storage, and the given callback function as the callback.
+ * Return 0 on success, or errno on error.  On success, the watched path
+ * (path/path2) will be saved as watch->node, and becomes the caller's to
+ * free().  On error, watch->node will be NULL, so the caller has nothing to
+ * free, the device will switch to XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_watch_path2(device_t dev, const char *path,
+		       const char *path2, struct xenbus_watch *watch, 
+		       void (*callback)(struct xenbus_watch *,
+					const char **, unsigned int));
+
+
+/**
+ * Advertise in the store a change of the given driver to the given new_state.
+ * The caller does not supply a transaction.  Return 0 on
+ * success, or errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_switch_state(device_t dev,
+			XenbusState new_state);
+
+
+/**
+ * Grant access to the given ring_mfn to the peer of the given device.
+ * Return 0 on success, or errno on error.  On error, the device will
+ * switch to XenbusStateClosing, and the error will be saved in the
+ * store. The grant ring reference is returned in *refp.
+ */
+int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, int *refp);
+
+
+/**
+ * Allocate an event channel for the given xenbus_device, assigning the newly
+ * created local port to *port.  Return 0 on success, or errno on error.  On
+ * error, the device will switch to XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_alloc_evtchn(device_t dev, int *port);
+
+
+/**
+ * Free an existing event channel. Returns 0 on success or errno on error.
+ */
+int xenbus_free_evtchn(device_t dev, int port);
+
+
+/**
+ * Return the state of the driver rooted at the given store path, or
+ * XenbusStateClosed if no state can be read.
+ */
+XenbusState xenbus_read_driver_state(const char *path);
+
+
+/***
+ * Report the given negative errno into the store, along with the given
+ * formatted message.
+ */
+void xenbus_dev_error(device_t dev, int err, const char *fmt,
+		      ...);
+
+
+/***
+ * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
+ * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
+ * closedown of this driver and its peer.
+ */
+void xenbus_dev_fatal(device_t dev, int err, const char *fmt,
+		      ...);
+
+int xenbus_dev_init(void);
+
+const char *xenbus_strstate(enum xenbus_state state);
+int xenbus_dev_is_online(device_t dev);
+int xenbus_frontend_closed(device_t dev);
+
+#endif /* _XEN_XENBUS_XENBUSVAR_H */

Property changes on: xen/xenbus/xenbusvar.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:mergeinfo
   Merged /stable/7/sys/i386/include/xen/xenbus.h:r172506,172810,175956,179044,179776,180149,182402
   Merged /head/sys/i386/include/xen/xenbus.h:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628
Added: svn:eol-style
   + native

Index: xen/xenbus/xenbus_comms.h
===================================================================
--- xen/xenbus/xenbus_comms.h	(.../stable/6/sys)	(revision 0)
+++ xen/xenbus/xenbus_comms.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,48 @@
+/*
+ * Private include for xenbus communications.
+ * 
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ *
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _XENBUS_COMMS_H
+#define _XENBUS_COMMS_H
+
+struct sx;
+struct lock_object;	/* named in the xb_write/xb_read prototypes below */
+extern int xen_store_evtchn;
+extern char *xen_store;
+
+int xs_init(void);
+int xb_init_comms(void);
+
+/* Low level routines. */
+int xb_write(const void *data, unsigned len, struct lock_object *);
+int xb_read(void *data, unsigned len, struct lock_object *);
+extern int xenbus_running;
+
+char *kasprintf(const char *fmt, ...);
+
+#endif /* _XENBUS_COMMS_H */

Property changes on: xen/xenbus/xenbus_comms.h
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: xen/xenbus/xenbus_probe.c
===================================================================
--- xen/xenbus/xenbus_probe.c	(.../stable/6/sys)	(revision 0)
+++ xen/xenbus/xenbus_probe.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,603 @@
+/******************************************************************************
+ * Talks to Xen Store to figure out what devices we have.
+ *
+ * Copyright (C) 2008 Doug Rabson
+ * Copyright (C) 2005 Rusty Russell, IBM Corporation
+ * Copyright (C) 2005 Mike Wray, Hewlett-Packard
+ * Copyright (C) 2005 XenSource Ltd
+ * 
+ * This file may be distributed separately from the Linux kernel, or
+ * incorporated into other software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#if 0
+#define DPRINTK(fmt, args...) \
+    printf("xenbus_probe (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
+#else
+#define DPRINTK(fmt, args...) ((void)0)
+#endif
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/sysctl.h>
+#include <sys/syslog.h>
+#include <sys/systm.h>
+#include <sys/sx.h>
+#include <sys/taskqueue.h>
+
+#include <machine/xen/xen-os.h>
+#include <machine/stdarg.h>
+
+#include <xen/gnttab.h>
+#include <xen/xenbus/xenbusvar.h>
+#include <xen/xenbus/xenbus_comms.h>
+
+struct xenbus_softc {
+	struct xenbus_watch xs_devicewatch;
+	struct task	xs_probechildren;
+	struct intr_config_hook xs_attachcb;
+	device_t	xs_dev;
+};
+
+struct xenbus_device_ivars {
+	struct xenbus_watch xd_otherend_watch; /* must be first */
+	struct sx	xd_lock;
+	device_t	xd_dev;
+	char		*xd_node;	/* node name in xenstore */
+	char		*xd_type;	/* xen device type */
+	enum xenbus_state xd_state;
+	int		xd_otherend_id;
+	char		*xd_otherend_path;
+};
+
+/* Simplified asprintf: format into a new M_DEVBUF buffer the caller frees. */
+char *
+kasprintf(const char *fmt, ...)
+{
+	va_list ap;
+	unsigned int len;
+	char *p, dummy[1];
+
+	va_start(ap, fmt);
+	/* FIXME: vsnprintf has a bug, NULL should work */
+	len = vsnprintf(dummy, 0, fmt, ap);
+	va_end(ap);
+
+	p = malloc(len + 1, M_DEVBUF, M_WAITOK);
+	va_start(ap, fmt);
+	vsnprintf(p, len + 1, fmt, ap);	/* bounded by the allocation */
+	va_end(ap);
+	return p;
+}
+
+static void
+xenbus_identify(driver_t *driver, device_t parent)
+{
+
+	BUS_ADD_CHILD(parent, 0, "xenbus", 0);
+}
+
+static int 
+xenbus_probe(device_t dev)
+{
+	int err = 0;
+
+	DPRINTK("");
+
+	/* Initialize the interface to xenstore. */
+	err = xs_init(); 
+	if (err) {
+		log(LOG_WARNING,
+		    "XENBUS: Error initializing xenstore comms: %i\n", err);
+		return (ENXIO);
+	}
+	err = gnttab_init();
+	if (err) {
+		log(LOG_WARNING,
+		    "XENBUS: Error initializing grant table: %i\n", err);
+		return (ENXIO);
+	}
+	device_set_desc(dev, "Xen Devices");
+
+	return (0);
+}
+
+static enum xenbus_state
+xenbus_otherend_state(struct xenbus_device_ivars *ivars)
+{
+
+	return (xenbus_read_driver_state(ivars->xd_otherend_path));
+}
+
+static void
+xenbus_backend_changed(struct xenbus_watch *watch, const char **vec,
+    unsigned int len)
+{
+	struct xenbus_device_ivars *ivars;
+	device_t dev;
+	enum xenbus_state newstate;
+
+	ivars = (struct xenbus_device_ivars *) watch;
+	dev = ivars->xd_dev;
+
+	if (!ivars->xd_otherend_path
+	    || strncmp(ivars->xd_otherend_path, vec[XS_WATCH_PATH],
+		strlen(ivars->xd_otherend_path)))
+		return;
+
+	newstate = xenbus_otherend_state(ivars);
+	XENBUS_BACKEND_CHANGED(dev, newstate);
+}
+
+static int
+xenbus_device_exists(device_t dev, const char *node)
+{
+	device_t *kids;
+	struct xenbus_device_ivars *ivars;
+	int i, count, result;
+
+	if (device_get_children(dev, &kids, &count))
+		return (FALSE);
+
+	result = FALSE;
+	for (i = 0; i < count; i++) {
+		ivars = device_get_ivars(kids[i]);
+		if (!strcmp(ivars->xd_node, node)) {
+			result = TRUE;
+			break;
+		}
+	}
+	free(kids, M_TEMP);
+
+	return (result);
+}
+
+/*
+ * Start tracking xenstore node bus/type/id as a new child of dev.  Nodes that
+ * are already children, or not in state Initialising, are ignored.  Returns
+ * 0 if the node was added or ignored, else the error from xenbus_gather().
+ */
+static int
+xenbus_add_device(device_t dev, const char *bus,
+    const char *type, const char *id)
+{
+	device_t child;
+	struct xenbus_device_ivars *ivars;
+	enum xenbus_state state;
+	char *statepath;
+	int error = 0;
+
+	ivars = malloc(sizeof(struct xenbus_device_ivars),
+	    M_DEVBUF, M_ZERO|M_WAITOK);
+	ivars->xd_node = kasprintf("%s/%s/%s", bus, type, id);
+
+	/* We are already tracking this node. */
+	if (xenbus_device_exists(dev, ivars->xd_node))
+		goto discard;
+
+	/*
+	 * Device is not new, so ignore it. This can happen if a device
+	 * is going away after switching to Closed.
+	 */
+	state = xenbus_read_driver_state(ivars->xd_node);
+	if (state != XenbusStateInitialising)
+		goto discard;
+
+	/* Find the backend details. */
+	error = xenbus_gather(XBT_NIL, ivars->xd_node,
+	    "backend-id", "%i", &ivars->xd_otherend_id,
+	    "backend", NULL, &ivars->xd_otherend_path,
+	    NULL);
+	if (error)
+		goto discard;	/* free ivars rather than leak them */
+
+	sx_init(&ivars->xd_lock, "xdlock");
+	ivars->xd_type = strdup(type, M_DEVBUF);
+	ivars->xd_state = XenbusStateInitialising;
+
+	statepath = malloc(strlen(ivars->xd_otherend_path)
+	    + strlen("/state") + 1, M_DEVBUF, M_WAITOK);
+	sprintf(statepath, "%s/state", ivars->xd_otherend_path);
+
+	/* Set up, but do not register, the watch on the other end's state. */
+	ivars->xd_otherend_watch.node = statepath;
+	ivars->xd_otherend_watch.callback = xenbus_backend_changed;
+
+	child = device_add_child(dev, NULL, -1);
+	ivars->xd_dev = child;
+	device_set_ivars(child, ivars);
+
+	return (0);
+
+discard:
+	/* Release the partially built ivars; error is 0 for ignored nodes. */
+	free(ivars->xd_node, M_DEVBUF);
+	free(ivars, M_DEVBUF);
+	return (error);
+}
+
+static int
+xenbus_enumerate_type(device_t dev, const char *bus, const char *type)
+{
+	char **dir;
+	unsigned int i, count;
+	int error;
+
+	error = xenbus_directory(XBT_NIL, bus, type, &count, &dir);
+	if (error)
+		return (error);
+	for (i = 0; i < count; i++)
+		xenbus_add_device(dev, bus, type, dir[i]);
+
+	free(dir, M_DEVBUF);
+
+	return (0);
+}
+
+static int
+xenbus_enumerate_bus(device_t dev, const char *bus)
+{
+	char **dir;
+	unsigned int i, count;
+	int error;
+
+	error = xenbus_directory(XBT_NIL, bus, "", &count, &dir);
+	if (error)
+		return (error);
+	for (i = 0; i < count; i++) {
+		xenbus_enumerate_type(dev, bus, dir[i]);
+	}
+	free(dir, M_DEVBUF);
+
+	return (0);
+}
+
+static int
+xenbus_probe_children(device_t dev)
+{
+	device_t *kids;
+	struct xenbus_device_ivars *ivars;
+	int i, count;
+
+	/*
+	 * Probe any new devices and register watches for any that
+	 * attach successfully. Since part of the protocol which
+	 * establishes a connection with the other end is interrupt
+	 * driven, we sleep until the device reaches a stable state
+	 * (closed or connected).
+	 */
+	if (device_get_children(dev, &kids, &count) == 0) {
+		for (i = 0; i < count; i++) {
+			if (device_get_state(kids[i]) != DS_NOTPRESENT)
+				continue;
+
+			if (device_probe_and_attach(kids[i]))
+				continue;
+			ivars = device_get_ivars(kids[i]);
+			register_xenbus_watch(
+				&ivars->xd_otherend_watch);
+			sx_xlock(&ivars->xd_lock);
+			while (ivars->xd_state != XenbusStateClosed
+			    && ivars->xd_state != XenbusStateConnected)
+				sx_sleep(&ivars->xd_state, &ivars->xd_lock,
+				    0, "xdattach", 0);
+			sx_xunlock(&ivars->xd_lock);
+		}
+		free(kids, M_TEMP);
+	}
+
+	return (0);
+}
+
+static void
+xenbus_probe_children_cb(void *arg, int pending)
+{
+	device_t dev = (device_t) arg;
+
+	xenbus_probe_children(dev);
+}
+
+/* Watch callback for "device": start tracking the changed node, reprobe. */
+static void
+xenbus_devices_changed(struct xenbus_watch *watch,
+    const char **vec, unsigned int len)
+{
+	/* The watch is the first member of the softc, so the cast is valid. */
+	struct xenbus_softc *sc = (struct xenbus_softc *) watch;
+	device_t dev = sc->xs_dev;
+	char *node, *bus, *type, *id, *p;
+
+	/* Split a private copy of "bus/type/id[/...]" in place. */
+	node = strdup(vec[XS_WATCH_PATH], M_DEVBUF);
+	p = strchr(node, '/');
+	if (!p)
+		goto out;
+	bus = node;
+	*p = 0;
+	type = p + 1;
+	p = strchr(type, '/');
+	if (!p)
+		goto out;
+	*p = 0;
+	id = p + 1;
+	p = strchr(id, '/');
+	if (p)
+		*p = 0;	/* ignore anything below the device node */
+	xenbus_add_device(dev, bus, type, id);
+	taskqueue_enqueue(taskqueue_thread, &sc->xs_probechildren);
+out:
+	free(node, M_DEVBUF);
+}
+
+/*
+ * Deferred half of attach, run as an interrupt config hook: enumerate the
+ * "device" tree, probe children, then watch "device" for later changes.
+ */
+static void
+xenbus_attach_deferred(void *arg)
+{
+	device_t dev = (device_t) arg;
+	struct xenbus_softc *sc = device_get_softc(dev);
+
+	if (xenbus_enumerate_bus(dev, "device") != 0)
+		goto out;
+	xenbus_probe_children(dev);
+	sc->xs_dev = dev;
+	sc->xs_devicewatch.node = "device";
+	sc->xs_devicewatch.callback = xenbus_devices_changed;
+	TASK_INIT(&sc->xs_probechildren, 0, xenbus_probe_children_cb, dev);
+	register_xenbus_watch(&sc->xs_devicewatch);
+out:
+	/* Always release the hook, even on error, so boot is not held up. */
+	config_intrhook_disestablish(&sc->xs_attachcb);
+}
+
+static int
+xenbus_attach(device_t dev)
+{
+	struct xenbus_softc *sc = device_get_softc(dev);
+
+	sc->xs_attachcb.ich_func = xenbus_attach_deferred;
+	sc->xs_attachcb.ich_arg = dev;
+	config_intrhook_establish(&sc->xs_attachcb);
+
+	return (0);
+}
+
+static int
+xenbus_suspend(device_t dev)
+{
+	int error;
+
+	DPRINTK("");
+
+	error = bus_generic_suspend(dev);
+	if (error)
+		return (error);
+
+	xs_suspend();
+
+	return (0);
+}
+
+/*
+ * Bus resume: call xb_init_comms() and xs_resume(), then look up the backend
+ * of each present child again.  Returns 0 or the xenbus_gather() error.
+ */
+static int
+xenbus_resume(device_t dev)
+{
+	device_t *kids;
+	struct xenbus_device_ivars *ivars;
+	int i, count, error = 0;
+	char *statepath;
+
+	xb_init_comms();
+	xs_resume();
+
+	/* Re-examine each device and find the new path for its backend. */
+	if (device_get_children(dev, &kids, &count) == 0) {
+		for (i = 0; i < count; i++) {
+			if (device_get_state(kids[i]) == DS_NOTPRESENT)
+				continue;
+			ivars = device_get_ivars(kids[i]);
+
+			unregister_xenbus_watch(&ivars->xd_otherend_watch);
+			ivars->xd_state = XenbusStateInitialising;
+
+			/*
+			 * Find the new backend details and re-register our
+			 * watch.  Clear the stale path first so that nothing
+			 * is left dangling if the lookup fails.
+			 */
+			free(ivars->xd_otherend_path, M_DEVBUF);
+			ivars->xd_otherend_path = NULL;
+			error = xenbus_gather(XBT_NIL, ivars->xd_node,
+			    "backend-id", "%i", &ivars->xd_otherend_id,
+			    "backend", NULL, &ivars->xd_otherend_path,
+			    NULL);
+			if (error)
+				break;	/* fall out so kids is still freed */
+
+			DEVICE_RESUME(kids[i]);
+
+			statepath = malloc(strlen(ivars->xd_otherend_path)
+			    + strlen("/state") + 1, M_DEVBUF, M_WAITOK);
+			sprintf(statepath, "%s/state", ivars->xd_otherend_path);
+
+			free(ivars->xd_otherend_watch.node, M_DEVBUF);
+			ivars->xd_otherend_watch.node = statepath;
+			register_xenbus_watch(&ivars->xd_otherend_watch);
+
+#if 0
+			/*
+			 * Can't do this yet since we are running in
+			 * the xenwatch thread and if we sleep here,
+			 * we will stop delivering watch notifications
+			 * and the device will never come back online.
+			 */
+			sx_xlock(&ivars->xd_lock);
+			while (ivars->xd_state != XenbusStateClosed
+			    && ivars->xd_state != XenbusStateConnected)
+				sx_sleep(&ivars->xd_state, &ivars->xd_lock,
+				    0, "xdresume", 0);
+			sx_xunlock(&ivars->xd_lock);
+#endif
+		}
+		free(kids, M_TEMP);
+	}
+
+	return (error);
+}
+
+static int
+xenbus_print_child(device_t dev, device_t child)
+{
+	struct xenbus_device_ivars *ivars = device_get_ivars(child);
+	int	retval = 0;
+
+	retval += bus_print_child_header(dev, child);
+	retval += printf(" at %s", ivars->xd_node);
+	retval += bus_print_child_footer(dev, child);
+
+	return (retval);
+}
+
+static int
+xenbus_read_ivar(device_t dev, device_t child, int index,
+    uintptr_t * result)
+{
+	struct xenbus_device_ivars *ivars = device_get_ivars(child);
+
+	switch (index) {
+	case XENBUS_IVAR_NODE:
+		*result = (uintptr_t) ivars->xd_node;
+		return (0);
+
+	case XENBUS_IVAR_TYPE:
+		*result = (uintptr_t) ivars->xd_type;
+		return (0);
+
+	case XENBUS_IVAR_STATE:
+		*result = (uintptr_t) ivars->xd_state;
+		return (0);
+
+	case XENBUS_IVAR_OTHEREND_ID:
+		*result = (uintptr_t) ivars->xd_otherend_id;
+		return (0);
+
+	case XENBUS_IVAR_OTHEREND_PATH:
+		*result = (uintptr_t) ivars->xd_otherend_path;
+		return (0);
+	}
+
+	return (ENOENT);
+}
+
+static int
+xenbus_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
+{
+	struct xenbus_device_ivars *ivars = device_get_ivars(child);
+	enum xenbus_state newstate;
+	int currstate;
+	int error;
+
+	switch (index) {
+	case XENBUS_IVAR_STATE:
+		newstate = (enum xenbus_state) value;
+		sx_xlock(&ivars->xd_lock);
+		if (ivars->xd_state == newstate)
+			goto out;
+
+		error = xenbus_scanf(XBT_NIL, ivars->xd_node, "state",
+		    NULL, "%d", &currstate);
+		if (error)
+			goto out;
+
+		error = xenbus_printf(XBT_NIL, ivars->xd_node, "state",
+		    "%d", newstate);
+		if (error) {
+			if (newstate != XenbusStateClosing) /* Avoid looping */
+				xenbus_dev_fatal(dev, error, "writing new state");
+			goto out;
+		}
+		ivars->xd_state = newstate;
+		wakeup(&ivars->xd_state);
+	out:
+		sx_xunlock(&ivars->xd_lock);
+		return (0);
+
+	case XENBUS_IVAR_NODE:
+	case XENBUS_IVAR_TYPE:
+	case XENBUS_IVAR_OTHEREND_ID:
+	case XENBUS_IVAR_OTHEREND_PATH:
+		/*
+		 * These variables are read-only.
+		 */
+		return (EINVAL);
+	}
+
+	return (ENOENT);
+}
+
+SYSCTL_DECL(_dev);
+SYSCTL_NODE(_dev, OID_AUTO, xen, CTLFLAG_RD, NULL, "Xen");
+SYSCTL_INT(_dev_xen, OID_AUTO, xsd_port, CTLFLAG_RD, &xen_store_evtchn, 0, "");
+SYSCTL_ULONG(_dev_xen, OID_AUTO, xsd_kva, CTLFLAG_RD, (u_long *) &xen_store, 0, "");
+
+static device_method_t xenbus_methods[] = { 
+	/* Device interface */ 
+	DEVMETHOD(device_identify,	xenbus_identify),
+	DEVMETHOD(device_probe,         xenbus_probe), 
+	DEVMETHOD(device_attach,        xenbus_attach), 
+	DEVMETHOD(device_detach,        bus_generic_detach), 
+	DEVMETHOD(device_shutdown,      bus_generic_shutdown), 
+	DEVMETHOD(device_suspend,       xenbus_suspend), 
+	DEVMETHOD(device_resume,        xenbus_resume), 
+ 
+	/* Bus interface */ 
+	DEVMETHOD(bus_print_child,      xenbus_print_child),
+	DEVMETHOD(bus_read_ivar,        xenbus_read_ivar), 
+	DEVMETHOD(bus_write_ivar,       xenbus_write_ivar), 
+ 
+	{ 0, 0 } 
+}; 
+
+static char driver_name[] = "xenbus";
+static driver_t xenbus_driver = { 
+	driver_name, 
+	xenbus_methods, 
+	sizeof(struct xenbus_softc),
+}; 
+devclass_t xenbus_devclass; 
+ 
+#ifdef XENHVM
+DRIVER_MODULE(xenbus, xenpci, xenbus_driver, xenbus_devclass, 0, 0);
+#else
+DRIVER_MODULE(xenbus, nexus, xenbus_driver, xenbus_devclass, 0, 0);
+#endif

Property changes on: xen/xenbus/xenbus_probe.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: xen/xenbus/xenbus_if.m
===================================================================
--- xen/xenbus/xenbus_if.m	(.../stable/6/sys)	(revision 0)
+++ xen/xenbus/xenbus_if.m	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,37 @@
+#-
+# Copyright (c) 2008 Doug Rabson
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+#
+
+#include <sys/bus.h>
+#include <xen/interface/io/xenbus.h>
+
+INTERFACE xenbus;
+
+METHOD int backend_changed {
+	device_t dev;
+	enum xenbus_state newstate;
+};
Index: xen/gnttab.h
===================================================================
--- xen/gnttab.h	(.../stable/6/sys)	(revision 0)
+++ xen/gnttab.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,164 @@
+/******************************************************************************
+ * gnttab.h
+ * 
+ * Two sets of functionality:
+ * 1. Granting foreign access to our memory reservation.
+ * 2. Accessing others' memory reservations via grant references.
+ * (i.e., mechanisms for both sender and recipient of grant references)
+ * 
+ * Copyright (c) 2004-2005, K A Fraser
+ * Copyright (c) 2005, Christopher Clark
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __ASM_GNTTAB_H__
+#define __ASM_GNTTAB_H__	/* complete the include guard opened above */
+#include <xen/interface/grant_table.h>
+
+#include <xen/hypervisor.h>
+#include <xen/interface/grant_table.h>
+#include <machine/xen/xen-os.h>
+#include <xen/features.h>
+
+struct gnttab_free_callback {
+	struct gnttab_free_callback *next;
+	void (*fn)(void *);
+	void *arg;
+	uint16_t count;
+};
+
+int gnttab_init(void);
+
+/*
+ * Allocate a grant table reference and return it in *result. Returns
+ * zero on success or errno on error.
+ */
+int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
+    int flags, grant_ref_t *result);
+
+/*
+ * End access through the given grant reference, iff the grant entry is no
+ * longer in use.  Return 1 if the grant entry was freed, 0 if it is still in
+ * use.
+ */
+int gnttab_end_foreign_access_ref(grant_ref_t ref);
+
+/*
+ * Eventually end access through the given grant reference, and once that
+ * access has been ended, free the given page too.  Access will be ended
+ * immediately iff the grant entry is not in use, otherwise it will happen
+ * some time later.  page may be 0, in which case no freeing will occur.
+ */
+void gnttab_end_foreign_access(grant_ref_t ref, void *page);
+
+int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn, grant_ref_t *result);
+
+unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref);
+unsigned long gnttab_end_foreign_transfer(grant_ref_t ref);
+
+int gnttab_query_foreign_access(grant_ref_t ref);
+
+/*
+ * operations on reserved batches of grant references
+ */
+int gnttab_alloc_grant_references(uint16_t count, grant_ref_t *pprivate_head);
+
+void gnttab_free_grant_reference(grant_ref_t ref);
+
+void gnttab_free_grant_references(grant_ref_t head);
+
+int gnttab_empty_grant_references(const grant_ref_t *pprivate_head);
+
+int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);
+
+void gnttab_release_grant_reference(grant_ref_t *private_head,
+				    grant_ref_t release);
+
+void gnttab_request_free_callback(struct gnttab_free_callback *callback,
+				  void (*fn)(void *), void *arg, uint16_t count);
+void gnttab_cancel_free_callback(struct gnttab_free_callback *callback);
+
+void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
+				     unsigned long frame, int flags);
+
+void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid,
+				       unsigned long pfn);
+
+int gnttab_suspend(void);
+int gnttab_resume(void);
+
+#if 0
+
+#include <xen/features.h>
+
+static inline void
+gnttab_set_map_op(struct gnttab_map_grant_ref *map, vm_paddr_t addr,
+		  uint32_t flags, grant_ref_t ref, domid_t domid)
+{
+	if (flags & GNTMAP_contains_pte)
+		map->host_addr = addr;
+	else if (xen_feature(XENFEAT_auto_translated_physmap))
+		map->host_addr = vtophys(addr);
+	else
+		map->host_addr = addr;
+
+	map->flags = flags;
+	map->ref = ref;
+	map->dom = domid;
+}
+
+static inline void
+gnttab_set_unmap_op(struct gnttab_unmap_grant_ref *unmap, vm_paddr_t addr,
+		    uint32_t flags, grant_handle_t handle)
+{
+	if (flags & GNTMAP_contains_pte)
+		unmap->host_addr = addr;
+	else if (xen_feature(XENFEAT_auto_translated_physmap))
+		unmap->host_addr = vtophys(addr);
+	else
+		unmap->host_addr = addr;
+
+	unmap->handle = handle;
+	unmap->dev_bus_addr = 0;
+}
+
+static inline void
+gnttab_set_replace_op(struct gnttab_unmap_and_replace *unmap, vm_paddr_t addr,
+		      vm_paddr_t new_addr, grant_handle_t handle)
+{
+	if (xen_feature(XENFEAT_auto_translated_physmap)) {
+		unmap->host_addr = vtophys(addr);
+		unmap->new_addr = vtophys(new_addr);
+	} else {
+		unmap->host_addr = addr;
+		unmap->new_addr = new_addr;
+	}
+
+	unmap->handle = handle;
+}
+#endif
+
+#endif /* __ASM_GNTTAB_H__ */

Property changes on: xen/gnttab.h
___________________________________________________________________
Added: fbsd:nokeywords
   + true

Index: xen/evtchn/evtchn_dev.c
===================================================================
--- xen/evtchn/evtchn_dev.c	(.../stable/6/sys)	(revision 0)
+++ xen/evtchn/evtchn_dev.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,392 @@
+/******************************************************************************
+ * evtchn.c
+ * 
+ * Xenolinux driver for receiving and demuxing event-channel signals.
+ * 
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/selinfo.h>
+#include <sys/poll.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/ioccom.h>
+
+#include <machine/xen/xen-os.h>
+#include <xen/xen_intr.h>
+#include <machine/bus.h>
+#include <sys/rman.h>
+#include <machine/resource.h>
+#include <machine/xen/synch_bitops.h>
+#include <xen/hypervisor.h>
+#include <xen/evtchn.h>
+
+
+/* Per-device state; only holds the selinfo that evtchn_poll() records on. */
+typedef struct evtchn_softc {
+	struct selinfo  ev_rsel;
+} evtchn_softc_t;
+
+
+#ifdef linuxcrap
+/* NB. This must be shared amongst drivers if more things go in /dev/xen */
+static devfs_handle_t xen_dev_dir;
+#endif
+
+/* Only one process may open /dev/xen/evtchn at any time. */
+static unsigned long evtchn_dev_inuse;
+
+/* Notification ring, accessed via /dev/xen/evtchn. */
+
+#define EVTCHN_RING_SIZE     2048  /* 2048 16-bit entries */
+
+#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+static uint16_t *ring;
+static unsigned int ring_cons, ring_prod, ring_overflow;
+
+/* Which ports is user-space bound to? */
+static uint32_t bound_ports[32];
+
+/* Unique address for processes to sleep on */
+static void *evtchn_waddr = &ring;
+
+static struct mtx lock, upcall_lock;
+
+static d_read_t      evtchn_read;
+static d_write_t     evtchn_write;
+static d_ioctl_t     evtchn_ioctl;
+static d_poll_t      evtchn_poll;
+static d_open_t      evtchn_open;
+static d_close_t     evtchn_close;
+
+
+void 
+evtchn_device_upcall(int port)
+{
+	mtx_lock(&upcall_lock);
+
+	mask_evtchn(port);
+	clear_evtchn(port);
+
+	if ( ring != NULL ) {
+		if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) {
+			ring[EVTCHN_RING_MASK(ring_prod)] = (uint16_t)port;
+			if ( ring_cons == ring_prod++ ) {
+				wakeup(evtchn_waddr);
+			}
+		}
+		else {
+			ring_overflow = 1;
+		}
+	}
+
+	mtx_unlock(&upcall_lock);
+}
+
+static void 
+__evtchn_reset_buffer_ring(void)
+{
+	/* Initialise the ring to empty. Clear errors. */
+	ring_cons = ring_prod = ring_overflow = 0;
+}
+
+/*
+ * d_read handler: copy queued 16-bit port numbers from the notification
+ * ring to the caller, sleeping while the ring is empty.
+ *
+ * Returns 0 on success (uiomove() has already updated the uio to reflect
+ * the bytes moved), EFBIG if the ring overflowed, EINTR if tsleep() came
+ * back non-zero with the ring still empty, or EFAULT if the copy failed.
+ */
+static int
+evtchn_read(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	int rc;
+	unsigned int count, c, p, sst = 0, bytes1 = 0, bytes2 = 0;
+
+	count = uio->uio_resid;
+	count &= ~1; /* even number of bytes */
+
+	if ( count == 0 ) {
+		rc = 0;
+		goto out;
+	}
+
+	if ( count > PAGE_SIZE )
+		count = PAGE_SIZE;
+
+	for ( ; ; ) {
+		if ( (c = ring_cons) != (p = ring_prod) )
+			break;
+
+		if ( ring_overflow ) {
+			rc = EFBIG;
+			goto out;
+		}
+
+		if (sst != 0) {
+			rc = EINTR;
+			goto out;
+		}
+
+		/* PCATCH == check for signals before and after sleeping
+		 * PWAIT == priority of waiting on resource
+		 */
+		sst = tsleep(evtchn_waddr, PWAIT|PCATCH, "evchwt", 10);
+	}
+
+	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
+	if ( ((c ^ p) & EVTCHN_RING_SIZE) != 0 ) {
+		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) * sizeof(uint16_t);
+		bytes2 = EVTCHN_RING_MASK(p) * sizeof(uint16_t);
+	} else {
+		bytes1 = (p - c) * sizeof(uint16_t);
+		bytes2 = 0;
+	}
+
+	/* Truncate chunks according to caller's maximum byte count. */
+	if ( bytes1 > count ) {
+		bytes1 = count;
+		bytes2 = 0;
+	} else if ( (bytes1 + bytes2) > count ) {
+		bytes2 = count - bytes1;
+	}
+
+	if ( uiomove(&ring[EVTCHN_RING_MASK(c)], bytes1, uio) ||
+	     ((bytes2 != 0) && uiomove(&ring[0], bytes2, uio))) {
+		rc = EFAULT;
+		goto out;
+	}
+
+	ring_cons += (bytes1 + bytes2) / sizeof(uint16_t);
+
+	/* A d_read routine reports success as 0, never as a byte count. */
+	rc = 0;
+ out:
+	return rc;
+}
+
+/*
+ * d_write handler: the caller supplies 16-bit port numbers; each one that
+ * is currently bound through this device is unmasked again.
+ * Returns 0 on success or EFAULT if the user buffer cannot be copied in.
+ */
+static int
+evtchn_write(struct cdev *dev, struct uio *uio, int ioflag)
+{
+	int rc, i, count;
+	uint16_t *kbuf;
+
+	count = uio->uio_resid;
+	kbuf = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK);
+
+	count &= ~1; /* even number of bytes */
+	if ( count == 0 ) {
+		rc = 0;
+		goto out;
+	}
+	if ( count > PAGE_SIZE )
+		count = PAGE_SIZE;
+
+	if ( uiomove(kbuf, count, uio) != 0 ) {
+		rc = EFAULT;
+		goto out;
+	}
+
+	mtx_lock_spin(&lock);
+	for ( i = 0; i < (count/2); i++ )
+		/* Ports come from userland: never index past bound_ports[]. */
+		if ( kbuf[i] < sizeof(bound_ports) * NBBY &&
+		     test_bit(kbuf[i], &bound_ports[0]) )
+			unmask_evtchn(kbuf[i]);
+	mtx_unlock_spin(&lock);
+
+	rc = 0;	/* success is 0; a byte count would be taken for an errno */
+ out:
+	free(kbuf, M_DEVBUF);
+	return rc;
+}
+
+static int 
+evtchn_ioctl(struct cdev *dev, unsigned long cmd, caddr_t arg, 
+	     int mode, struct thread *td __unused)
+{
+	int rc = 0;
+    
+	mtx_lock_spin(&lock);
+    
+	switch ( cmd )
+	{
+	case EVTCHN_RESET:
+		__evtchn_reset_buffer_ring();
+		break;
+	case EVTCHN_BIND:
+		if ( !synch_test_and_set_bit((uintptr_t)arg, &bound_ports[0]) )
+			unmask_evtchn((uintptr_t)arg);
+		else
+			rc = EINVAL;
+		break;
+	case EVTCHN_UNBIND:
+		if ( synch_test_and_clear_bit((uintptr_t)arg, &bound_ports[0]) )
+			mask_evtchn((uintptr_t)arg);
+		else
+			rc = EINVAL;
+		break;
+	default:
+		rc = ENOSYS;
+		break;
+	}
+
+	mtx_unlock_spin(&lock);   
+
+	return rc;
+}
+
+static int
+evtchn_poll(struct cdev *dev, int poll_events, struct thread *td)
+{
+
+	evtchn_softc_t *sc;
+	unsigned int mask = POLLOUT | POLLWRNORM;
+    
+	sc = dev->si_drv1;
+    
+	if ( ring_cons != ring_prod )
+		mask |= POLLIN | POLLRDNORM;
+	else if ( ring_overflow )
+		mask = POLLERR;
+	else
+		selrecord(td, &sc->ev_rsel);
+
+
+	return mask;
+}
+
+
+static int 
+evtchn_open(struct cdev *dev, int flag, int otyp, struct thread *td)
+{
+	uint16_t *_ring;
+    
+	if (flag & O_NONBLOCK)
+		return EBUSY;
+
+	if ( synch_test_and_set_bit(0, &evtchn_dev_inuse) )
+		return EBUSY;
+
+	if ( (_ring = (uint16_t *)malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK)) == NULL )
+		return ENOMEM;
+
+	mtx_lock_spin(&lock);
+	ring = _ring;
+	__evtchn_reset_buffer_ring();
+	mtx_unlock_spin(&lock);
+
+
+	return 0;
+}
+
+/* d_close: drop the ring, mask and unbind all ports, mark the device free. */
+static int
+evtchn_close(struct cdev *dev, int flag, int otyp, struct thread *td __unused)
+{
+	uint16_t *old_ring;
+	int i;
+
+	mtx_lock_spin(&lock);
+	old_ring = ring;	/* detach now, release after unlocking */
+	ring = NULL;
+	for ( i = 0; i < NR_EVENT_CHANNELS; i++ )
+		if ( synch_test_and_clear_bit(i, &bound_ports[0]) )
+			mask_evtchn(i);
+	mtx_unlock_spin(&lock);
+	if (old_ring != NULL)	/* free(9) must not run under a spin mutex */
+		free(old_ring, M_DEVBUF);
+	evtchn_dev_inuse = 0;
+	return 0;
+}
+
+static struct cdevsw evtchn_devsw = {
+	d_version:   D_VERSION,
+	d_open:      evtchn_open,
+	d_close:     evtchn_close,
+	d_read:      evtchn_read,
+	d_write:     evtchn_write,
+	d_ioctl:     evtchn_ioctl,
+	d_poll:      evtchn_poll,
+	d_name:      "evtchn",
+	d_flags:     0,
+};
+
+
+/* XXX  - if this device is ever supposed to support use by more than one process
+ * this global static will have to go away
+ */
+static struct cdev *evtchn_dev;
+
+
+
+static int 
+evtchn_dev_init(void *dummy __unused)
+{
+	/* XXX I believe we don't need these leaving them here for now until we 
+	 * have some semblance of it working 
+	 */
+	mtx_init(&upcall_lock, "evtchup", NULL, MTX_DEF);
+
+	/* (DEVFS) create '/dev/misc/evtchn'. */
+	evtchn_dev = make_dev(&evtchn_devsw, 0, UID_ROOT, GID_WHEEL, 0600, "xen/evtchn");
+
+	mtx_init(&lock, "evch", NULL, MTX_SPIN | MTX_NOWITNESS);
+
+	evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK);
+	bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t));
+
+	/* XXX I don't think we need any of this rubbish */
+#if 0
+	if ( err != 0 )
+	{
+		printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+		return err;
+	}
+
+	/* (DEVFS) create directory '/dev/xen'. */
+	xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL);
+
+	/* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */
+	pos = devfs_generate_path(evtchn_miscdev.devfs_handle, 
+				  &link_dest[3], 
+				  sizeof(link_dest) - 3);
+	if ( pos >= 0 )
+		strncpy(&link_dest[pos], "../", 3);
+	/* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */
+	(void)devfs_mk_symlink(xen_dev_dir, 
+			       "evtchn", 
+			       DEVFS_FL_DEFAULT, 
+			       &link_dest[pos],
+			       &symlink_handle, 
+			       NULL);
+
+	/* (DEVFS) automatically destroy the symlink with its destination. */
+	devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle);
+#endif
+	if (bootverbose)
+		printf("Event-channel device installed.\n");
+
+	return 0;
+}
+
+SYSINIT(evtchn_dev_init, SI_SUB_DRIVERS, SI_ORDER_FIRST, evtchn_dev_init, NULL);
+
+

Property changes on: xen/evtchn/evtchn_dev.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: xen/evtchn/evtchn.c
===================================================================
--- xen/evtchn/evtchn.c	(.../stable/6/sys)	(revision 0)
+++ xen/evtchn/evtchn.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,1106 @@
+/******************************************************************************
+ * evtchn.c
+ * 
+ * Communication via Xen event channels.
+ * 
+ * Copyright (c) 2002-2005, K A Fraser
+ * Copyright (c) 2005-2006 Kip Macy
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/bus.h>
+#include <sys/limits.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/interrupt.h>
+#include <sys/pcpu.h>
+#include <sys/smp.h>
+
+#include <machine/cpufunc.h>
+#include <machine/intr_machdep.h>
+
+#include <machine/xen/xen-os.h>
+#include <machine/xen/xenvar.h>
+#include <xen/xen_intr.h>
+#include <machine/xen/synch_bitops.h>
+#include <xen/evtchn.h>
+#include <xen/hypervisor.h>
+#include <sys/smp.h>
+
+#include <xen/xen_intr.h>
+#include <xen/evtchn.h>
+
+static inline unsigned long __ffs(unsigned long word)
+{
+        __asm__("bsfl %1,%0"
+                :"=r" (word)
+                :"rm" (word));
+        return word;
+}
+
+static struct mtx irq_mapping_update_lock;
+static struct xenpic *xp;
+struct xenpic_intsrc {
+	struct intsrc     xp_intsrc;
+	void		  *xp_cookie;
+	uint8_t           xp_vector;
+	boolean_t	  xp_masked;
+};
+
+struct xenpic { 
+	struct pic           *xp_dynirq_pic; 
+	struct pic           *xp_pirq_pic;   
+	uint16_t             xp_numintr; 
+	struct xenpic_intsrc xp_pins[0]; 
+}; 
+
+#define TODO            printf("%s: not implemented!\n", __func__) 
+
+/* IRQ <-> event-channel mappings. */
+static int evtchn_to_irq[NR_EVENT_CHANNELS];
+
+/* Packed IRQ information: binding type, sub-type index, and event channel. */
+static uint32_t irq_info[NR_IRQS];
+/* Binding types. */
+enum {
+	IRQT_UNBOUND,
+	IRQT_PIRQ,
+	IRQT_VIRQ,
+	IRQT_IPI,
+	IRQT_LOCAL_PORT,
+	IRQT_CALLER_PORT,
+	_IRQT_COUNT
+	
+};
+
+
+#define _IRQT_BITS 4
+#define _EVTCHN_BITS 12
+#define _INDEX_BITS (32 - _IRQT_BITS - _EVTCHN_BITS)
+
+/* Constructor for packed IRQ information. */
+static inline uint32_t
+mk_irq_info(uint32_t type, uint32_t index, uint32_t evtchn)
+{
+
+	return ((type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn);
+}
+
+/* Constructor for packed IRQ information. */
+
+/* Convenient shorthand for packed representation of an unbound IRQ. */
+#define IRQ_UNBOUND	mk_irq_info(IRQT_UNBOUND, 0, 0)
+
+/*
+ * Accessors for packed IRQ information.
+ */
+
+static inline unsigned int evtchn_from_irq(int irq)
+{
+	return irq_info[irq] & ((1U << _EVTCHN_BITS) - 1);
+}
+
+static inline unsigned int index_from_irq(int irq)
+{
+	return (irq_info[irq] >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1);
+}
+
+static inline unsigned int type_from_irq(int irq)
+{
+	return irq_info[irq] >> (32 - _IRQT_BITS);
+}
+
+
+/* IRQ <-> VIRQ mapping. */ 
+ 
+/* IRQ <-> IPI mapping. */ 
+#ifndef NR_IPIS
+#ifdef SMP
+#error "NR_IPIS not defined"
+#endif
+#define NR_IPIS 1 
+#endif 
+
+/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
+static unsigned long pirq_needs_unmask_notify[NR_PIRQS/sizeof(unsigned long)];
+
+/* Reference counts for bindings to IRQs. */
+static int irq_bindcount[NR_IRQS];
+
+#define VALID_EVTCHN(_chn) ((_chn) != 0)
+
+#ifdef SMP
+
+static uint8_t cpu_evtchn[NR_EVENT_CHANNELS];
+static unsigned long cpu_evtchn_mask[MAX_VIRT_CPUS][NR_EVENT_CHANNELS/LONG_BIT];
+
+#define active_evtchns(cpu,sh,idx)		\
+	((sh)->evtchn_pending[idx] &		\
+	 cpu_evtchn_mask[cpu][idx] &		\
+	 ~(sh)->evtchn_mask[idx])
+
+static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
+{
+	clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]);
+	set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);
+	cpu_evtchn[chn] = cpu;
+}
+
+static void init_evtchn_cpu_bindings(void)
+{
+	/* By default all event channels notify CPU#0. */
+	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
+	memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
+}
+
+#define cpu_from_evtchn(evtchn)		(cpu_evtchn[evtchn])
+
+#else
+
+#define active_evtchns(cpu,sh,idx)		\
+	((sh)->evtchn_pending[idx] &		\
+	 ~(sh)->evtchn_mask[idx])
+#define bind_evtchn_to_cpu(chn,cpu)	((void)0)
+#define init_evtchn_cpu_bindings()	((void)0)
+#define cpu_from_evtchn(evtchn)		(0)
+
+#endif
+
+
+/*
+ * Force a proper event-channel callback from Xen after clearing the
+ * callback mask. We do this in a very simple manner, by making a call
+ * down into Xen. The pending flag will be checked by Xen on return.
+ */
+void force_evtchn_callback(void)
+{
+	(void)HYPERVISOR_xen_version(0, NULL);
+}
+
+/* Scan the pending selector for this CPU and dispatch each active port. */
+void
+evtchn_do_upcall(struct intrframe *frame)
+{
+	unsigned long  l1, l2;
+	unsigned int   l1i, l2i, port;
+	int            irq, cpu;
+	shared_info_t *s;
+	vcpu_info_t   *vcpu_info;
+
+	cpu = PCPU_GET(cpuid);
+	s = HYPERVISOR_shared_info;
+	vcpu_info = &s->vcpu_info[cpu];
+
+	vcpu_info->evtchn_upcall_pending = 0;
+
+	/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
+	l1 = xen_xchg(&vcpu_info->evtchn_pending_sel, 0);
+
+	while (l1 != 0) {
+		l1i = __ffs(l1);
+		/* l1 is unsigned long, so the mask must be built as one too. */
+		l1 &= ~(1UL << l1i);
+
+		while ((l2 = active_evtchns(cpu, s, l1i)) != 0) {
+			l2i = __ffs(l2);
+
+			port = (l1i * LONG_BIT) + l2i;
+			if ((irq = evtchn_to_irq[port]) != -1) {
+				struct intsrc *isrc = intr_lookup_source(irq);
+				/* ack: mask and clear before running handlers */
+				mask_evtchn(port);
+				clear_evtchn(port);
+
+				intr_execute_handlers(isrc, frame);
+			} else {
+				evtchn_device_upcall(port);
+			}
+		}
+	}
+}
+
+/* Send IPI 'vector' to 'cpu' through the IRQ bound for it on that CPU. */
+void
+ipi_pcpu(unsigned int cpu, int vector)
+{
+	int irq;
+	/* Look up the target CPU's mapping, not the sender's. */
+	irq = pcpu_find(cpu)->pc_ipi_to_irq[vector];
+	notify_remote_via_irq(irq);
+}
+
+/*
+ * Return the first dynamic IRQ with no bindings; panic if none is left.
+ * The bound is on the translated IRQ, which indexes irq_bindcount[NR_IRQS].
+ */
+static int
+find_unbound_irq(void)
+{
+	int dynirq, irq;
+
+	for (dynirq = 0; (irq = dynirq_to_irq(dynirq)) < NR_IRQS; dynirq++)
+		if (irq_bindcount[irq] == 0)
+			return (irq);
+
+	panic("No available IRQ to bind to: increase NR_IRQS!\n");
+	return (-1);	/* not reached */
+}
+
+static int
+bind_caller_port_to_irq(unsigned int caller_port)
+{
+        int irq;
+
+        mtx_lock_spin(&irq_mapping_update_lock);
+
+        if ((irq = evtchn_to_irq[caller_port]) == -1) {
+                if ((irq = find_unbound_irq()) < 0)
+                        goto out;
+
+                evtchn_to_irq[caller_port] = irq;
+                irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port);
+        }
+
+        irq_bindcount[irq]++;
+	unmask_evtchn(caller_port);
+
+ out:
+        mtx_unlock_spin(&irq_mapping_update_lock);
+        return irq;
+}
+
+static int
+bind_local_port_to_irq(unsigned int local_port)
+{
+        int irq;
+
+        mtx_lock_spin(&irq_mapping_update_lock);
+
+        KASSERT(evtchn_to_irq[local_port] == -1,
+	    ("evtchn_to_irq inconsistent"));
+	
+        if ((irq = find_unbound_irq()) < 0) {
+                struct evtchn_close close = { .port = local_port };
+                HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+		
+                goto out;
+        }
+
+        evtchn_to_irq[local_port] = irq;
+        irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
+        irq_bindcount[irq]++;
+	unmask_evtchn(local_port);
+
+ out:
+        mtx_unlock_spin(&irq_mapping_update_lock);
+        return irq;
+}
+
+static int
+bind_listening_port_to_irq(unsigned int remote_domain)
+{
+        struct evtchn_alloc_unbound alloc_unbound;
+        int err;
+
+        alloc_unbound.dom        = DOMID_SELF;
+        alloc_unbound.remote_dom = remote_domain;
+
+        err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+                                          &alloc_unbound);
+
+        return err ? : bind_local_port_to_irq(alloc_unbound.port);
+}
+
+static int
+bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
+    unsigned int remote_port)
+{
+        struct evtchn_bind_interdomain bind_interdomain;
+        int err;
+
+        bind_interdomain.remote_dom  = remote_domain;
+        bind_interdomain.remote_port = remote_port;
+
+        err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
+                                          &bind_interdomain);
+
+        return err ? : bind_local_port_to_irq(bind_interdomain.local_port);
+}
+
+/* Map 'virq' on 'cpu' to an IRQ, binding a channel on first use. */
+static int
+bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+{
+	struct evtchn_bind_virq bind_virq;
+	int evtchn, irq;
+
+	mtx_lock_spin(&irq_mapping_update_lock);
+
+	if ((irq = pcpu_find(cpu)->pc_virq_to_irq[virq]) == -1) {
+		if ((irq = find_unbound_irq()) < 0)
+			goto out;
+
+		bind_virq.virq = virq;
+		bind_virq.vcpu = cpu;
+		HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq);
+
+		evtchn = bind_virq.port;
+		evtchn_to_irq[evtchn] = irq;
+		irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+		pcpu_find(cpu)->pc_virq_to_irq[virq] = irq;
+		bind_evtchn_to_cpu(evtchn, cpu);
+	} else {
+		/* Already bound: unmask its real port rather than port 0. */
+		evtchn = evtchn_from_irq(irq);
+	}
+
+	irq_bindcount[irq]++;
+	unmask_evtchn(evtchn);
+out:
+	mtx_unlock_spin(&irq_mapping_update_lock);
+	return irq;
+}
+
+
+extern int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu);
+
+/* Map 'ipi' on 'cpu' to an IRQ, binding a channel on first use. */
+int
+bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+{
+	struct evtchn_bind_ipi bind_ipi;
+	int evtchn, irq;
+
+	mtx_lock_spin(&irq_mapping_update_lock);
+
+	if ((irq = pcpu_find(cpu)->pc_ipi_to_irq[ipi]) == -1) {
+		if ((irq = find_unbound_irq()) < 0)
+			goto out;
+
+		bind_ipi.vcpu = cpu;
+		HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi);
+		evtchn = bind_ipi.port;
+
+		evtchn_to_irq[evtchn] = irq;
+		irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+		pcpu_find(cpu)->pc_ipi_to_irq[ipi] = irq;
+		bind_evtchn_to_cpu(evtchn, cpu);
+	} else {
+		/* Already bound: unmask its real port rather than port 0. */
+		evtchn = evtchn_from_irq(irq);
+	}
+
+	irq_bindcount[irq]++;
+	unmask_evtchn(evtchn);
+out:
+	mtx_unlock_spin(&irq_mapping_update_lock);
+	return irq;
+}
+
+
+static void 
+unbind_from_irq(int irq)
+{
+	struct evtchn_close close;
+	int evtchn = evtchn_from_irq(irq);
+	int cpu;
+
+	mtx_lock_spin(&irq_mapping_update_lock);
+
+	if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {
+		close.port = evtchn;
+		HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+
+		switch (type_from_irq(irq)) {
+		case IRQT_VIRQ:
+			cpu = cpu_from_evtchn(evtchn);
+			pcpu_find(cpu)->pc_virq_to_irq[index_from_irq(irq)] = -1;
+			break;
+		case IRQT_IPI:
+			cpu = cpu_from_evtchn(evtchn);
+			pcpu_find(cpu)->pc_ipi_to_irq[index_from_irq(irq)] = -1;
+			break;
+		default:
+			break;
+		}
+
+		/* Closed ports are implicitly re-bound to VCPU0. */
+		bind_evtchn_to_cpu(evtchn, 0);
+
+		evtchn_to_irq[evtchn] = -1;
+		irq_info[irq] = IRQ_UNBOUND;
+	}
+
+	mtx_unlock_spin(&irq_mapping_update_lock);
+}
+
+int 
+bind_caller_port_to_irqhandler(unsigned int caller_port,
+    const char *devname, driver_intr_t handler, void *arg,
+    unsigned long irqflags, unsigned int *irqp)
+{
+	unsigned int irq;
+	int error;
+
+	irq = bind_caller_port_to_irq(caller_port);
+	intr_register_source(&xp->xp_pins[irq].xp_intsrc);
+	error = intr_add_handler(devname, irq, handler, arg, irqflags,
+	    &xp->xp_pins[irq].xp_cookie);
+
+	if (error) {
+		unbind_from_irq(irq);
+		return (error);
+	}
+
+	if (irqp)
+		*irqp = irq;
+
+	return (0);
+}
+
+/* Bind a new listening port for 'remote_domain' to an IRQ with 'handler'. */
+int
+bind_listening_port_to_irqhandler(unsigned int remote_domain,
+    const char *devname, driver_intr_t handler, void *arg,
+    unsigned long irqflags, unsigned int *irqp)
+{
+	int error, irq;
+
+	irq = bind_listening_port_to_irq(remote_domain);
+	if (irq < 0)	/* bind failed: must not index xp_pins[] with it */
+		return (-irq);	/* NOTE(review): assumes a negative error code */
+	intr_register_source(&xp->xp_pins[irq].xp_intsrc);
+	error = intr_add_handler(devname, irq, handler, arg, irqflags,
+	    &xp->xp_pins[irq].xp_cookie);
+	if (error)
+		unbind_from_irq(irq);
+	else if (irqp)
+		*irqp = irq;
+
+	return (error);
+}
+
+/* Bind 'remote_port' of 'remote_domain' to a local IRQ with 'handler'. */
+int
+bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+    unsigned int remote_port, const char *devname, driver_intr_t handler,
+    unsigned long irqflags, unsigned int *irqp)
+{
+	int error, irq;
+
+	irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+	if (irq < 0)	/* bind failed: must not index xp_pins[] with it */
+		return (-irq);	/* NOTE(review): assumes a negative error code */
+	intr_register_source(&xp->xp_pins[irq].xp_intsrc);
+	error = intr_add_handler(devname, irq, handler, NULL,
+	    irqflags, &xp->xp_pins[irq].xp_cookie);
+	if (error)
+		unbind_from_irq(irq);
+	else if (irqp)
+		*irqp = irq;
+
+	return (error);
+}
+
+int 
+bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
+    const char *devname, driver_intr_t handler,
+    unsigned long irqflags, unsigned int *irqp)
+{
+	unsigned int irq;
+	int error;
+
+	irq = bind_virq_to_irq(virq, cpu);
+	intr_register_source(&xp->xp_pins[irq].xp_intsrc);
+	error = intr_add_handler(devname, irq, handler,
+	    NULL, irqflags, &xp->xp_pins[irq].xp_cookie);
+	if (error) {
+		unbind_from_irq(irq);
+		return (error);
+	}
+
+	if (irqp)
+		*irqp = irq;
+	return (0);
+}
+
+int 
+bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu,
+    const char *devname, driver_intr_t handler,
+    unsigned long irqflags, unsigned int *irqp)
+{
+	unsigned int irq;
+	int error;
+	
+	irq = bind_ipi_to_irq(ipi, cpu);
+	intr_register_source(&xp->xp_pins[irq].xp_intsrc);
+	error = intr_add_handler(devname, irq, handler,
+	    NULL, irqflags, &xp->xp_pins[irq].xp_cookie);
+	if (error) {
+		unbind_from_irq(irq);
+		return (error);
+	}
+
+	if (irqp)
+		*irqp = irq;
+	return (0);
+}
+
+void
+unbind_from_irqhandler(unsigned int irq)
+{
+	intr_remove_handler(xp->xp_pins[irq].xp_cookie);
+	unbind_from_irq(irq);
+}
+
+#if 0
+/* Rebind an evtchn so that it gets delivered to a specific cpu */
+static void
+rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
+{
+	evtchn_op_t op = { .cmd = EVTCHNOP_bind_vcpu };
+	int evtchn;
+
+	mtx_lock_spin(&irq_mapping_update_lock);
+
+	evtchn = evtchn_from_irq(irq);
+	if (!VALID_EVTCHN(evtchn)) {
+		mtx_unlock_spin(&irq_mapping_update_lock);
+		return;
+	}
+
+	/* Send future instances of this interrupt to other vcpu. */
+	bind_vcpu.port = evtchn;
+	bind_vcpu.vcpu = tcpu;
+
+	/*
+	 * If this fails, it usually just indicates that we're dealing with a 
+	 * virq or IPI channel, which don't actually need to be rebound. Ignore
+	 * it, but don't do the xenlinux-level rebind in that case.
+	 */
+	if (HYPERVISOR_event_channel_op(&op) >= 0)
+		bind_evtchn_to_cpu(evtchn, tcpu);
+
+	mtx_unlock_spin(&irq_mapping_update_lock);
+
+}
+
+static void set_affinity_irq(unsigned irq, cpumask_t dest)
+{
+	unsigned tcpu = ffs(dest) - 1;
+	rebind_irq_to_cpu(irq, tcpu);
+}
+#endif
+
+/*
+ * Interface to generic handling in intr_machdep.c
+ */
+
+
+/*------------ interrupt handling --------------------------------------*/
+#define TODO            printf("%s: not implemented!\n", __func__) 
+
+
+static void     xenpic_dynirq_enable_source(struct intsrc *isrc); 
+static void     xenpic_dynirq_disable_source(struct intsrc *isrc, int); 
+static void     xenpic_dynirq_eoi_source(struct intsrc *isrc); 
+static void     xenpic_dynirq_enable_intr(struct intsrc *isrc); 
+
+static void     xenpic_pirq_enable_source(struct intsrc *isrc); 
+static void     xenpic_pirq_disable_source(struct intsrc *isrc, int); 
+static void     xenpic_pirq_eoi_source(struct intsrc *isrc); 
+static void     xenpic_pirq_enable_intr(struct intsrc *isrc); 
+
+
+static int      xenpic_vector(struct intsrc *isrc); 
+static int      xenpic_source_pending(struct intsrc *isrc); 
+static void     xenpic_suspend(struct pic* pic); 
+static void     xenpic_resume(struct pic* pic); 
+static void     xenpic_assign_cpu(struct intsrc *, u_int apic_id);
+
+
+struct pic xenpic_dynirq_template  =  { 
+	.pic_enable_source	=	xenpic_dynirq_enable_source, 
+	.pic_disable_source	=	xenpic_dynirq_disable_source,
+	.pic_eoi_source		=	xenpic_dynirq_eoi_source, 
+	.pic_enable_intr	=	xenpic_dynirq_enable_intr, 
+	.pic_vector		=	xenpic_vector, 
+	.pic_source_pending	=	xenpic_source_pending,
+	.pic_suspend		=	xenpic_suspend, 
+	.pic_resume		=	xenpic_resume 
+};
+
+struct pic xenpic_pirq_template  =  { 
+	.pic_enable_source	=	xenpic_pirq_enable_source, 
+	.pic_disable_source	=	xenpic_pirq_disable_source,
+	.pic_eoi_source		=	xenpic_pirq_eoi_source, 
+	.pic_enable_intr	=	xenpic_pirq_enable_intr, 
+	.pic_vector		=	xenpic_vector, 
+	.pic_source_pending	=	xenpic_source_pending,
+	.pic_suspend		=	xenpic_suspend, 
+	.pic_resume		=	xenpic_resume,
+	.pic_assign_cpu		=	xenpic_assign_cpu
+};
+
+
+
+/* Unmask the event channel behind 'isrc' if it is currently masked. */
+static void
+xenpic_dynirq_enable_source(struct intsrc *isrc)
+{
+	struct xenpic_intsrc *pin;	/* not 'xp': that is the file-scope xenpic */
+	unsigned int irq;
+
+	pin = (struct xenpic_intsrc *)isrc;
+	mtx_lock_spin(&irq_mapping_update_lock);
+	if (pin->xp_masked) {
+		irq = xenpic_vector(isrc);
+		unmask_evtchn(evtchn_from_irq(irq));
+		pin->xp_masked = FALSE;
+	}
+	mtx_unlock_spin(&irq_mapping_update_lock);
+}
+
+static void 
+xenpic_dynirq_disable_source(struct intsrc *isrc, int foo)
+{
+	unsigned int irq;
+	struct xenpic_intsrc *xp;
+	
+	xp = (struct xenpic_intsrc *)isrc;
+	
+	mtx_lock_spin(&irq_mapping_update_lock);
+	if (!xp->xp_masked) {
+		irq = xenpic_vector(isrc);
+		mask_evtchn(evtchn_from_irq(irq));
+		xp->xp_masked = TRUE;
+	}	
+	mtx_unlock_spin(&irq_mapping_update_lock);
+}
+/* Unconditionally clear the masked flag and unmask the IRQ's event channel. */
+static void
+xenpic_dynirq_enable_intr(struct intsrc *isrc)
+{
+	struct xenpic_intsrc *src = (struct xenpic_intsrc *)isrc;
+	unsigned int vec;
+
+	mtx_lock_spin(&irq_mapping_update_lock);
+	src->xp_masked = 0;
+	/* xenpic_vector() yields the IRQ number used to look up the port. */
+	vec = xenpic_vector(isrc);
+	unmask_evtchn(evtchn_from_irq(vec));
+	mtx_unlock_spin(&irq_mapping_update_lock);
+}
+/* End-of-interrupt: clear the masked flag and unmask the event channel. */
+static void
+xenpic_dynirq_eoi_source(struct intsrc *isrc)
+{
+	struct xenpic_intsrc *src = (struct xenpic_intsrc *)isrc;
+	unsigned int vec;
+
+	mtx_lock_spin(&irq_mapping_update_lock);
+	src->xp_masked = 0;
+	/* Same sequence as xenpic_dynirq_enable_intr(). */
+	vec = xenpic_vector(isrc);
+	unmask_evtchn(evtchn_from_irq(vec));
+	mtx_unlock_spin(&irq_mapping_update_lock);
+}
+/*
+ * Return the vector (the flat IRQ number assigned in evtchn_init()) stored
+ * in the xenpic_intsrc that 'isrc' is cast to.
+ */
+static int
+xenpic_vector(struct intsrc *isrc)
+{
+	const struct xenpic_intsrc *src = (const struct xenpic_intsrc *)isrc;
+
+	return (src->xp_vector);
+}
+/*
+ * Pending-state query; not implemented yet (XXXEN: TODO).  Logs the source's
+ * vector and masked flag and always reports "not pending".
+ */
+static int
+xenpic_source_pending(struct intsrc *isrc)
+{
+	struct xenpic_intsrc *src = (struct xenpic_intsrc *)isrc;
+
+	printf("xenpic_source_pending(): vector=%x,masked=%x\n",
+	    src->xp_vector, src->xp_masked);
+	return (0);
+}
+/* PIC suspend method shared by both templates; only a TODO placeholder. */
+static void
+xenpic_suspend(struct pic *pic)
+{
+	TODO;
+}
+/* PIC resume method shared by both templates; only a TODO placeholder. */
+static void
+xenpic_resume(struct pic *pic)
+{
+	TODO;
+}
+/* CPU-assignment method of the PIRQ template; only a TODO placeholder. */
+static void
+xenpic_assign_cpu(struct intsrc *isrc, u_int apic_id)
+{
+	TODO;
+}
+/* Send a notification on the event channel bound to 'irq'; panics if none. */
+void
+notify_remote_via_irq(int irq)
+{
+	int evtchn = evtchn_from_irq(irq);
+
+	if (VALID_EVTCHN(evtchn))
+		notify_remote_via_evtchn(evtchn);
+	else
+		panic("invalid evtchn %d for irq %d", evtchn, irq);
+}
+
+/* Required for physical devices: EOI 'pirq' to Xen if it was flagged. */
+static inline void
+pirq_unmask_notify(int pirq)
+{
+	struct physdev_eoi eoi_args = { .irq = pirq };
+
+	/* The flag is maintained by pirq_query_unmask(). */
+	if (unlikely(test_bit(pirq, &pirq_needs_unmask_notify[0])))
+		(void)HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi_args);
+}
+/* Query Xen whether 'pirq' needs an EOI on unmask and cache the answer. */
+static inline void
+pirq_query_unmask(int pirq)
+{
+	/* Zero-initialised so 'flags' is well defined if the hypercall fails. */
+	struct physdev_irq_status_query query = { .irq = pirq };
+
+	(void)HYPERVISOR_physdev_op(PHYSDEVOP_IRQ_STATUS_QUERY, &query);
+	clear_bit(pirq, &pirq_needs_unmask_notify[0]);
+	if (query.flags & PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY)
+		set_bit(pirq, &pirq_needs_unmask_notify[0]);
+}
+
+/*
+ * On startup, if there is no action associated with the IRQ then we are
+ * probing. In this case we should not share with others as it will confuse us.
+ */
+#define probing_irq(_irq) (intr_lookup_source(_irq) == NULL)
+/* Bind this PIRQ to a new event channel if it has none, then unmask it. */
+static void 
+xenpic_pirq_enable_intr(struct intsrc *isrc)
+{
+	struct evtchn_bind_pirq bind_pirq;
+	int evtchn;
+	unsigned int irq;
+	
+	mtx_lock_spin(&irq_mapping_update_lock);
+	irq = xenpic_vector(isrc);
+	evtchn = evtchn_from_irq(irq);
+
+	if (VALID_EVTCHN(evtchn))	/* already bound: just unmask below */
+		goto out;
+
+	bind_pirq.pirq  = irq;
+	/* NB. We are happy to share unless we are probing. */
+	bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE;
+	
+	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) {
+#ifndef XEN_PRIVILEGED_GUEST
+		panic("unexpected pirq call");
+#endif
+		if (!probing_irq(irq)) /* Some failures are expected when probing. */
+			printf("Failed to obtain physical IRQ %d\n", irq);
+		mtx_unlock_spin(&irq_mapping_update_lock);
+		return;
+	}
+	evtchn = bind_pirq.port;
+
+	pirq_query_unmask(irq_to_pirq(irq));
+	/* Record the binding: the port is bound to CPU 0 and mapped to irq. */
+	bind_evtchn_to_cpu(evtchn, 0);
+	evtchn_to_irq[evtchn] = irq;
+	irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, evtchn);
+
+ out:
+	unmask_evtchn(evtchn);
+	pirq_unmask_notify(irq_to_pirq(irq));
+	mtx_unlock_spin(&irq_mapping_update_lock);
+}
+/* Unmask a bound PIRQ's event channel and send Xen the EOI if it needs one. */
+static void
+xenpic_pirq_enable_source(struct intsrc *isrc)
+{
+	unsigned int vec;
+	int port;
+
+	mtx_lock_spin(&irq_mapping_update_lock);
+	vec = xenpic_vector(isrc);
+	port = evtchn_from_irq(vec);
+
+	/* Nothing to do for an IRQ that has no event channel bound. */
+	if (VALID_EVTCHN(port)) {
+		unmask_evtchn(port);
+		pirq_unmask_notify(irq_to_pirq(vec));
+	}
+
+	mtx_unlock_spin(&irq_mapping_update_lock);
+}
+/* Mask the event channel bound to a PIRQ, if any; 'eoi' is not used. */
+static void
+xenpic_pirq_disable_source(struct intsrc *isrc, int eoi)
+{
+	unsigned int vec;
+	int port;
+
+	mtx_lock_spin(&irq_mapping_update_lock);
+	vec = xenpic_vector(isrc);
+	port = evtchn_from_irq(vec);
+
+	/* An unbound IRQ has no channel to mask. */
+	if (VALID_EVTCHN(port)) {
+		mask_evtchn(port);
+	}
+
+	mtx_unlock_spin(&irq_mapping_update_lock);
+}
+
+/* End-of-interrupt for a PIRQ: unmask its channel and EOI to Xen if needed. */
+static void
+xenpic_pirq_eoi_source(struct intsrc *isrc)
+{
+	unsigned int vec;
+	int port;
+
+	mtx_lock_spin(&irq_mapping_update_lock);
+	vec = xenpic_vector(isrc);
+	port = evtchn_from_irq(vec);
+
+	/* Nothing to do for an IRQ that has no event channel bound. */
+	if (VALID_EVTCHN(port)) {
+		unmask_evtchn(port);
+		pirq_unmask_notify(irq_to_pirq(vec));
+	}
+
+	mtx_unlock_spin(&irq_mapping_update_lock);
+}
+/* Return the event channel port currently recorded for 'irq'. */
+int
+irq_to_evtchn_port(int irq)
+{
+	return (evtchn_from_irq(irq));
+}
+/* Set the bit for 'port' in the shared_info event-channel mask. */
+void
+mask_evtchn(int port)
+{
+	shared_info_t *shinfo = HYPERVISOR_shared_info;
+	synch_set_bit(port, &shinfo->evtchn_mask[0]);
+}
+/* Unmask 'port' and re-raise any event that became pending while masked. */
+void 
+unmask_evtchn(int port)
+{
+	shared_info_t *s = HYPERVISOR_shared_info;
+	unsigned int cpu = PCPU_GET(cpuid);
+	vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
+
+	/* Slow path (hypercall) if this is a non-local port. */
+	if (unlikely(cpu != cpu_from_evtchn(port))) {
+		struct evtchn_unmask unmask = { .port = port };
+		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
+		return;
+	}
+	/* Local port: clear its mask bit directly in shared_info. */
+	synch_clear_bit(port, &s->evtchn_mask);
+
+	/*
+	 * The following is basically the equivalent of 'hw_resend_irq'. Just
+	 * like a real IO-APIC we 'lose the interrupt edge' if the channel is
+	 * masked.
+	 */
+	if (synch_test_bit(port, &s->evtchn_pending) && 
+	    !synch_test_and_set_bit(port / LONG_BIT,
+				    &vcpu_info->evtchn_pending_sel)) {
+		vcpu_info->evtchn_upcall_pending = 1;
+		if (!vcpu_info->evtchn_upcall_mask)
+			force_evtchn_callback();
+	}
+}
+/* Re-create CPU 0's VIRQ and IPI event-channel bindings after a resume. */
+void irq_resume(void)
+{
+	evtchn_op_t op;
+	int         cpu, pirq, virq, ipi, irq, evtchn;
+
+	struct evtchn_bind_virq bind_virq;
+	struct evtchn_bind_ipi bind_ipi;	
+
+	init_evtchn_cpu_bindings();
+
+	/* New event-channel space is not 'live' yet. */
+	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+		mask_evtchn(evtchn);
+
+	/* Check that no PIRQs are still bound. */
+	for (pirq = 0; pirq < NR_PIRQS; pirq++) {
+		KASSERT(irq_info[pirq_to_irq(pirq)] == IRQ_UNBOUND,
+		    ("pirq_to_irq inconsistent"));
+	}
+
+	/* Secondary CPUs must have no VIRQ or IPI bindings. */
+	for (cpu = 1; cpu < MAX_VIRT_CPUS; cpu++) {
+		for (virq = 0; virq < NR_VIRQS; virq++) {
+			KASSERT(pcpu_find(cpu)->pc_virq_to_irq[virq] == -1,
+			    ("virq_to_irq inconsistent"));
+		}
+		for (ipi = 0; ipi < NR_IPIS; ipi++) {
+			KASSERT(pcpu_find(cpu)->pc_ipi_to_irq[ipi] == -1,
+			    ("ipi_to_irq inconsistent"));
+		}
+	}
+
+	/* No IRQ <-> event-channel mappings. */
+	for (irq = 0; irq < NR_IRQS; irq++)
+		irq_info[irq] &= ~0xFFFF; /* zap event-channel binding */
+	for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
+		evtchn_to_irq[evtchn] = -1;
+
+	/* Primary CPU: rebind VIRQs automatically. */
+	for (virq = 0; virq < NR_VIRQS; virq++) {
+		if ((irq = pcpu_find(0)->pc_virq_to_irq[virq]) == -1)
+			continue;
+
+		KASSERT(irq_info[irq] == mk_irq_info(IRQT_VIRQ, virq, 0),
+		    ("irq_info inconsistent"));
+
+		/* Get a new binding from Xen. */
+		bind_virq.virq = virq;
+		bind_virq.vcpu = 0;
+		HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq);
+		evtchn = bind_virq.port;
+		/* NOTE: the bind hypercall's result above is not checked. */
+		/* Record the new mapping. */
+		evtchn_to_irq[evtchn] = irq;
+		irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+
+		/* Ready for use. */
+		unmask_evtchn(evtchn);
+	}
+
+	/* Primary CPU: rebind IPIs automatically. */
+	for (ipi = 0; ipi < NR_IPIS; ipi++) {
+		if ((irq = pcpu_find(0)->pc_ipi_to_irq[ipi]) == -1)
+			continue;
+
+		KASSERT(irq_info[irq] == mk_irq_info(IRQT_IPI, ipi, 0),
+		    ("irq_info inconsistent"));
+
+		/* Get a new binding from Xen. */
+		memset(&op, 0, sizeof(op));	/* 'op' is not used otherwise */
+		bind_ipi.vcpu = 0;
+		HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi);
+		evtchn = bind_ipi.port;
+		/* NOTE: the bind hypercall's result above is not checked. */
+		/* Record the new mapping. */
+		evtchn_to_irq[evtchn] = irq;
+		irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
+
+		/* Ready for use. */
+		unmask_evtchn(evtchn);
+	}
+}
+/* SYSINIT hook: reset IRQ/event-channel state and set up the two Xen PICs. */
+static void 
+evtchn_init(void *dummy __unused)
+{
+	int i, cpu;
+	struct xenpic_intsrc *pin, *tpin;
+
+
+	init_evtchn_cpu_bindings();
+	
+	/* No VIRQ or IPI bindings. */
+	for (cpu = 0; cpu < mp_ncpus; cpu++) {
+		for (i = 0; i < NR_VIRQS; i++)
+			pcpu_find(cpu)->pc_virq_to_irq[i] = -1;
+		for (i = 0; i < NR_IPIS; i++)
+			pcpu_find(cpu)->pc_ipi_to_irq[i] = -1;
+	}
+
+	/* No event-channel -> IRQ mappings. */
+	for (i = 0; i < NR_EVENT_CHANNELS; i++) {
+		evtchn_to_irq[i] = -1;
+		mask_evtchn(i); /* No event channels are 'live' right now. */
+	}
+
+	/* No IRQ -> event-channel mappings. */
+	for (i = 0; i < NR_IRQS; i++)
+		irq_info[i] = IRQ_UNBOUND;
+	/* 'xp' (not a local): one struct xenpic plus NR_IRQS interrupt sources. */
+	xp = malloc(sizeof(struct xenpic) + NR_IRQS*sizeof(struct xenpic_intsrc), 
+		    M_DEVBUF, M_WAITOK);
+
+	xp->xp_dynirq_pic = &xenpic_dynirq_template;
+	xp->xp_pirq_pic = &xenpic_pirq_template;
+	xp->xp_numintr = NR_IRQS;
+	bzero(xp->xp_pins, sizeof(struct xenpic_intsrc) * NR_IRQS);
+
+
+	/* We need to register our PIC's beforehand */
+	if (intr_register_pic(&xenpic_pirq_template))
+		panic("XEN: intr_register_pic() failure");
+	if (intr_register_pic(&xenpic_dynirq_template))
+		panic("XEN: intr_register_pic() failure");
+
+	/*
+	 * Initialize the dynamic IRQ's - we initialize the structures, but
+	 * we do not bind them (the bind_*_to_irqhandler() functions do this)
+	 */
+	pin = xp->xp_pins;
+	for (i = 0; i < NR_DYNIRQS; i++) {
+		/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
+		irq_bindcount[dynirq_to_irq(i)] = 0;
+
+		tpin = &pin[dynirq_to_irq(i)];
+		tpin->xp_intsrc.is_pic = xp->xp_dynirq_pic;
+		tpin->xp_vector = dynirq_to_irq(i);
+		
+	}
+	/*
+	 * Now, we go ahead and claim every PIRQ there is.
+	 */
+	pin = xp->xp_pins;
+	for (i = 0; i < NR_PIRQS; i++) {
+		/* PIRQ space is currently unbound. Zero the refcnts. */
+		irq_bindcount[pirq_to_irq(i)] = 0;
+
+#ifdef RTC_IRQ
+		/* If not domain 0, force our RTC driver to fail its probe. */
+		if ((i == RTC_IRQ) &&
+		    !(xen_start_info->flags & SIF_INITDOMAIN))
+			continue;
+#endif
+		tpin = &pin[pirq_to_irq(i)];		
+		tpin->xp_intsrc.is_pic = xp->xp_pirq_pic;
+		tpin->xp_vector = pirq_to_irq(i);
+
+	}
+}
+
+SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_MIDDLE, evtchn_init, NULL);
+/*
+ * irq_mapping_update_lock: in order to allow an interrupt to occur in a
+ * critical section, and to set pcpu->ipending (etc.) properly, we must be
+ * able to get the icu lock, so it can't be
+ * under witness.
+ */
+
+MTX_SYSINIT(irq_mapping_update_lock, &irq_mapping_update_lock, "xp", MTX_SPIN);

Property changes on: xen/evtchn/evtchn.c
___________________________________________________________________
Added: svn:keywords
   + FreeBSD=%H

Index: xen/reboot.c
===================================================================
--- xen/reboot.c	(.../stable/6/sys)	(revision 0)
+++ xen/reboot.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,262 @@
+/*
+ *
+ * Copyright (c) 2004 Christian Limpach.
+ * Copyright (c) 2004-2006,2008 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Christian Limpach.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/malloc.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/reboot.h>
+#include <sys/sched.h>
+#include <sys/smp.h>
+#include <sys/systm.h>
+
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/gnttab.h>
+#include <xen/xen_intr.h>
+#include <xen/xenbus/xenbusvar.h>
+
+#include <vm/vm.h>
+#include <vm/pmap.h>
+
+#ifdef XENHVM
+
+#include <dev/xen/xenpci/xenpcivar.h>
+
+#else
+
+static void xen_suspend(void);
+
+#endif
+/*
+ * Xenbus watch callback: read and clear control/shutdown, then act on it.
+ */
+static void
+shutdown_handler(struct xenbus_watch *watch,
+		 const char **vec, unsigned int len)
+{
+	struct xenbus_transaction xbt;
+	char *str;
+	int error, howto;
+
+	howto = 0;
+ again:
+	error = xenbus_transaction_start(&xbt);
+	if (error)
+		return;
+
+	error = xenbus_read(xbt, "control", "shutdown", NULL, (void **) &str);
+	/* Ignore read errors and empty reads (freeing the empty string). */
+	if (error || strlen(str) == 0) {
+		if (error == 0)
+			free(str, M_DEVBUF);
+		xenbus_transaction_end(xbt, 1);
+		return;
+	}
+
+	xenbus_write(xbt, "control", "shutdown", "");
+
+	error = xenbus_transaction_end(xbt, 0);
+	if (error == EAGAIN) {
+		free(str, M_DEVBUF);
+		goto again;
+	}
+
+	if (strcmp(str, "reboot") == 0)
+		howto = 0;
+	else if (strcmp(str, "poweroff") == 0)
+		howto |= (RB_POWEROFF | RB_HALT);
+	else if (strcmp(str, "halt") == 0)
+#ifdef XENHVM
+		/* We rely on acpi powerdown to halt the VM. */
+		howto |= (RB_POWEROFF | RB_HALT);
+#else
+		howto |= RB_HALT;
+#endif
+	else if (strcmp(str, "suspend") == 0)
+		howto = -1;
+	else {
+		printf("Ignoring shutdown request: %s\n", str);
+		goto done;
+	}
+
+	if (howto == -1) {
+		xen_suspend();
+		goto done;
+	}
+
+	shutdown_nice(howto);
+ done:
+	free(str, M_DEVBUF);
+}
+
+#ifndef XENHVM
+
+/*
+ * In HV mode, we let acpi take care of halts and reboots.
+ */
+/* shutdown_final handler: ask Xen to power off or reboot this domain. */
+static void
+xen_shutdown_final(void *arg, int howto)
+{
+	int reason;
+
+	reason = (howto & (RB_HALT | RB_POWEROFF)) != 0 ?
+	    SHUTDOWN_poweroff : SHUTDOWN_reboot;
+	HYPERVISOR_shutdown(reason);
+}
+
+#endif
+/* Watch on the control/shutdown node; callbacks go to shutdown_handler(). */
+static struct xenbus_watch shutdown_watch = {
+	.callback = shutdown_handler,
+	.node = "control/shutdown"
+};
+/* SYSINIT hook: register the shutdown watch and, if !XENHVM, the final hook. */
+static void
+setup_shutdown_watcher(void *unused)
+{
+	if (register_xenbus_watch(&shutdown_watch) != 0) {
+		printf("Failed to set shutdown watcher\n");
+	}
+#ifndef XENHVM
+	EVENTHANDLER_REGISTER(shutdown_final, xen_shutdown_final, NULL,
+	    SHUTDOWN_PRI_LAST);
+#endif
+}
+
+SYSINIT(shutdown, SI_SUB_PSEUDO, SI_ORDER_ANY, setup_shutdown_watcher, NULL);
+
+#ifndef XENHVM
+
+extern void xencons_suspend(void);
+extern void xencons_resume(void);
+/* Suspend this (non-HVM) domain via Xen and rebuild shared state on resume. */
+static void
+xen_suspend(void)
+{
+	int i, j, k, fpp;
+	unsigned long max_pfn, start_info_mfn;
+	void *shared_info;
+#ifdef SMP
+	cpumask_t map;
+	/*
+	 * Bind us to CPU 0 and stop any other VCPUs.
+	 */
+	mtx_lock_spin(&sched_lock);
+	sched_bind(curthread, 0);
+	mtx_unlock_spin(&sched_lock);
+	KASSERT(PCPU_GET(cpuid) == 0, ("xen_suspend: not running on cpu 0"));
+
+	map = PCPU_GET(other_cpus) & ~stopped_cpus;
+	if (map)
+		stop_cpus(map);
+#endif
+
+	if (DEVICE_SUSPEND(root_bus) != 0) {
+		printf("xen_suspend: device_suspend failed\n");
+#ifdef SMP
+		/* 'map' only exists on SMP; also undo the CPU 0 binding. */
+		sched_unbind(curthread);
+		if (map)
+			restart_cpus(map);
+#endif
+		return;
+	}
+
+	local_irq_disable();
+	xencons_suspend();
+	gnttab_suspend();
+
+	max_pfn = HYPERVISOR_shared_info->arch.max_pfn;
+	shared_info = HYPERVISOR_shared_info;
+	HYPERVISOR_shared_info = NULL;
+	pmap_kremove((vm_offset_t) shared_info);
+	PT_UPDATES_FLUSH();
+
+	xen_start_info->store_mfn = MFNTOPFN(xen_start_info->store_mfn);
+	xen_start_info->console.domU.mfn = MFNTOPFN(xen_start_info->console.domU.mfn);
+
+	/*
+	 * We'll stop somewhere inside this hypercall. When it returns,
+	 * we'll start resuming after the restore.
+	 */
+	start_info_mfn = VTOMFN(xen_start_info);
+	pmap_suspend();
+	HYPERVISOR_suspend(start_info_mfn);
+	pmap_resume();
+
+	pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
+	HYPERVISOR_shared_info = shared_info;
+
+	HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
+		VTOMFN(xen_pfn_to_mfn_frame_list_list);
+	/* Rebuild the pfn-to-mfn frame lists; fpp is the entries per page. */
+	fpp = PAGE_SIZE/sizeof(unsigned long);
+	for (i = 0, j = 0, k = -1; i < max_pfn; i += fpp, j++) {
+		if ((j % fpp) == 0) {
+			k++;
+			xen_pfn_to_mfn_frame_list_list[k] =
+				VTOMFN(xen_pfn_to_mfn_frame_list[k]);
+			j = 0;
+		}
+		xen_pfn_to_mfn_frame_list[k][j] = VTOMFN(&xen_phys_machine[i]);
+	}
+	HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
+
+	gnttab_resume();
+	irq_resume();
+	local_irq_enable();
+	xencons_resume();
+
+#ifdef CONFIG_SMP
+	for_each_cpu(i)
+		vcpu_prepare(i);
+#endif
+	/*
+	 * Only resume xenbus /after/ we've prepared our VCPUs; otherwise
+	 * the VCPU hotplug callback can race with our vcpu_prepare
+	 */
+	DEVICE_RESUME(root_bus);
+
+#ifdef SMP
+	sched_unbind(curthread);
+	if (map)
+		restart_cpus(map);
+#endif
+}
+
+#endif

Property changes on: xen/reboot.c
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:mergeinfo
   Merged /stable/7/sys/i386/xen/xen_machdep.c:r172506,172810,175956,179044,179776,180149,182402
   Merged /head/sys/i386/xen/xen_machdep.c:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628
Added: svn:eol-style
   + native

Index: xen/xen_intr.h
===================================================================
--- xen/xen_intr.h	(.../stable/6/sys)	(revision 0)
+++ xen/xen_intr.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,102 @@
+/* -*-  Mode:C; c-basic-offset:4; tab-width:4 -*- */
+#ifndef _XEN_INTR_H_
+#define _XEN_INTR_H_
+
+/*
+* The flat IRQ space is divided into two regions:
+*  1. A one-to-one mapping of real physical IRQs. This space is only used
+*     if we have physical device-access privilege. This region is at the 
+*     start of the IRQ space so that existing device drivers do not need
+*     to be modified to translate physical IRQ numbers into our IRQ space.
+*  2. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
+*     are bound using the provided bind/unbind functions.
+*
+*
+* $FreeBSD$
+*/
+
+#define PIRQ_BASE   0
+#define NR_PIRQS  128
+
+#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
+#define NR_DYNIRQS  128
+
+#define NR_IRQS   (NR_PIRQS + NR_DYNIRQS)
+
+#define pirq_to_irq(_x)   ((_x) + PIRQ_BASE)
+#define irq_to_pirq(_x)   ((_x) - PIRQ_BASE)
+
+#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE)
+#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE)
+
+/* 
+ * Dynamic binding of event channels and VIRQ sources to guest IRQ space.
+ */
+
+/*
+ * Bind a caller port event channel to an interrupt handler. If
+ * successful, the guest IRQ number is returned in *irqp. Return zero
+ * on success or errno otherwise.
+ */
+extern int bind_caller_port_to_irqhandler(unsigned int caller_port,
+	const char *devname, driver_intr_t handler, void *arg,
+	unsigned long irqflags, unsigned int *irqp);
+
+/*
+ * Bind a listening port to an interrupt handler. If successful, the
+ * guest IRQ number is returned in *irqp. Return zero on success or
+ * errno otherwise.
+ */
+extern int bind_listening_port_to_irqhandler(unsigned int remote_domain,
+	const char *devname, driver_intr_t handler, void *arg,
+	unsigned long irqflags, unsigned int *irqp);
+
+/*
+ * Bind a VIRQ to an interrupt handler. If successful, the guest IRQ
+ * number is returned in *irqp. Return zero on success or errno
+ * otherwise.
+ */
+extern int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
+	const char *devname, driver_intr_t handler,
+	unsigned long irqflags,	unsigned int *irqp);
+
+/*
+ * Bind an IPI to an interrupt handler. If successful, the guest
+ * IRQ number is returned in *irqp. Return zero on success or errno
+ * otherwise.
+ */
+extern int bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu,
+	const char *devname, driver_intr_t handler,
+	unsigned long irqflags, unsigned int *irqp);
+
+/*
+ * Bind an interdomain event channel to an interrupt handler. If
+ * successful, the guest IRQ number is returned in *irqp. Return zero
+ * on success or errno otherwise.
+ */
+extern int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
+	unsigned int remote_port, const char *devname, driver_intr_t handler,
+	unsigned long irqflags, unsigned int *irqp);
+
+/*
+ * Unbind an interrupt handler using the guest IRQ number returned
+ * when it was bound.
+ */
+extern void unbind_from_irqhandler(unsigned int irq);
+/* Map IRQ 2 to IRQ 9; every other IRQ number is returned unchanged. */
+static __inline__ int irq_cannonicalize(unsigned int irq)
+{
+    return (irq != 2) ? irq : 9;
+}
+
+extern void disable_irq(unsigned int);
+extern void disable_irq_nosync(unsigned int);
+extern void enable_irq(unsigned int);
+
+extern void irq_suspend(void);
+extern void irq_resume(void);
+
+extern void	idle_block(void);
+extern int	ap_cpu_initclocks(int cpu);
+
+#endif /* _XEN_INTR_H_ */

Property changes on: xen/xen_intr.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:mergeinfo
   Merged /stable/7/sys/i386/include/xen/xen_intr.h:r172506,172810,175956,179044,179776,180149,182402
   Merged /head/sys/i386/include/xen/xen_intr.h:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628
Added: svn:eol-style
   + native

Index: xen/evtchn.h
===================================================================
--- xen/evtchn.h	(.../stable/6/sys)	(revision 0)
+++ xen/evtchn.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,94 @@
+/******************************************************************************
+ * evtchn.h
+ * 
+ * Communication via Xen event channels.
+ * Also definitions for the device that demuxes notifications to userspace.
+ * 
+ * Copyright (c) 2004, K A Fraser
+ *
+ * $FreeBSD$
+ */
+
+#ifndef __ASM_EVTCHN_H__
+#define __ASM_EVTCHN_H__
+#include <machine/pcpu.h>
+#include <xen/hypervisor.h>
+#include <machine/xen/synch_bitops.h>
+#include <machine/frame.h>
+
+/*
+ * LOW-LEVEL DEFINITIONS
+ */
+
+/*
+ * Unlike notify_remote_via_evtchn(), this is safe to use across
+ * save/restore. Notifications on a broken connection are silently dropped.
+ */
+void notify_remote_via_irq(int irq);
+
+
+/* Entry point for notifications into kernel subsystems. */
+void evtchn_do_upcall(struct intrframe *frame);
+
+/* Entry point for notifications into the userland character device. */
+void evtchn_device_upcall(int port);
+
+void mask_evtchn(int port);
+
+void unmask_evtchn(int port);
+
+#ifdef SMP
+void rebind_evtchn_to_cpu(int port, unsigned int cpu);
+#else
+#define rebind_evtchn_to_cpu(port, cpu)	((void)0)
+#endif
+/* Run synch_test_and_set_bit() on the mask bit of 'port'; return its result. */
+static inline
+int test_and_set_evtchn_mask(int port)
+{
+	shared_info_t *shinfo = HYPERVISOR_shared_info;
+	return synch_test_and_set_bit(port, &shinfo->evtchn_mask[0]);
+}
+/* Clear the bit for 'port' in the shared_info pending bitmap. */
+static inline void
+clear_evtchn(int port)
+{
+	shared_info_t *shinfo = HYPERVISOR_shared_info;
+	synch_clear_bit(port, &shinfo->evtchn_pending[0]);
+}
+/* Issue EVTCHNOP_send on 'port'; the hypercall's result is discarded. */
+static inline void
+notify_remote_via_evtchn(int port)
+{
+	struct evtchn_send op = { .port = port };
+	(void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &op);
+}
+
+/*
+ * Use these to access the event channel underlying the IRQ handle returned
+ * by bind_*_to_irqhandler().
+ */
+int irq_to_evtchn_port(int irq);
+
+void ipi_pcpu(unsigned int cpu, int vector);
+
+/*
+ * CHARACTER-DEVICE DEFINITIONS
+ */
+
+#define PORT_NORMAL    0x0000
+#define PORT_EXCEPTION 0x8000
+#define PORTIDX_MASK   0x7fff
+
+/* /dev/xen/evtchn resides at device number major=10, minor=200 */
+#define EVTCHN_MINOR 200
+
+/* /dev/xen/evtchn ioctls: */
+/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */
+#define EVTCHN_RESET  _IO('E', 1)
+/* EVTCHN_BIND: Bind to the specified event-channel port. */
+#define EVTCHN_BIND   _IO('E', 2)
+/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */
+#define EVTCHN_UNBIND _IO('E', 3)
+
+#endif /* __ASM_EVTCHN_H__ */

Property changes on: xen/evtchn.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:mergeinfo
   Merged /stable/7/sys/i386/include/xen/evtchn.h:r172506,172810,175956,179044,179776,180149,182402
   Merged /head/sys/i386/include/xen/evtchn.h:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628
Added: svn:eol-style
   + native

Index: amd64/include/pcpu.h
===================================================================
--- amd64/include/pcpu.h	(.../stable/6/sys)	(revision 184012)
+++ amd64/include/pcpu.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -35,6 +35,24 @@
 
 #ifdef _KERNEL
 
+#if defined(XEN) || defined(XENHVM)	/* defaults unless already defined */
+#ifndef NR_VIRQS
+#define	NR_VIRQS	24
+#endif
+#ifndef NR_IPIS
+#define	NR_IPIS		2
+#endif
+#endif /* XEN || XENHVM */
+/* The leading ';' ends the field preceding PCPU_XEN_FIELDS in the list. */
+#ifdef XENHVM
+#define PCPU_XEN_FIELDS							\
+	;								\
+	unsigned int pc_last_processed_l1i;				\
+	unsigned int pc_last_processed_l2i
+#else
+#define PCPU_XEN_FIELDS
+#endif
+
 /*
  * The SMP parts are setup in pmap.c and locore.s for the BSP, and
  * mp_machdep.c sets up the data for the AP's to "see" when they awake.
@@ -49,7 +67,8 @@
 	register_t pc_rsp0;						\
 	register_t pc_scratch_rsp;	/* User %rsp in syscall */	\
 	u_int	pc_apic_id;						\
-	u_int   pc_acpi_id		/* ACPI CPU id */
+	u_int   pc_acpi_id		/* ACPI CPU id */		\
+	PCPU_XEN_FIELDS
 
 #if defined(lint)
  
Index: amd64/include/xen/xen-os.h
===================================================================
--- amd64/include/xen/xen-os.h	(.../stable/6/sys)	(revision 0)
+++ amd64/include/xen/xen-os.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,296 @@
+/******************************************************************************
+ * xen-os.h
+ * 
+ * Random collection of macros and definitions
+ */
+
+#ifndef _XEN_OS_H_
+#define _XEN_OS_H_
+
+#ifdef PAE
+#define CONFIG_X86_PAE
+#endif
+
+#if !defined(__XEN_INTERFACE_VERSION__)  
+/*  
+ * Can update to a more recent version when we implement  
+ * the hypercall page  
+ */  
+#define  __XEN_INTERFACE_VERSION__ 0x00030204  
+#endif  
+
+#include <xen/interface/xen.h>
+
+/* Force a proper event-channel callback from Xen. */
+void force_evtchn_callback(void);
+
+extern int gdtset;
+
+extern shared_info_t *HYPERVISOR_shared_info;
+
+/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
+static inline void rep_nop(void)
+{
+    __asm__ __volatile__ ( "rep;nop" : : : "memory" );
+}
+#define cpu_relax() rep_nop()
+
+/* crude memory allocator for memory allocation early in 
+ *  boot
+ */
+void *bootmem_alloc(unsigned int size);
+void bootmem_free(void *ptr, unsigned int size);
+
+
+/* Everything below this point is not included by assembler (.S) files. */
+#ifndef __ASSEMBLY__
+
+void printk(const char *fmt, ...);
+
+/* some function prototypes */
+void trap_init(void);
+
+#define likely(x)  __builtin_expect((x),1)
+#define unlikely(x)  __builtin_expect((x),0)
+
+#ifndef XENHVM
+
+/*
+ * STI/CLI equivalents. These basically set and clear the virtual
+ * event_enable flag in the shared_info structure. Note that when
+ * the enable bit is set, there may be pending events to be handled.
+ * We may therefore call into do_hypervisor_callback() directly.
+ */
+
+#define __cli()                                                         \
+do {                                                                    \
+        vcpu_info_t *_vcpu;                                             \
+        _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)];	\
+        _vcpu->evtchn_upcall_mask = 1;                                  \
+        barrier();                                                      \
+} while (0)
+
+#define __sti()                                                         \
+do {                                                                    \
+        vcpu_info_t *_vcpu;                                             \
+        barrier();                                                      \
+        _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)];	\
+        _vcpu->evtchn_upcall_mask = 0;                                  \
+        barrier(); /* unmask then check (avoid races) */                \
+        if ( unlikely(_vcpu->evtchn_upcall_pending) )                   \
+                force_evtchn_callback();                                \
+} while (0)
+
+#define __restore_flags(x)                                              \
+do {                                                                    \
+        vcpu_info_t *_vcpu;                                             \
+        barrier();                                                      \
+        _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)];	\
+        if ((_vcpu->evtchn_upcall_mask = (x)) == 0) {                   \
+                barrier(); /* unmask then check (avoid races) */        \
+                if ( unlikely(_vcpu->evtchn_upcall_pending) )           \
+                        force_evtchn_callback();                        \
+        } 								\
+} while (0)
+
+/*
+ * Add critical_{enter, exit}?
+ *
+ */
+#define __save_and_cli(x)                                               \
+do {                                                                    \
+        vcpu_info_t *_vcpu;                                             \
+        _vcpu = &HYPERVISOR_shared_info->vcpu_info[PCPU_GET(cpuid)];	\
+        (x) = _vcpu->evtchn_upcall_mask;                                \
+        _vcpu->evtchn_upcall_mask = 1;                                  \
+        barrier();                                                      \
+} while (0)
+
+
+#define cli() __cli()
+#define sti() __sti()
+#define save_flags(x) __save_flags(x)
+#define restore_flags(x) __restore_flags(x)
+#define save_and_cli(x) __save_and_cli(x)
+
+#define local_irq_save(x)       __save_and_cli(x)
+#define local_irq_restore(x)    __restore_flags(x)
+#define local_irq_disable()     __cli()
+#define local_irq_enable()      __sti()
+
+#define mtx_lock_irqsave(lock, x) do { local_irq_save((x)); mtx_lock_spin((lock)); } while (0) /* do/while(0): one statement, safe in if/else */
+#define mtx_unlock_irqrestore(lock, x) do { mtx_unlock_spin((lock)); local_irq_restore((x)); } while (0) /* reverse order of the lock path */
+#define spin_lock_irqsave mtx_lock_irqsave
+#define spin_unlock_irqrestore mtx_unlock_irqrestore
+
+#else
+#endif
+
+#ifndef mb
+#define mb() __asm__ __volatile__("mfence":::"memory")
+#endif
+#ifndef rmb
+#define rmb() __asm__ __volatile__("lfence":::"memory")	/* no trailing ';' - the caller supplies it */
+#endif
+#ifndef wmb
+#define wmb() barrier()
+#endif
+#ifdef SMP
+#define smp_mb() mb() 
+#define smp_rmb() rmb()
+#define smp_wmb() wmb()
+#define smp_read_barrier_depends()      read_barrier_depends()
+#define set_mb(var, value) do { xchg(&var, value); } while (0)
+#else
+#define smp_mb()        barrier()
+#define smp_rmb()       barrier()
+#define smp_wmb()       barrier()
+#define smp_read_barrier_depends()      do { } while(0)
+#define set_mb(var, value) do { var = value; barrier(); } while (0)
+#endif
+
+
+/* This is a barrier for the compiler only, NOT the processor! */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+#define LOCK_PREFIX "lock; "	/* bit ops below are documented as atomic: always emit the lock prefix */
+#define LOCK "lock; "		/* same prefix, used by atomic_inc() */
+#define ADDR (*(volatile long *) addr)
+/*
+ * Make sure gcc doesn't try to be clever and move things around
+ * on us. We need to use _exactly_ the address the user gave us,
+ * not some alias that contains the same information.
+ */
+typedef struct { volatile int counter; } atomic_t;
+
+
+
+#define xen_xchg(ptr,v) \
+        ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
+struct __xchg_dummy { unsigned long a[100]; };
+#define __xg(x) ((volatile struct __xchg_dummy *)(x))
+/* Atomically exchange x with the size-byte object at ptr; returns the old value. */
+/* size 8 covers pointers/longs on amd64; any other size leaves *ptr untouched. */
+static __inline unsigned long __xchg(unsigned long x, volatile void * ptr,
+                                   int size)
+{
+    switch (size) {
+    case 1:
+        __asm__ __volatile__("xchgb %b0,%1"
+                             :"=q" (x) :"m" (*__xg(ptr)), "0" (x) :"memory");
+        break;
+    case 2:
+        __asm__ __volatile__("xchgw %w0,%1"
+                             :"=r" (x) :"m" (*__xg(ptr)), "0" (x) :"memory");
+        break;
+    case 4:	/* %k0: x is a 64-bit operand, xchgl needs the 32-bit register name */
+        __asm__ __volatile__("xchgl %k0,%1"
+                             :"=r" (x) :"m" (*__xg(ptr)), "0" (x) :"memory");
+        break;
+    case 8:
+        __asm__ __volatile__("xchgq %0,%1"
+                             :"=r" (x) :"m" (*__xg(ptr)), "0" (x) :"memory");
+        break;
+    }
+    return x;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is atomic and cannot be reordered.  
+ * It also implies a memory barrier.
+ */
+static __inline int test_and_clear_bit(int nr, volatile void * addr)
+{
+        int oldbit;
+
+        __asm__ __volatile__( LOCK_PREFIX
+                "btrl %2,%1\n\tsbbl %0,%0"
+                :"=r" (oldbit),"=m" (ADDR)
+                :"Ir" (nr) : "memory");
+        return oldbit;
+}
+
+static __inline int constant_test_bit(int nr, const volatile void * addr)
+{
+    return ((1UL << (nr & 31)) & (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline int variable_test_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    
+    __asm__ __volatile__(
+        "btl %2,%1\n\tsbbl %0,%0"
+        :"=r" (oldbit)
+        :"m" (ADDR),"Ir" (nr));
+    return oldbit;
+}
+
+#define test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ constant_test_bit((nr),(addr)) : \
+ variable_test_bit((nr),(addr)))
+
+
+/**
+ * set_bit - Atomically set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * This function is atomic and may not be reordered.  See __set_bit()
+ * if you do not require the atomic guarantees.
+ * Note that @nr may be almost arbitrarily large; this function is not
+ * restricted to acting on a single-word quantity.
+ */
+static __inline__ void set_bit(int nr, volatile void * addr)
+{
+        __asm__ __volatile__( LOCK_PREFIX
+                "btsl %1,%0"
+                :"+m" (ADDR)	/* bts reads and writes the word, so "+m" not "=m" */
+                :"Ir" (nr) : "memory");	/* nr may select a word beyond *addr */
+}
+
+/**
+ * clear_bit - Clears a bit in memory
+ * @nr: Bit to clear
+ * @addr: Address to start counting from
+ *
+ * clear_bit() is atomic and may not be reordered.  However, it does
+ * not contain a memory barrier, so if it is used for locking purposes,
+ * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
+ * in order to ensure changes are visible on other processors.
+ */
+static __inline__ void clear_bit(int nr, volatile void * addr)
+{
+        __asm__ __volatile__( LOCK_PREFIX
+                "btrl %1,%0"
+                :"+m" (ADDR)	/* btr reads and writes the word, so "+m" not "=m" */
+                :"Ir" (nr) : "memory");	/* nr may select a word beyond *addr */
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ * 
+ * Atomically increments @v by 1.  Note that the guaranteed
+ * useful range of an atomic_t is only 24 bits.
+ */ 
+static __inline__ void atomic_inc(atomic_t *v)
+{
+        __asm__ __volatile__(
+                LOCK "incl %0"
+                :"=m" (v->counter)
+                :"m" (v->counter));
+}
+
+
+#define rdtscll(val) do { unsigned int _lo, _hi; __asm__ __volatile__("rdtsc" \
+     : "=a" (_lo), "=d" (_hi)); (val) = ((unsigned long)_hi << 32) | _lo; } while (0) /* "=A" is not EDX:EAX on amd64 */
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _XEN_OS_H_ */

Property changes on: amd64/include/xen/xen-os.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: amd64/include/xen/hypercall.h
===================================================================
--- amd64/include/xen/hypercall.h	(.../stable/6/sys)	(revision 0)
+++ amd64/include/xen/hypercall.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,415 @@
+/******************************************************************************
+ * hypercall.h
+ * 
+ * Linux-specific hypervisor handling.
+ * 
+ * Copyright (c) 2002-2004, K A Fraser
+ * 
+ * 64-bit updates:
+ *   Benjamin Liu <benjamin.liu@intel.com>
+ *   Jun Nakajima <jun.nakajima@intel.com>
+ * 
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __MACHINE_XEN_HYPERCALL_H__
+#define __MACHINE_XEN_HYPERCALL_H__
+
+#include <sys/systm.h>
+
+#ifndef __XEN_HYPERVISOR_H__
+# error "please don't include this file directly"
+#endif
+
+#define __STR(x) #x
+#define STR(x) __STR(x)
+#define	ENOXENSYS	38
+#define CONFIG_XEN_COMPAT	0x030002
+#define __must_check
+
+#ifdef XEN
+#define HYPERCALL_STR(name)					\
+	"call hypercall_page + ("STR(__HYPERVISOR_##name)" * 32)"
+#else
+#define HYPERCALL_STR(name)					\
+	"mov $("STR(__HYPERVISOR_##name)" * 32),%%eax; "\
+	"add hypercall_stubs(%%rip),%%rax; "			\
+	"call *%%rax"
+#endif
+
+#define _hypercall0(type, name)			\
+({						\
+	type __res;				\
+	__asm__ volatile (				\
+		HYPERCALL_STR(name)		\
+		: "=a" (__res)			\
+		:				\
+		: "memory" );			\
+	__res;					\
+})
+
+#define _hypercall1(type, name, a1)				\
+({								\
+	type __res;						\
+	long __ign1;						\
+	__asm__ volatile (						\
+		HYPERCALL_STR(name)				\
+		: "=a" (__res), "=D" (__ign1)			\
+		: "1" ((long)(a1))				\
+		: "memory" );					\
+	__res;							\
+})
+
+#define _hypercall2(type, name, a1, a2)				\
+({								\
+	type __res;						\
+	long __ign1, __ign2;					\
+	__asm__ volatile (						\
+		HYPERCALL_STR(name)				\
+		: "=a" (__res), "=D" (__ign1), "=S" (__ign2)	\
+		: "1" ((long)(a1)), "2" ((long)(a2))		\
+		: "memory" );					\
+	__res;							\
+})
+
+#define _hypercall3(type, name, a1, a2, a3)			\
+({								\
+	type __res;						\
+	long __ign1, __ign2, __ign3;				\
+	__asm__ volatile (						\
+		HYPERCALL_STR(name)				\
+		: "=a" (__res), "=D" (__ign1), "=S" (__ign2), 	\
+		"=d" (__ign3)					\
+		: "1" ((long)(a1)), "2" ((long)(a2)),		\
+		"3" ((long)(a3))				\
+		: "memory" );					\
+	__res;							\
+})
+
+#define _hypercall4(type, name, a1, a2, a3, a4)			\
+({								\
+	type __res;						\
+	long __ign1, __ign2, __ign3;				\
+	register long __arg4 __asm__("r10") = (long)(a4);		\
+	__asm__ volatile (						\
+		HYPERCALL_STR(name)				\
+		: "=a" (__res), "=D" (__ign1), "=S" (__ign2),	\
+		  "=d" (__ign3), "+r" (__arg4)			\
+		: "1" ((long)(a1)), "2" ((long)(a2)),		\
+		  "3" ((long)(a3))				\
+		: "memory" );					\
+	__res;							\
+})
+
+#define _hypercall5(type, name, a1, a2, a3, a4, a5)		\
+({								\
+	type __res;						\
+	long __ign1, __ign2, __ign3;				\
+	register long __arg4 __asm__("r10") = (long)(a4);		\
+	register long __arg5 __asm__("r8") = (long)(a5);		\
+	__asm__ volatile (						\
+		HYPERCALL_STR(name)				\
+		: "=a" (__res), "=D" (__ign1), "=S" (__ign2),	\
+		  "=d" (__ign3), "+r" (__arg4), "+r" (__arg5)	\
+		: "1" ((long)(a1)), "2" ((long)(a2)),		\
+		  "3" ((long)(a3))				\
+		: "memory" );					\
+	__res;							\
+})
+
+static inline int __must_check
+HYPERVISOR_set_trap_table(
+	const trap_info_t *table)
+{
+	return _hypercall1(int, set_trap_table, table);
+}
+
+static inline int __must_check
+HYPERVISOR_mmu_update(
+	mmu_update_t *req, unsigned int count, unsigned int *success_count,
+	domid_t domid)
+{
+	return _hypercall4(int, mmu_update, req, count, success_count, domid);
+}
+
+static inline int __must_check
+HYPERVISOR_mmuext_op(
+	struct mmuext_op *op, unsigned int count, unsigned int *success_count,
+	domid_t domid)
+{
+	return _hypercall4(int, mmuext_op, op, count, success_count, domid);
+}
+
+static inline int __must_check
+HYPERVISOR_set_gdt(
+	unsigned long *frame_list, unsigned int entries)
+{
+	return _hypercall2(int, set_gdt, frame_list, entries);
+}
+
+static inline int __must_check
+HYPERVISOR_stack_switch(
+	unsigned long ss, unsigned long esp)
+{
+	return _hypercall2(int, stack_switch, ss, esp);
+}
+
+static inline int __must_check
+HYPERVISOR_set_callbacks(
+	unsigned long event_address, unsigned long failsafe_address, 
+	unsigned long syscall_address)
+{
+	return _hypercall3(int, set_callbacks,
+			   event_address, failsafe_address, syscall_address);
+}
+
+static inline int
+HYPERVISOR_fpu_taskswitch(
+	int set)
+{
+	return _hypercall1(int, fpu_taskswitch, set);
+}
+
+static inline int __must_check
+HYPERVISOR_sched_op_compat(
+	int cmd, unsigned long arg)
+{
+	return _hypercall2(int, sched_op_compat, cmd, arg);
+}
+
+static inline int __must_check
+HYPERVISOR_sched_op(
+	int cmd, void *arg)
+{
+	return _hypercall2(int, sched_op, cmd, arg);
+}
+
+static inline long __must_check
+HYPERVISOR_set_timer_op(
+	uint64_t timeout)
+{
+	return _hypercall1(long, set_timer_op, timeout);
+}
+
+static inline int __must_check
+HYPERVISOR_platform_op(
+	struct xen_platform_op *platform_op)
+{
+	platform_op->interface_version = XENPF_INTERFACE_VERSION;
+	return _hypercall1(int, platform_op, platform_op);
+}
+
+static inline int __must_check
+HYPERVISOR_set_debugreg(
+	unsigned int reg, unsigned long value)
+{
+	return _hypercall2(int, set_debugreg, reg, value);
+}
+
+static inline unsigned long __must_check
+HYPERVISOR_get_debugreg(
+	unsigned int reg)
+{
+	return _hypercall1(unsigned long, get_debugreg, reg);
+}
+
+static inline int __must_check
+HYPERVISOR_update_descriptor(
+	unsigned long ma, unsigned long word)
+{
+	return _hypercall2(int, update_descriptor, ma, word);
+}
+
+static inline int __must_check
+HYPERVISOR_memory_op(
+	unsigned int cmd, void *arg)
+{
+	return _hypercall2(int, memory_op, cmd, arg);
+}
+
+static inline int __must_check
+HYPERVISOR_multicall(
+	multicall_entry_t *call_list, unsigned int nr_calls)
+{
+	return _hypercall2(int, multicall, call_list, nr_calls);
+}
+
+static inline int __must_check
+HYPERVISOR_update_va_mapping(
+	unsigned long va, uint64_t new_val, unsigned long flags)
+{
+	return _hypercall3(int, update_va_mapping, va, new_val, flags);
+}
+
+static inline int __must_check
+HYPERVISOR_event_channel_op(
+	int cmd, void *arg)
+{
+	int rc = _hypercall2(int, event_channel_op, cmd, arg);
+	/* On -ENOXENSYS retry via event_channel_op_compat, which takes cmd and args in one struct. */
+#if CONFIG_XEN_COMPAT <= 0x030002
+	if (unlikely(rc == -ENOXENSYS)) {
+		struct evtchn_op op;
+		op.cmd = cmd;
+		memcpy(&op.u, arg, sizeof(op.u));	/* NOTE(review): copies sizeof(op.u) bytes whatever *arg really is - confirm callers' buffers are that large */
+		rc = _hypercall1(int, event_channel_op_compat, &op);
+		memcpy(arg, &op.u, sizeof(op.u));	/* copy the op's payload back into the caller's buffer */
+	}
+#endif
+
+	return rc;
+}
+
+static inline int __must_check
+HYPERVISOR_xen_version(
+	int cmd, void *arg)
+{
+	return _hypercall2(int, xen_version, cmd, arg);
+}
+
+static inline int __must_check
+HYPERVISOR_console_io(
+	int cmd, unsigned int count, char *str)
+{
+	return _hypercall3(int, console_io, cmd, count, str);
+}
+
+static inline int __must_check
+HYPERVISOR_physdev_op(
+	int cmd, void *arg)
+{
+	int rc = _hypercall2(int, physdev_op, cmd, arg);
+	/* On -ENOXENSYS retry via physdev_op_compat, which takes cmd and args in one struct. */
+#if CONFIG_XEN_COMPAT <= 0x030002
+	if (unlikely(rc == -ENOXENSYS)) {
+		struct physdev_op op;
+		op.cmd = cmd;
+		memcpy(&op.u, arg, sizeof(op.u));	/* NOTE(review): copies sizeof(op.u) bytes whatever *arg really is - confirm callers' buffers are that large */
+		rc = _hypercall1(int, physdev_op_compat, &op);
+		memcpy(arg, &op.u, sizeof(op.u));	/* copy the op's payload back into the caller's buffer */
+	}
+#endif
+
+	return rc;
+}
+
+static inline int __must_check
+HYPERVISOR_grant_table_op(
+	unsigned int cmd, void *uop, unsigned int count)
+{
+	return _hypercall3(int, grant_table_op, cmd, uop, count);
+}
+
+static inline int __must_check
+HYPERVISOR_update_va_mapping_otherdomain(
+	unsigned long va, uint64_t new_val, unsigned long flags, domid_t domid)
+{
+	return _hypercall4(int, update_va_mapping_otherdomain, va,
+			   new_val, flags, domid);
+}
+
+static inline int __must_check
+HYPERVISOR_vm_assist(
+	unsigned int cmd, unsigned int type)
+{
+	return _hypercall2(int, vm_assist, cmd, type);
+}
+
+static inline int __must_check
+HYPERVISOR_vcpu_op(
+	int cmd, unsigned int vcpuid, void *extra_args)
+{
+	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
+}
+
+static inline int __must_check
+HYPERVISOR_set_segment_base(
+	int reg, unsigned long value)
+{
+	return _hypercall2(int, set_segment_base, reg, value);
+}
+
+static inline int __must_check
+HYPERVISOR_suspend(
+	unsigned long srec)
+{
+	struct sched_shutdown sched_shutdown = {
+		.reason = SHUTDOWN_suspend
+	};
+	/* Issue sched_op(SCHEDOP_shutdown) with the reason struct; srec is passed as the third argument. */
+	int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown,
+			     &sched_shutdown, srec);
+	/* On -ENOXENSYS retry via sched_op_compat, which takes the reason code by value. */
+#if CONFIG_XEN_COMPAT <= 0x030002
+	if (rc == -ENOXENSYS)
+		rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown,
+				 SHUTDOWN_suspend, srec);
+#endif
+
+	return rc;
+}
+
+#if CONFIG_XEN_COMPAT <= 0x030002
+static inline int
+HYPERVISOR_nmi_op(
+	unsigned long op, void *arg)
+{
+	return _hypercall2(int, nmi_op, op, arg);
+}
+#endif
+
+#ifndef CONFIG_XEN
+static inline unsigned long __must_check
+HYPERVISOR_hvm_op(
+    int op, void *arg)
+{
+    return _hypercall2(unsigned long, hvm_op, op, arg);
+}
+#endif
+
+static inline int __must_check
+HYPERVISOR_callback_op(
+	int cmd, const void *arg)
+{
+	return _hypercall2(int, callback_op, cmd, arg);
+}
+
+static inline int __must_check
+HYPERVISOR_xenoprof_op(
+	int op, void *arg)
+{
+	return _hypercall2(int, xenoprof_op, op, arg);
+}
+
+static inline int __must_check
+HYPERVISOR_kexec_op(
+	unsigned long op, void *args)
+{
+	return _hypercall2(int, kexec_op, op, args);
+}
+
+#undef __must_check
+
+#endif /* __MACHINE_XEN_HYPERCALL_H__ */

Property changes on: amd64/include/xen/hypercall.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: amd64/include/xen/xenvar.h
===================================================================
--- amd64/include/xen/xenvar.h	(.../stable/6/sys)	(revision 0)
+++ amd64/include/xen/xenvar.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2008 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * $FreeBSD$
+ */
+#ifndef XENVAR_H_
+#define XENVAR_H_
+#define XBOOTUP 0x1
+#define XPMAP   0x2
+extern int xendebug_flags;
+#ifndef NOXENDEBUG
+#define XENPRINTF printk
+#else
+#define XENPRINTF printf
+#endif
+#include <xen/features.h>
+
+#if 0
+#define TRACE_ENTER XENPRINTF("(file=%s, line=%d) entered %s\n", __FILE__, __LINE__, __FUNCTION__)
+#define TRACE_EXIT XENPRINTF("(file=%s, line=%d) exiting %s\n", __FILE__, __LINE__, __FUNCTION__)
+#define TRACE_DEBUG(argflags, _f, _a...) \
+if (xendebug_flags & argflags) XENPRINTF("(file=%s, line=%d) " _f "\n", __FILE__, __LINE__, ## _a);
+#else
+#define TRACE_ENTER
+#define TRACE_EXIT
+#define TRACE_DEBUG(argflags, _f, _a...)
+#endif
+
+#ifdef XENHVM
+
+static inline vm_paddr_t
+phystomach(vm_paddr_t pa)
+{
+
+	return (pa);
+}
+
+static inline vm_paddr_t
+machtophys(vm_paddr_t ma)
+{
+
+	return (ma);
+}
+
+#define vtomach(va)	pmap_kextract((vm_offset_t) (va))
+#define PFNTOMFN(pa)	(pa)
+#define MFNTOPFN(ma)	(ma)
+
+#define set_phys_to_machine(pfn, mfn)	((void)0)
+#define PT_UPDATES_FLUSH()		((void)0)
+
+#else
+
+extern	xen_pfn_t *xen_phys_machine;
+
+
+extern xen_pfn_t *xen_machine_phys;
+/* Xen starts physical pages after the 4MB ISA hole -
+ * FreeBSD doesn't
+ */
+
+
+#undef ADD_ISA_HOLE /* XXX */
+
+#ifdef ADD_ISA_HOLE
+#define ISA_INDEX_OFFSET 1024 
+#define ISA_PDR_OFFSET 1
+#else
+#define ISA_INDEX_OFFSET 0
+#define ISA_PDR_OFFSET 0
+#endif
+
+
+#define PFNTOMFN(i) (xen_phys_machine[(i)])
+#define MFNTOPFN(i) ((vm_paddr_t)xen_machine_phys[(i)])
+
+#define VTOP(x) ((((uintptr_t)(x))) - KERNBASE)
+#define PTOV(x) (((uintptr_t)(x)) + KERNBASE)
+
+#define VTOPFN(x) (VTOP(x) >> PAGE_SHIFT)
+#define PFNTOV(x) PTOV((vm_paddr_t)(x)  << PAGE_SHIFT)
+
+#define VTOMFN(va) (vtomach(va) >> PAGE_SHIFT)
+#define PFN_UP(x)    (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+
+#define phystomach(pa) (((vm_paddr_t)(PFNTOMFN((pa) >> PAGE_SHIFT))) << PAGE_SHIFT)
+#define machtophys(ma) (((vm_paddr_t)(MFNTOPFN((ma) >> PAGE_SHIFT))) << PAGE_SHIFT)
+
+#endif
+
+void xpq_init(void);
+
+int  xen_create_contiguous_region(vm_page_t pages, int npages);
+
+void  xen_destroy_contiguous_region(void * addr, int npages);
+
+#endif

Property changes on: amd64/include/xen/xenvar.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: amd64/include/xen/synch_bitops.h
===================================================================
--- amd64/include/xen/synch_bitops.h	(.../stable/6/sys)	(revision 0)
+++ amd64/include/xen/synch_bitops.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,129 @@
+#ifndef __XEN_SYNCH_BITOPS_H__
+#define __XEN_SYNCH_BITOPS_H__
+
+/*
+ * Copyright 1992, Linus Torvalds.
+ * Heavily modified to provide guaranteed strong synchronisation
+ * when communicating with Xen or other guest OSes running on other CPUs.
+ */
+
+
+#define ADDR (*(volatile long *) addr)
+
+static __inline__ void synch_set_bit(int nr, volatile void * addr)
+{
+    __asm__ __volatile__ ( 
+        "lock btsl %1,%0"
+        : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ void synch_clear_bit(int nr, volatile void * addr)
+{
+    __asm__ __volatile__ (
+        "lock btrl %1,%0"
+        : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ void synch_change_bit(int nr, volatile void * addr)
+{
+    __asm__ __volatile__ (
+        "lock btcl %1,%0"
+        : "=m" (ADDR) : "Ir" (nr) : "memory" );
+}
+
+static __inline__ int synch_test_and_set_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    __asm__ __volatile__ (
+        "lock btsl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+    return oldbit;
+}
+
+static __inline__ int synch_test_and_clear_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    __asm__ __volatile__ (
+        "lock btrl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+    return oldbit;
+}
+
+static __inline__ int synch_test_and_change_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+
+    __asm__ __volatile__ (
+        "lock btcl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit), "=m" (ADDR) : "Ir" (nr) : "memory");
+    return oldbit;
+}
+
+struct __synch_xchg_dummy { unsigned long a[100]; };
+#define __synch_xg(x) ((volatile struct __synch_xchg_dummy *)(x))
+
+#define synch_cmpxchg(ptr, old, new) \
+((__typeof__(*(ptr)))__synch_cmpxchg((ptr),\
+                                     (unsigned long)(old), \
+                                     (unsigned long)(new), \
+                                     sizeof(*(ptr))))
+
+static inline unsigned long __synch_cmpxchg(volatile void *ptr,
+					    unsigned long old,
+					    unsigned long new, int size)
+{
+	unsigned long prev;	/* output of the "a" register after cmpxchg; returned to the caller */
+	switch (size) {	/* size is the operand width in bytes (sizeof(*ptr) in synch_cmpxchg) */
+	case 1:
+		__asm__ __volatile__("lock; cmpxchgb %b1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__synch_xg(ptr)),
+				       "0"(old)
+				     : "memory");
+		return prev;
+	case 2:
+		__asm__ __volatile__("lock; cmpxchgw %w1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__synch_xg(ptr)),
+				       "0"(old)
+				     : "memory");
+		return prev;
+	case 4:
+		__asm__ __volatile__("lock; cmpxchgl %k1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__synch_xg(ptr)),
+				       "0"(old)
+				     : "memory");
+		return prev;
+	case 8:
+		__asm__ __volatile__("lock; cmpxchgq %1,%2"
+				     : "=a"(prev)
+				     : "q"(new), "m"(*__synch_xg(ptr)),
+				       "0"(old)
+				     : "memory");
+		return prev;
+	}
+	return old;	/* NOTE(review): size not 1/2/4/8 - *ptr is untouched yet "old" is returned, which a caller comparing against old reads as success */
+}
+
+static __inline__ int synch_const_test_bit(int nr, const volatile void * addr)
+{
+    return ((1UL << (nr & 31)) & 
+            (((const volatile unsigned int *) addr)[nr >> 5])) != 0;
+}
+
+static __inline__ int synch_var_test_bit(int nr, volatile void * addr)
+{
+    int oldbit;
+    __asm__ __volatile__ (
+        "btl %2,%1\n\tsbbl %0,%0"
+        : "=r" (oldbit) : "m" (ADDR), "Ir" (nr) );
+    return oldbit;
+}
+
+#define synch_test_bit(nr,addr) \
+(__builtin_constant_p(nr) ? \
+ synch_const_test_bit((nr),(addr)) : \
+ synch_var_test_bit((nr),(addr)))
+
+#endif /* __XEN_SYNCH_BITOPS_H__ */

Property changes on: amd64/include/xen/synch_bitops.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: amd64/include/xen/xenfunc.h
===================================================================
--- amd64/include/xen/xenfunc.h	(.../stable/6/sys)	(revision 0)
+++ amd64/include/xen/xenfunc.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,83 @@
+/*
+ *
+ * Copyright (c) 2004,2005 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _XEN_XENFUNC_H_
+#define _XEN_XENFUNC_H_
+
+#ifdef XENHVM
+#include <machine/xen/xenvar.h>
+#else
+#include <machine/xen/xenpmap.h>
+#include <machine/segments.h>
+#endif
+
+#define BKPT __asm__("int3");
+#define XPQ_CALL_DEPTH 5
+#define XPQ_CALL_COUNT 2
+#define PG_PRIV PG_AVAIL3
+typedef struct { 
+	unsigned long pt_ref;
+	unsigned long pt_eip[XPQ_CALL_COUNT][XPQ_CALL_DEPTH];
+} pteinfo_t;
+
+extern pteinfo_t *pteinfo_list;
+#ifdef XENDEBUG_LOW
+#define	__PRINTK(x) printk x
+#else
+#define	__PRINTK(x)
+#endif
+
+char *xen_setbootenv(char *cmd_line);
+
+int  xen_boothowto(char *envp);
+
+void _xen_machphys_update(vm_paddr_t, vm_paddr_t, char *file, int line);
+
+#ifdef INVARIANTS
+#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), __FILE__, __LINE__)
+#else
+#define xen_machphys_update(a, b) _xen_machphys_update((a), (b), NULL, 0)
+#endif	
+
+#ifndef XENHVM
+void xen_update_descriptor(union descriptor *, union descriptor *);
+#endif
+
+extern struct mtx balloon_lock;
+#if 0
+#define balloon_lock(__flags)   mtx_lock_irqsave(&balloon_lock, __flags)
+#define balloon_unlock(__flags) mtx_unlock_irqrestore(&balloon_lock, __flags)
+#else
+#define balloon_lock(__flags)   __flags = 1
+#define balloon_unlock(__flags) __flags = 0
+#endif
+
+
+
+#endif /* _XEN_XENFUNC_H_ */

Property changes on: amd64/include/xen/xenfunc.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native

Index: amd64/include/xen/xenpmap.h
===================================================================
--- amd64/include/xen/xenpmap.h	(.../stable/6/sys)	(revision 0)
+++ amd64/include/xen/xenpmap.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,227 @@
+/*
+ *
+ * Copyright (c) 2004 Christian Limpach.
+ * Copyright (c) 2004,2005 Kip Macy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *      This product includes software developed by Christian Limpach.
+ * 4. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _XEN_XENPMAP_H_
+#define _XEN_XENPMAP_H_
+
+#include <machine/xen/features.h>
+
+void _xen_queue_pt_update(vm_paddr_t, vm_paddr_t, char *, int);
+void xen_pt_switch(vm_paddr_t);
+void xen_set_ldt(vm_paddr_t, unsigned long);
+void xen_pgdpt_pin(vm_paddr_t);
+void xen_pgd_pin(vm_paddr_t);
+void xen_pgd_unpin(vm_paddr_t);
+void xen_pt_pin(vm_paddr_t);
+void xen_pt_unpin(vm_paddr_t);
+void xen_flush_queue(void);
+void xen_check_queue(void);
+#if 0
+void pmap_ref(pt_entry_t *pte, vm_paddr_t ma);
+#endif
+
+#ifdef INVARIANTS
+#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), __FILE__, __LINE__)
+#else
+#define xen_queue_pt_update(a, b) _xen_queue_pt_update((a), (b), NULL, 0)
+#endif	
+
+#ifdef PMAP_DEBUG
+#define PMAP_REF pmap_ref
+#define PMAP_DEC_REF_PAGE pmap_dec_ref_page
+#define PMAP_MARK_PRIV pmap_mark_privileged
+#define PMAP_MARK_UNPRIV pmap_mark_unprivileged
+#else 
+#define PMAP_MARK_PRIV(a)
+#define PMAP_MARK_UNPRIV(a)
+#define PMAP_REF(a, b)
+#define PMAP_DEC_REF_PAGE(a)
+#endif
+
+#define ALWAYS_SYNC 0
+
+#ifdef PT_DEBUG
+#define PT_LOG() printk("WP PT_SET %s:%d\n", __FILE__, __LINE__) 
+#else
+#define PT_LOG()
+#endif
+
+#define INVALID_P2M_ENTRY	(~0UL)
+
+#define pmap_valid_entry(E)           ((E) & PG_V) /* is PDE or PTE valid? */
+
+#define SH_PD_SET_VA        1
+#define SH_PD_SET_VA_MA     2
+#define SH_PD_SET_VA_CLEAR  3
+
+struct pmap;
+void pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type);
+#ifdef notyet	/* vptetomachpte() is compiled out unless notyet is defined */
+static vm_paddr_t
+vptetomachpte(vm_paddr_t *pte)
+{
+	vm_offset_t offset, ppte;
+	vm_paddr_t pgoffset, retval, *pdir_shadow_ptr;
+	int pgindex;
+
+	ppte = (vm_offset_t)pte;
+	pgoffset = (ppte & PAGE_MASK);
+	offset = ppte - (vm_offset_t)PTmap;	/* NOTE(review): offset is never read */
+	pgindex = ppte >> PDRSHIFT;
+
+	pdir_shadow_ptr = (vm_paddr_t *)PCPU_GET(pdir_shadow);
+	retval = (pdir_shadow_ptr[pgindex] & ~PAGE_MASK) + pgoffset;	/* shadow entry frame + pgoffset */
+	return (retval);
+}
+#endif
+#define	PT_GET(_ptp)						\
+	(pmap_valid_entry(*(_ptp)) ? xpmap_mtop(*(_ptp)) : (0))
+
+#ifdef WRITABLE_PAGETABLES
+
+#define PT_SET_VA(_ptp,_npte,sync) do {				\
+        PMAP_REF((_ptp), xpmap_ptom(_npte));                    \
+        PT_LOG();                                               \
+        *(_ptp) = xpmap_ptom((_npte));                          \
+} while (/*CONSTCOND*/0)
+#define PT_SET_VA_MA(_ptp,_npte,sync) do {		        \
+        PMAP_REF((_ptp), (_npte));                              \
+        PT_LOG();                                               \
+        *(_ptp) = (_npte);                                      \
+} while (/*CONSTCOND*/0)
+#define PT_CLEAR_VA(_ptp, sync) do {				\
+        PMAP_REF((pt_entry_t *)(_ptp), 0);                      \
+        PT_LOG();                                               \
+        *(_ptp) = 0;                                            \
+} while (/*CONSTCOND*/0)
+
+#define PD_SET_VA(_pmap, _ptp, _npte, sync) do {		\
+        PMAP_REF((_ptp), xpmap_ptom(_npte));                    \
+        pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA);           \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();     	\
+} while (/*CONSTCOND*/0)
+#define PD_SET_VA_MA(_pmap, _ptp, _npte, sync) do {		\
+        PMAP_REF((_ptp), (_npte));                              \
+        pd_set((_pmap),(_ptp),(_npte), SH_PD_SET_VA_MA);        \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PD_CLEAR_VA(_pmap, _ptp, sync) do {			\
+        PMAP_REF((pt_entry_t *)(_ptp), 0);                      \
+        pd_set((_pmap),(_ptp), 0, SH_PD_SET_VA_CLEAR);  	\
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+
+#else /* !WRITABLE_PAGETABLES */
+
+#define PT_SET_VA(_ptp,_npte,sync) do {				\
+        PMAP_REF((_ptp), xpmap_ptom(_npte));                    \
+	xen_queue_pt_update(vtomach(_ptp), 	        \
+			    xpmap_ptom(_npte)); 		\
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PT_SET_VA_MA(_ptp,_npte,sync) do {		        \
+        PMAP_REF((_ptp), (_npte));                              \
+	xen_queue_pt_update(vtomach(_ptp), _npte);        \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PT_CLEAR_VA(_ptp, sync) do {				\
+        PMAP_REF((pt_entry_t *)(_ptp), 0);                      \
+	xen_queue_pt_update(vtomach(_ptp), 0);            \
+	if (sync || ALWAYS_SYNC)				\
+		xen_flush_queue();				\
+} while (/*CONSTCOND*/0)
+/* Page-directory updates go through pd_set(); PMAP_REF is a no-op unless PMAP_DEBUG. */
+#define PD_SET_VA(_pmap, _ptepindex,_npte,sync) do {		\
+        PMAP_REF((_ptepindex), xpmap_ptom(_npte));              \
+        pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA);     \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();     	\
+} while (/*CONSTCOND*/0)
+#define PD_SET_VA_MA(_pmap, _ptepindex,_npte,sync) do {		\
+        PMAP_REF((_ptepindex), (_npte));                        \
+        pd_set((_pmap),(_ptepindex),(_npte), SH_PD_SET_VA_MA);  \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+#define PD_CLEAR_VA(_pmap, _ptepindex, sync) do {		\
+        PMAP_REF((pt_entry_t *)(_ptepindex), 0);                \
+        pd_set((_pmap),(_ptepindex), 0, SH_PD_SET_VA_CLEAR);    \
+	if (sync || ALWAYS_SYNC) xen_flush_queue();		\
+} while (/*CONSTCOND*/0)
+
+#endif
+
+#define PT_SET_MA(_va, _ma) 					\
+do { 								\
+   PANIC_IF(HYPERVISOR_update_va_mapping(((unsigned long)(_va)),\
+	   (_ma),						\
+	   UVMF_INVLPG| UVMF_ALL) < 0);			\
+} while (/*CONSTCOND*/0)	  
+
+#define	PT_UPDATES_FLUSH() do {				        \
+        xen_flush_queue();                                      \
+} while (/*CONSTCOND*/0)
+
+static __inline vm_paddr_t
+xpmap_mtop(vm_paddr_t mpa)
+{
+	/* Only the PG_FRAME bits go through machtophys(); the rest are kept. */
+	vm_paddr_t frame = mpa & PG_FRAME;
+	return (machtophys(frame) | (mpa & ~PG_FRAME));
+}
+
+static __inline vm_paddr_t
+xpmap_ptom(vm_paddr_t ppa)
+{
+	/* Only the PG_FRAME bits go through phystomach(); the rest are kept. */
+	vm_paddr_t frame = ppa & PG_FRAME;
+	return (phystomach(frame) | (ppa & ~PG_FRAME));
+}
+/* Store mfn in xen_phys_machine[pfn], unless the physmap is auto-translated. */
+static __inline void
+set_phys_to_machine(unsigned long pfn, unsigned long mfn)
+{
+#ifdef notyet	
+        PANIC_IF(max_mapnr && pfn >= max_mapnr);
+#endif	
+        if (xen_feature(XENFEAT_auto_translated_physmap)) {
+#ifdef notyet		
+                PANIC_IF((pfn != mfn && mfn != INVALID_P2M_ENTRY));
+#endif		
+                return;	/* xen_phys_machine[] is left untouched on this path */
+        }
+        xen_phys_machine[pfn] = mfn;
+}
+
+
+
+
+#endif /* _XEN_XENPMAP_H_ */

Property changes on: amd64/include/xen/xenpmap.h
___________________________________________________________________
Added: svn:mime-type
   + text/plain
Added: svn:keywords
   + FreeBSD=%H
Added: svn:eol-style
   + native


Property changes on: amd64/include/xen
___________________________________________________________________
Added: svn:mergeinfo
   Merged /stable/7/sys/i386/include/xen:r172506,172810,175956,179044,179776,180149,182402
   Merged /head/sys/i386/include/xen:r153880,155086,155957,157624,158737,159574,159762,159802,159806,159810-159812,160052,162099,162118,162122,162458,162473,162619,162687-162688,163246,163398-163399,164281,164375,165225,165727,165852,165854,166067,166181,166901,169152,169451,169562,169609,169611,169796,169876,170273,170284,170405,170478,170802,170872,171053,171821-171822,171980,172025,172334,172607,172825,172919,172998,173081,173468,173592,173804,174385,174510,174756,174987,175005,175019-175021,175053,175162,175328-175329,175417,175466,176431,176526,176596,176996,177104,177228,177274,177289,177296,177462,177560,177567,177619,177635,177662,177685,177695,177862,177899,178033,178112,178241,178280,178589,178667,178719,178814,178920,178996,179057,179159,179174,179296,179335-179338,179343,179347,179425,179445,179488,179510,179631,179637,179655,179705,179716,179765,179831,179879,179925,179969,179971,180037-180038,180073,180077,180145,180152-180153,180220,180252-180253,180298-180299,180374,180382-180384,180437,180447,180503,180515,180567,180582,180612,180668,180753,180869,180946,180950,180952,180954,180981,181000,181002,181007,181016,181018,181020,181024,181089,181093,181129,181132,181333,181336,181399,181433,181436,181556-181557,181603,181606,181617-181619,181701,181824,181934,181953,181972,181976,181992,182003,182020,182046,182055,182060,182062,182066,182070,182078,182108,182110-182111,182115,182119,182122,182161,182321,182380,182391,182401,182461,182488,182600,182688,182713,182885,182887-182888,182913,182936,183078,183135,183236,183264,183628

Index: amd64/conf/XENHVM
===================================================================
--- amd64/conf/XENHVM	(.../stable/6/sys)	(revision 0)
+++ amd64/conf/XENHVM	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -0,0 +1,143 @@
+#
+# XENHVM -- Xen HVM kernel configuration file for FreeBSD/amd64
+#
+# For more information on this file, please read the handbook section on
+# Kernel Configuration Files:
+#
+#    http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html
+#
+# The handbook is also available locally in /usr/share/doc/handbook
+# if you've installed the doc distribution, otherwise always see the
+# FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the
+# latest information.
+#
+# An exhaustive list of options and more detailed explanations of the
+# device lines is also present in the ../../conf/NOTES and NOTES files.
+# If you are in doubt as to the purpose or necessity of a line, check first
+# in NOTES.
+#
+# $FreeBSD$
+
+machine		amd64
+cpu		HAMMER
+ident		XENHVM
+
+# To statically compile in device wiring instead of /boot/device.hints
+#hints		"GENERIC.hints"		# Default places to look for devices.
+
+makeoptions	DEBUG=-g		# Build kernel with gdb(1) debug symbols
+makeoptions	MODULES_OVERRIDE=""
+
+options 	SCHED_4BSD		# 4BSD scheduler
+options 	PREEMPTION		# Enable kernel thread preemption
+options 	INET			# InterNETworking
+options 	INET6			# IPv6 communications protocols
+options 	FFS			# Berkeley Fast Filesystem
+options 	SOFTUPDATES		# Enable FFS soft updates support
+options 	UFS_ACL			# Support for access control lists
+options 	UFS_DIRHASH		# Improve performance on big directories
+options 	MD_ROOT			# MD is a potential root device
+options 	NFSCLIENT		# Network Filesystem Client
+options 	NFSSERVER		# Network Filesystem Server
+options 	NFSLOCKD		# Network Lock Manager
+options 	NFS_ROOT		# NFS usable as /, requires NFSCLIENT
+options 	NTFS			# NT File System
+options 	MSDOSFS			# MSDOS Filesystem
+options 	CD9660			# ISO 9660 Filesystem
+options 	PROCFS			# Process filesystem (requires PSEUDOFS)
+options 	PSEUDOFS		# Pseudo-filesystem framework
+options 	GEOM_GPT		# GUID Partition Tables.
+options 	COMPAT_43		# Needed by COMPAT_LINUX32
+options 	COMPAT_IA32		# Compatible with i386 binaries
+options 	COMPAT_FREEBSD4		# Compatible with FreeBSD4
+options 	COMPAT_FREEBSD5		# Compatible with FreeBSD5
+options 	COMPAT_LINUX32		# Compatible with i386 linux binaries 
+options 	SCSI_DELAY=5000		# Delay (in ms) before probing SCSI
+options 	KTRACE			# ktrace(1) support
+options 	SYSVSHM			# SYSV-style shared memory
+options 	SYSVMSG			# SYSV-style message queues
+options 	SYSVSEM			# SYSV-style semaphores
+options 	_KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions
+options 	KBD_INSTALL_CDEV	# install a CDEV entry in /dev
+options		NO_ADAPTIVE_MUTEXES
+#options 	ADAPTIVE_GIANT		# Giant mutex is adaptive.
+
+options		KDB
+options		DDB
+
+# Workarounds for some known-to-be-broken chipsets (nVidia nForce3-Pro150)
+device		atpic			# 8259A compatibility
+
+# Linux 32-bit ABI support
+options 	LINPROCFS		# Cannot be a module yet.
+options		SMP
+options		GDB
+
+# Bus support.
+device		acpi
+device		pci
+
+# Floppy drives
+#device		fdc
+
+# Xen HVM support
+options		XENHVM
+device		xenpci
+
+# ATA and ATAPI devices
+device		ata
+device		atadisk		# ATA disk drives
+device		ataraid		# ATA RAID drives
+device		atapicd		# ATAPI CDROM drives
+device		atapifd		# ATAPI floppy drives
+device		atapist		# ATAPI tape drives
+options 	ATA_STATIC_ID	# Static device numbering
+
+
+# SCSI peripherals
+device		scbus		# SCSI bus (required for SCSI)
+device		da		# Direct Access (disks)
+device		sa		# Sequential Access (tape etc)
+device		cd		# CD
+device		pass		# Passthrough device (direct SCSI access)
+
+# atkbdc0 controls both the keyboard and the PS/2 mouse
+device		atkbdc		# AT keyboard controller
+device		atkbd		# AT keyboard
+device		psm		# PS/2 mouse
+
+device		kbdmux		# keyboard multiplexer
+
+device		vga		# VGA video card driver
+
+device		splash		# Splash screen and screen saver support
+
+# syscons is the default console driver, resembling an SCO console
+device		sc
+
+device		agp		# support several AGP chipsets
+
+# Serial (COM) ports
+device		sio		# 8250, 16[45]50 based serial ports
+
+# PCI Ethernet NICs that use the common MII bus controller code.
+# NOTE: Be sure to keep the 'device miibus' line in order to use these NICs!
+device		miibus		# MII bus support
+device		re		# RealTek 8139C+/8169/8169S/8110S
+
+# Pseudo devices.
+device		loop		# Network loopback
+device		random		# Entropy device
+device		ether		# Ethernet support
+device		sl		# Kernel SLIP
+device		ppp		# Kernel PPP
+device		tun		# Packet tunnel.
+device		pty		# Pseudo-ttys (telnet etc)
+device		md		# Memory "disks"
+device		gif		# IPv6 and IPv4 tunneling
+device		faith		# IPv6-to-IPv4 relaying (translation)
+
+# The `bpf' device enables the Berkeley Packet Filter.
+# Be aware of the administrative consequences of enabling this!
+# Note that 'bpf' is required for DHCP.
+device		bpf		# Berkeley packet filter
Index: amd64/amd64/pmap.c
===================================================================
--- amd64/amd64/pmap.c	(.../stable/6/sys)	(revision 184012)
+++ amd64/amd64/pmap.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -144,6 +144,11 @@
 #ifdef SMP
 #include <machine/smp.h>
 #endif
+#ifdef XENHVM
+#include <machine/xen/xen-os.h>
+#include <xen/hypervisor.h>
+#include <xen/interface/hvm/hvm_op.h>
+#endif
 
 #ifndef PMAP_SHPGPERPROC
 #define PMAP_SHPGPERPROC 200
@@ -793,8 +798,20 @@
 
 	sched_pin();
 	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
-		invltlb();
-		smp_invltlb();
+#if defined(XENHVM) && defined(notdef)
+		/*
+		 * As far as I can tell, this makes things slower, at
+		 * least where there are only two physical cpus and
+		 * the host is not overcommitted.
+		 */
+		if (is_running_on_xen()) {
+			HYPERVISOR_hvm_op(HVMOP_flush_tlbs, NULL);
+		} else
+#endif
+		{
+			invltlb();
+			smp_invltlb();
+		}
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
Index: amd64/amd64/machdep.c
===================================================================
--- amd64/amd64/machdep.c	(.../stable/6/sys)	(revision 184012)
+++ amd64/amd64/machdep.c	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -1352,6 +1352,14 @@
 	if (env != NULL)
 		strlcpy(kernelname, env, sizeof(kernelname));
 
+#ifdef XENHVM
+	if (inw(0x10) == 0x49d2) {	/* presumably the Xen unplug-protocol magic port/value -- confirm */
+		if (bootverbose)
+			printf("Xen detected: disabling emulated block and network devices\n");
+		outw(0x10, 3);		/* assumed bitmask: 1 = IDE disks | 2 = NICs -- confirm */
+	}
+#endif
+
 	/* Location of kernel stack for locore */
 	return ((u_int64_t)thread0.td_pcb);
 }
Index: sys/libkern.h
===================================================================
--- sys/libkern.h	(.../stable/6/sys)	(revision 184012)
+++ sys/libkern.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -98,6 +98,7 @@
 int	 strcasecmp(const char *, const char *);
 char	*strcat(char * __restrict, const char * __restrict);
 int	 strcmp(const char *, const char *);
+size_t	strcspn(const char *s, const char *charset);
 char	*strcpy(char * __restrict, const char * __restrict);
 char	*strdup(const char *__restrict, struct malloc_type *);
 size_t	 strlcat(char *, const char *, size_t);
@@ -151,6 +152,18 @@
 	return (b);
 }
 
+static __inline char *
+strchr(const char *p, int ch)
+{
+	return (index(p, ch));
+}
+
+static __inline char *
+strrchr(const char *p, int ch)
+{
+	return (rindex(p, ch));
+}
+
 /* fnmatch() return values. */
 #define	FNM_NOMATCH	1	/* Match failed. */
 
Index: sys/mutex.h
===================================================================
--- sys/mutex.h	(.../stable/6/sys)	(revision 184012)
+++ sys/mutex.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -325,6 +325,9 @@
 #define mtx_trylock_flags(m, opts)					\
 	_mtx_trylock((m), (opts), LOCK_FILE, LOCK_LINE)
 
+#define	mtx_sleep(chan, mtx, pri, wmesg, timo)				\
+	_sleep((chan), &(mtx)->mtx_object, (pri), (wmesg), (timo))
+
 #define	mtx_initialized(m)	lock_initalized(&(m)->mtx_object)
 
 #define mtx_owned(m)	(((m)->mtx_lock & MTX_FLAGMASK) == (uintptr_t)curthread)
Index: sys/sx.h
===================================================================
--- sys/sx.h	(.../stable/6/sys)	(revision 184012)
+++ sys/sx.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -257,6 +257,9 @@
 		sx_sunlock(sx);						\
 } while (0)
 
+#define	sx_sleep(chan, sx, pri, wmesg, timo)				\
+	_sleep((chan), &(sx)->lock_object, (pri), (wmesg), (timo))
+
 /*
  * Options passed to sx_init_flags().
  */
Index: sys/systm.h
===================================================================
--- sys/systm.h	(.../stable/6/sys)	(revision 184012)
+++ sys/systm.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -113,6 +113,7 @@
  * General function declarations.
  */
 
+struct lock_object;
 struct clockframe;
 struct malloc_type;
 struct mtx;
@@ -295,6 +296,8 @@
  * Common `proc' functions are declared here so that proc.h can be included
  * less often.
  */
+int	_sleep(void *chan, struct lock_object *lock, int pri, const char *wmesg,
+	    int timo) __nonnull(1);
 int	msleep(void *chan, struct mtx *mtx, int pri, const char *wmesg,
 	    int timo);
 int	msleep_spin(void *chan, struct mtx *mtx, const char *wmesg, int timo);
Index: sys/sleepqueue.h
===================================================================
--- sys/sleepqueue.h	(.../stable/6/sys)	(revision 184012)
+++ sys/sleepqueue.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -83,11 +83,13 @@
 #ifdef _KERNEL
 
 #define	SLEEPQ_TYPE		0x0ff		/* Mask of sleep queue types. */
-#define	SLEEPQ_MSLEEP		0x00		/* Used by msleep/wakeup. */
+#define	SLEEPQ_SLEEP		0x00		/* Used by msleep/wakeup. */
 #define	SLEEPQ_CONDVAR		0x01		/* Used for a cv. */
+#define	SLEEPQ_PAUSE		0x02		/* Used by pause. */
 #define	SLEEPQ_SX		0x03		/* Used by an sx lock. */
 #define	SLEEPQ_INTERRUPTIBLE	0x100		/* Sleep is interruptible. */
 
+
 void	init_sleepqueues(void);
 void	sleepq_abort(struct thread *td, int intrval);
 void	sleepq_add(void *, struct lock_object *, const char *, int, int);
Index: sys/lock.h
===================================================================
--- sys/lock.h	(.../stable/6/sys)	(revision 184012)
+++ sys/lock.h	(.../user/dfr/xenhvm/6/sys)	(revision 190588)
@@ -51,6 +51,8 @@
 	const	char *lc_name;
 	u_int	lc_flags;
 	void	(*lc_ddb_show)(struct lock_object *lock);
+	void	(*lc_lock)(struct lock_object *lock, int how);
+	int	(*lc_unlock)(struct lock_object *lock);
 };
 
 #define	LC_SLEEPLOCK	0x00000001	/* Sleep lock. */

Property changes on: .
___________________________________________________________________
Modified: svn:mergeinfo
   Merged /projects/releng_6_xen/sys:r185181-186767