==== //depot/vendor/FreeBSD/stable/9/sys/amd64/include/apicvar.h#2 (text) - //SpectraBSD/stable/sys/amd64/include/apicvar.h#1 (text) ==== content @@ -227,6 +227,7 @@ enum intr_trigger trigger); void lapic_set_tpr(u_int vector); void lapic_setup(int boot); +void xen_intr_handle_upcall(struct trapframe *frame); #endif /* !LOCORE */ #endif /* _MACHINE_APICVAR_H_ */ ==== //depot/vendor/FreeBSD/stable/9/sys/amd64/include/intr_machdep.h#5 (text) - //SpectraBSD/stable/sys/amd64/include/intr_machdep.h#2 (text) ==== content @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: stable/9/sys/amd64/include/intr_machdep.h 247877 2013-03-06 09:22:45Z avg $ + * $FreeBSD: stable/9/sys/amd64/include/intr_machdep.h 235260 2012-05-11 04:10:23Z attilio $ */ #ifndef __MACHINE_INTR_MACHDEP_H__ @@ -44,12 +44,24 @@ * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. - * IRQ values beyond 256 are used by MSI. We leave 255 unused to avoid - * confusion since 255 is used in PCI to indicate an invalid IRQ. + * IRQ values from 256 to 767 are used by MSI. When running under the Xen + * Hypervisor, IRQ values from 768 to 4863 are available for binding to + * event channel events. We leave 255 unused to avoid confusion since 255 is + * used in PCI to indicate an invalid IRQ. */ #define NUM_MSI_INTS 512 #define FIRST_MSI_INT 256 -#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS) +#ifdef XENHVM +#include +#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS +#define FIRST_EVTCHN_INT \ + (FIRST_MSI_INT + NUM_MSI_INTS) +#define LAST_EVTCHN_INT \ + (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) +#else +#define NUM_EVTCHN_INTS 0 +#endif +#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) /* * Default base address for MSI messages on x86 platforms. 
@@ -94,7 +106,7 @@ int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); - TAILQ_ENTRY(pic) pics; + STAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ ==== //depot/vendor/FreeBSD/stable/9/sys/amd64/include/pcpu.h#4 (text) - //SpectraBSD/stable/sys/amd64/include/pcpu.h#2 (text) ==== content @@ -42,15 +42,6 @@ #endif #endif -#ifdef XENHVM -#define PCPU_XEN_FIELDS \ - ; \ - unsigned int pc_last_processed_l1i; \ - unsigned int pc_last_processed_l2i -#else -#define PCPU_XEN_FIELDS -#endif - /* * The SMP parts are setup in pmap.c and locore.s for the BSP, and * mp_machdep.c sets up the data for the AP's to "see" when they awake. @@ -76,8 +67,7 @@ struct system_segment_descriptor *pc_ldt; \ /* Pointer to the CPU TSS descriptor */ \ struct system_segment_descriptor *pc_tss; \ - u_int pc_cmci_mask /* MCx banks for CMCI */ \ - PCPU_XEN_FIELDS + u_int pc_cmci_mask /* MCx banks for CMCI */ #ifdef _KERNEL ==== //depot/vendor/FreeBSD/stable/9/sys/amd64/include/segments.h#4 (text) - //SpectraBSD/stable/sys/amd64/include/segments.h#2 (text) ==== content @@ -215,6 +215,7 @@ #define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. 
*/ #define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ #define IDT_DTRACE_RET 0x92 /* DTrace pid provider Interrupt Vector */ +#define IDT_EVTCHN 0x93 /* Xen HVM Event Channel Interrupt Vector */ /* * Entries in the Global Descriptor Table (GDT) ==== //depot/vendor/FreeBSD/stable/9/sys/amd64/include/xen/xen-os.h#1 (text) - //SpectraBSD/stable/sys/amd64/include/xen/xen-os.h#1 (text) ==== content @@ -11,16 +11,19 @@ #define CONFIG_X86_PAE #endif +#ifdef LOCORE +#define __ASSEMBLY__ +#endif + #if !defined(__XEN_INTERFACE_VERSION__) -/* - * Can update to a more recent version when we implement - * the hypercall page - */ -#define __XEN_INTERFACE_VERSION__ 0x00030204 +#define __XEN_INTERFACE_VERSION__ 0x00030208 #endif #include +/* Everything below this point is not included by assembler (.S) files. */ +#ifndef __ASSEMBLY__ + /* Force a proper event-channel callback from Xen. */ void force_evtchn_callback(void); @@ -28,6 +31,20 @@ extern shared_info_t *HYPERVISOR_shared_info; +typedef enum { + XOC_NONE = 0x0, + + /** Xen interrupt delivery can be bound to a specific CPU. */ + XOC_PCPU_INTR_BINDING = 0x1 +} xen_os_capability_t; + +/** + * Query the capabilities of FreeBSD's Xen Support. + * + * \return A bitmap of supported capabilities. + */ +xen_os_capability_t xen_os_capabilities(void); + /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ static inline void rep_nop(void) { @@ -41,10 +58,6 @@ void *bootmem_alloc(unsigned int size); void bootmem_free(void *ptr, unsigned int size); - -/* Everything below this point is not included by assembler (.S) files. 
*/ -#ifndef __ASSEMBLY__ - void printk(const char *fmt, ...); /* some function prototypes */ @@ -126,14 +139,14 @@ #else #endif -#ifndef mb -#define mb() __asm__ __volatile__("mfence":::"memory") +#ifndef xen_mb +#define xen_mb() mb() #endif -#ifndef rmb -#define rmb() __asm__ __volatile__("lfence":::"memory"); +#ifndef xen_rmb +#define xen_rmb() rmb() #endif -#ifndef wmb -#define wmb() barrier() +#ifndef xen_wmb +#define xen_wmb() wmb() #endif #ifdef SMP #define smp_mb() mb() ==== //depot/vendor/FreeBSD/stable/9/sys/conf/files#51 (text) - //SpectraBSD/stable/sys/conf/files#6 (text) ==== content @@ -3568,7 +3618,6 @@ xen/gnttab.c optional xen | xenhvm xen/features.c optional xen | xenhvm -xen/evtchn/evtchn.c optional xen xen/evtchn/evtchn_dev.c optional xen | xenhvm xen/xenbus/xenbus_if.m optional xen | xenhvm xen/xenbus/xenbus.c optional xen | xenhvm @@ -3587,4 +3636,4 @@ dev/xen/netback/netback.c optional xen | xenhvm dev/xen/netfront/netfront.c optional xen | xenhvm dev/xen/xenpci/xenpci.c optional xenpci -dev/xen/xenpci/evtchn.c optional xenpci +dev/xen/timer/timer.c optional xen | xenhvm ==== //depot/vendor/FreeBSD/stable/9/sys/conf/files.amd64#16 (text) - //SpectraBSD/stable/sys/conf/files.amd64#3 (text) ==== content @@ -493,3 +484,5 @@ x86/x86/msi.c optional pci x86/x86/nexus.c standard x86/x86/tsc.c standard +x86/xen/hvm.c optional xenhvm +x86/xen/xen_intr.c optional xen | xenhvm ==== //depot/vendor/FreeBSD/stable/9/sys/conf/files.i386#13 (text) - //SpectraBSD/stable/sys/conf/files.i386#3 (text) ==== content @@ -545,3 +536,5 @@ x86/x86/msi.c optional apic pci x86/x86/nexus.c standard x86/x86/tsc.c standard +x86/xen/hvm.c optional xenhvm +x86/xen/xen_intr.c optional xen | xenhvm ==== //depot/vendor/FreeBSD/stable/9/sys/dev/xen/blkback/blkback.c#6 (text) - //SpectraBSD/stable/sys/dev/xen/blkback/blkback.c#1 (text) ==== content @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2009-2011 Spectra Logic Corporation + * Copyright (c) 2009-2012 Spectra Logic Corporation * All 
rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,7 +31,7 @@ * Ken Merry (Spectra Logic Corporation) */ #include -__FBSDID("$FreeBSD: stable/9/sys/dev/xen/blkback/blkback.c 249132 2013-04-05 08:22:11Z mav $"); +__FBSDID("$FreeBSD: stable/9/sys/dev/xen/blkback/blkback.c 232310 2012-02-29 18:41:59Z gibbs $"); /** * \file blkback.c @@ -73,11 +73,11 @@ #include #include +#include #include #include #include -#include #include #include @@ -99,7 +99,8 @@ * backend device, with a copy-in/out to the remote domain's memory. * * \note This option is currently required when this driver's domain is - * operating in HVM mode on a system using an IOMMU. + * operating in HVM mode on a system using an IOMMU on hypervisors + * prior to Xen 4.1.2. * * This driver uses Xen's grant table API to gain access to the memory of * the remote domains it serves. When our domain is operating in PV mode, @@ -108,10 +109,10 @@ * that blkback and the backing devices it uses can safely perform DMA * operations to satisfy requests. In HVM mode, Xen may use a HW IOMMU to * insure that our domain cannot DMA to pages owned by another domain. As - * of Xen 4.0, IOMMU mappings for HVM guests are not updated via the grant - * table API. For this reason, in HVM mode, we must bounce all requests into - * memory that is mapped into our domain at domain startup and thus has - * valid IOMMU mappings. + * of Xen 4.1.2, IOMMU mappings for HVM guests are updated via the grant + * table API. On earlier hypervisors, in HVM mode, the IOMMU is not updated. + * All requests must be bounced through memory that is mapped into our domain + * at domain startup to ensure IOMMU allows the access. */ #define XBB_USE_BOUNCE_BUFFERS @@ -186,6 +187,49 @@ XBB_REQLIST_MAPPED = 0x01 } xbb_reqlist_flags; +/** + * \brief Structure used to memoize information about a per-request + * scatter-gather list. 
+ * + * The chief benefit of using this data structure is it avoids having + * to reparse the possibly discontiguous S/G list in the original + * request. Due to the way that the mapping of the memory backing an + * I/O transaction is handled by Xen, a second pass is unavoidable. + * At least this way the second walk is a simple array traversal. + * + * \note A single Scatter/Gather element in the block interface covers + * at most 1 machine page. In this context a sector (blkif + * nomenclature, not what I'd choose) is a 512b aligned unit + * of mapping within the machine page referenced by an S/G + * element. + */ +struct xbb_sg { +#ifdef XBB_USE_BOUNCE_BUFFERS + /** Grant reference to the peer's page for this segment. */ + grant_ref_t gnt_ref; +#else + /** + * Array of grant handles (one per page) used to map this request. + */ + grant_handle_t gnt_handle; +#endif + + /** The number of 512b data chunks mapped in this S/G element. */ + int16_t nsect; + + /** + * The index (0 based) of the first 512b data chunk mapped + * in this S/G element. + */ + uint8_t first_sect; + + /** + * The index (0 based) of the last 512b data chunk mapped + * in this S/G element. + */ + uint8_t last_sect; +}; + struct xbb_xen_reqlist { /** * Back reference to the parent block back instance for this @@ -250,27 +294,14 @@ * list structure and used to map the remote domain's pages for * this I/O, into our domain's address space. */ - uint8_t *kva; + vm_offset_t kva; /** - * Base, psuedo-physical address, corresponding to the start - * of this request's kva region. + * Array of memoized scatter gather data computed during the + * conversion of blkif ring requests to internal xbb_xen_req + * structures. */ - uint64_t gnt_base; - - -#ifdef XBB_USE_BOUNCE_BUFFERS - /** - * Pre-allocated domain local memory used to proxy remote - * domain memory during I/O operations. - */ - uint8_t *bounce; -#endif - - /** - * Array of grant handles (one per page) used to map this request. 
- */ - grant_handle_t *gnt_handles; + struct xbb_sg *xbb_sgs; /** * Device statistics request ordering type (ordered or simple). @@ -449,39 +480,6 @@ } xbb_type; /** - * \brief Structure used to memoize information about a per-request - * scatter-gather list. - * - * The chief benefit of using this data structure is it avoids having - * to reparse the possibly discontiguous S/G list in the original - * request. Due to the way that the mapping of the memory backing an - * I/O transaction is handled by Xen, a second pass is unavoidable. - * At least this way the second walk is a simple array traversal. - * - * \note A single Scatter/Gather element in the block interface covers - * at most 1 machine page. In this context a sector (blkif - * nomenclature, not what I'd choose) is a 512b aligned unit - * of mapping within the machine page referenced by an S/G - * element. - */ -struct xbb_sg { - /** The number of 512b data chunks mapped in this S/G element. */ - int16_t nsect; - - /** - * The index (0 based) of the first 512b data chunk mapped - * in this S/G element. - */ - uint8_t first_sect; - - /** - * The index (0 based) of the last 512b data chunk mapped - * in this S/G element. - */ - uint8_t last_sect; -}; - -/** * Character device backend specific configuration data. */ struct xbb_dev_data { @@ -509,30 +507,6 @@ * so we only need one of these. */ struct iovec xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; -#ifdef XBB_USE_BOUNCE_BUFFERS - - /** - * \brief Array of io vectors used to handle bouncing of file reads. - * - * Vnode operations are free to modify uio data during their - * exectuion. In the case of a read with bounce buffering active, - * we need some of the data from the original uio in order to - * bounce-out the read data. This array serves as the temporary - * storage for this saved data. - */ - struct iovec saved_xiovecs[XBB_MAX_SEGMENTS_PER_REQLIST]; - - /** - * \brief Array of memoized bounce buffer kva offsets used - * in the file based backend. 
- * - * Due to the way that the mapping of the memory backing an - * I/O transaction is handled by Xen, a second pass through - * the request sg elements is unavoidable. We memoize the computed - * bounce address here to reduce the cost of the second walk. - */ - void *xiovecs_vaddr[XBB_MAX_SEGMENTS_PER_REQLIST]; -#endif /* XBB_USE_BOUNCE_BUFFERS */ }; /** @@ -544,6 +518,18 @@ }; /** + * Maximally sized array of data needed for grant table operations. + */ +union xbb_grant_vector { +#ifdef XBB_USE_BOUNCE_BUFFERS + struct gnttab_copy copy[XBB_MAX_SEGMENTS_PER_REQLIST]; +#else + struct gnttab_map_grant_ref map[XBB_MAX_SEGMENTS_PER_REQLIST]; + struct gnttab_unmap_grant_ref unmap[XBB_MAX_SEGMENTS_PER_REQLIST]; +#endif +}; + +/** * Function signature of backend specific I/O handlers. */ typedef int (*xbb_dispatch_t)(struct xbb_softc *xbb, @@ -561,6 +547,12 @@ struct taskqueue *io_taskqueue; /** + * Single "connect to frontend" task enqueued + * on io_taskqueue. + */ + struct task connect_task; + + /** * Single "run the request queue" task enqueued * on io_taskqueue. */ @@ -594,25 +586,47 @@ struct xbb_xen_reqlist *request_lists; /** - * Global pool of kva used for mapping remote domain ring - * and I/O transaction data. + * Global pool of kva used for mapping the communication ring. + * This pool is also used for mapping I/O request buffers when + * we use hypervisor mapping operations (rather than bounce + * buffering) to access front end data. */ vm_offset_t kva; - /** Psuedo-physical address corresponding to kva. */ - uint64_t gnt_base_addr; + /** + * The base address (guest machine frame address in HVM mode or + * virtual address in PV mode) corresponding to xbb->kva used + * to refer to this area in HyperVisor grant operations. + */ + uint64_t kva_gnt_base; /** The size of the global kva pool. */ int kva_size; + /** + * The size of the kva pool dedicated to mapping data for requests. 
+ * If bounce buffering is enabled, this pool is separate from + * xbb->kva and malloc backed. Otherwise, it points into the + * xbb->kva pool. + */ + vm_offset_t reqlist_kva; + /** The size of the KVA area used for request lists. */ int reqlist_kva_size; /** The number of pages of KVA used for request lists */ int reqlist_kva_pages; +#if defined(XBB_USE_BOUNCE_BUFFERS) && defined(XENHVM) + /** + * An array mapping the virtual addresses of each page in + * reqlist_kva to its corresponding grant page index. + */ + xen_pfn_t *reqlist_gmfns; +#endif + /** Bitmap of free KVA pages */ - bitstr_t *kva_free; + bitstr_t *reqlist_kva_free; /** * \brief Cached value of the front-end's domain id. @@ -682,7 +696,7 @@ blkif_back_rings_t rings; /** IRQ mapping for the communication ring event channel. */ - int irq; + xen_intr_handle_t xen_intr_handle; /** * \brief Backend access mode flags (e.g. write, or read-only). @@ -734,20 +748,23 @@ uint64_t media_num_sectors; /** - * \brief Array of memoized scatter gather data computed during the - * conversion of blkif ring requests to internal xbb_xen_req - * structures. - * - * Ring processing is serialized so we only need one of these. + * Temporary grant table operation vector used in xbb_dispatch_io + * path. When XBB_MAX_SEGMENTS_PER_REQLIST gets large, keeping + * this on the stack could cause a stack overflow, so we allocate + * a single instance of the vector in our softc. Access is serialized + * by xbb_dispatch_io only being called via our single threaded task + * queue. */ - struct xbb_sg xbb_sgs[XBB_MAX_SEGMENTS_PER_REQLIST]; + union xbb_grant_vector xbb_dispatch_grant_vector; /** - * Temporary grant table map used in xbb_dispatch_io(). When - * XBB_MAX_SEGMENTS_PER_REQLIST gets large, keeping this on the - * stack could cause a stack overflow. + * Temporary grant table operation vector used in xbb_unmap_reqlist. 
+ * When XBB_MAX_SEGMENTS_PER_REQLIST gets large, keeping this on the + * stack could cause a stack overflow, so we allocate a single + * instance of the vector in our softc and serialize access via the + * softc lock. */ - struct gnttab_map_grant_ref maps[XBB_MAX_SEGMENTS_PER_REQLIST]; + union xbb_grant_vector xbb_completion_grant_vector; /** Mutex protecting per-instance data. */ struct mtx lock; @@ -794,6 +811,12 @@ /** Number of requests we have completed*/ uint64_t reqs_completed; + /** Number of requests we queued but not pushed*/ + uint64_t reqs_queued_for_completion; + + /** Number of requests we completed with an error status*/ + uint64_t reqs_completed_with_error; + /** How many forced dispatches (i.e. without coalescing) have happend */ uint64_t forced_dispatch; @@ -888,52 +911,42 @@ static inline uint8_t * xbb_reqlist_vaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { - return (reqlist->kva + (PAGE_SIZE * pagenr) + (sector << 9)); + return ((uint8_t *)reqlist->kva + (PAGE_SIZE * pagenr) + (sector << 9)); } -#ifdef XBB_USE_BOUNCE_BUFFERS /** - * Given a page index and 512b sector offset within that page, - * calculate an offset into a request's local bounce memory region. + * Given a page index, calculate the local pseudo-physical machine + * page/frame number used to perform a request. + * + * \param reqlist The request list structure whose pseudo-physical region + * will be accessed. + * \param pagenr The page index used to compute the pseudo-physical offset. * - * \param reqlist The request structure whose bounce region will be accessed. - * \param pagenr The page index used to compute the bounce offset. - * \param sector The 512b sector index used to compute the page relative - * bounce offset. + * \return The computed global pseudo-physical machine frame number. * - * \return The computed global bounce buffer address. 
+ * Depending on configuration, this will either be a page from a local bounce + * buffer or from the memory region used to map in pages from the front-end + * domain. */ -static inline uint8_t * -xbb_reqlist_bounce_addr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) +static inline uint64_t +xbb_get_gntmfn(struct xbb_xen_reqlist *reqlist, int pagenr) { - return (reqlist->bounce + (PAGE_SIZE * pagenr) + (sector << 9)); -} -#endif +#ifdef XENHVM + struct xbb_softc *xbb; + vm_offset_t kva_page_idx; + + xbb = reqlist->xbb; + kva_page_idx = ((reqlist->kva - xbb->reqlist_kva) >> PAGE_SHIFT) + + pagenr; -/** - * Given a page number and 512b sector offset within that page, - * calculate an offset into the request's memory region that the - * underlying backend device/file should use for I/O. - * - * \param reqlist The request structure whose I/O region will be accessed. - * \param pagenr The page index used to compute the I/O offset. - * \param sector The 512b sector index used to compute the page relative - * I/O offset. - * - * \return The computed global I/O address. - * - * Depending on configuration, this will either be a local bounce buffer - * or a pointer to the memory mapped in from the front-end domain for - * this request. 
- */ -static inline uint8_t * -xbb_reqlist_ioaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) -{ #ifdef XBB_USE_BOUNCE_BUFFERS - return (xbb_reqlist_bounce_addr(reqlist, pagenr, sector)); -#else - return (xbb_reqlist_vaddr(reqlist, pagenr, sector)); + return (xbb->reqlist_gmfns[kva_page_idx]); +#else /* !XBB_USE_BOUNCE_BUFFERS */ + return ((xbb->kva_gnt_base >> PAGE_SHIFT) + kva_page_idx); #endif +#else /* !XENHVM */ + return ((reqlist->kva >> PAGE_SHIFT) + pagenr); +#endif /* !XENHVM */ } /** @@ -956,13 +969,9 @@ static inline uintptr_t xbb_get_gntaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector) { - struct xbb_softc *xbb; - xbb = reqlist->xbb; - - return ((uintptr_t)(xbb->gnt_base_addr + - (uintptr_t)(reqlist->kva - xbb->kva) + - (PAGE_SIZE * pagenr) + (sector << 9))); + return ((uintptr_t)(xbb_get_gntmfn(reqlist, pagenr) << PAGE_SHIFT) + + (sector << 9)); } /** @@ -980,25 +989,25 @@ * put multiple addresses and lengths in one bio/bio chain and won't need * to map everything into one virtual segment. */ -static uint8_t * +static vm_offset_t xbb_get_kva(struct xbb_softc *xbb, int nr_pages) { - intptr_t first_clear; - intptr_t num_clear; - uint8_t *free_kva; - int i; + vm_offset_t free_kva; + int first_clear; + int num_clear; + int i; KASSERT(nr_pages != 0, ("xbb_get_kva of zero length")); first_clear = 0; - free_kva = NULL; + free_kva = 0; mtx_lock(&xbb->lock); /* * Look for the first available page. If there are none, we're done. */ - bit_ffc(xbb->kva_free, xbb->reqlist_kva_pages, &first_clear); + bit_ffc(xbb->reqlist_kva_free, xbb->reqlist_kva_pages, &first_clear); if (first_clear == -1) goto bailout; @@ -1014,7 +1023,7 @@ * (since it pointed to a region with an insufficient number * of clear pages). 
*/ - if (bit_test(xbb->kva_free, i)) { + if (bit_test(xbb->reqlist_kva_free, i)) { num_clear = 0; first_clear = -1; continue; @@ -1029,26 +1038,26 @@ */ if (++num_clear == nr_pages) { - bit_nset(xbb->kva_free, first_clear, + bit_nset(xbb->reqlist_kva_free, first_clear, first_clear + nr_pages - 1); - free_kva = xbb->kva + - (uint8_t *)(first_clear * PAGE_SIZE); + free_kva = xbb->reqlist_kva + (first_clear * PAGE_SIZE); - KASSERT(free_kva >= (uint8_t *)xbb->kva && - free_kva + (nr_pages * PAGE_SIZE) <= - (uint8_t *)xbb->ring_config.va, - ("Free KVA %p len %d out of range, " - "kva = %#jx, ring VA = %#jx\n", free_kva, - nr_pages * PAGE_SIZE, (uintmax_t)xbb->kva, - (uintmax_t)xbb->ring_config.va)); + KASSERT(free_kva >= xbb->reqlist_kva + && (free_kva + (nr_pages * PAGE_SIZE) + <= xbb->reqlist_kva + xbb->reqlist_kva_size), + ("Free KVA %#jx len %d out of range, " + "kva = %#jx, size = %#x\n", + (uintmax_t)free_kva, nr_pages * PAGE_SIZE, + (uintmax_t)xbb->reqlist_kva, + xbb->reqlist_kva_size)); break; } } bailout: - if (free_kva == NULL) { + if (free_kva == 0) { xbb->flags |= XBBF_RESOURCE_SHORTAGE; xbb->kva_shortages++; } @@ -1066,17 +1075,18 @@ * \param nr_pages Number of pages in the KVA region. */ static void -xbb_free_kva(struct xbb_softc *xbb, uint8_t *kva_ptr, int nr_pages) +xbb_free_kva(struct xbb_softc *xbb, vm_offset_t kva, int nr_pages) { - intptr_t start_page; + vm_offset_t start_page; mtx_assert(&xbb->lock, MA_OWNED); - start_page = (intptr_t)(kva_ptr - xbb->kva) >> PAGE_SHIFT; - bit_nclear(xbb->kva_free, start_page, start_page + nr_pages - 1); + start_page = (kva - xbb->reqlist_kva) >> PAGE_SHIFT; + bit_nclear(xbb->reqlist_kva_free, start_page, start_page + nr_pages - 1); } +#ifndef XBB_USE_BOUNCE_BUFFERS /** * Unmap the front-end pages associated with this I/O request. 
* @@ -1085,30 +1095,204 @@ static void xbb_unmap_reqlist(struct xbb_xen_reqlist *reqlist) { - struct gnttab_unmap_grant_ref unmap[XBB_MAX_SEGMENTS_PER_REQLIST]; - u_int i; - u_int invcount; - int error; + struct gnttab_unmap_grant_ref *first_unmap; + struct gnttab_unmap_grant_ref *unmap; + struct xbb_sg *xbb_sg; + u_int seg_idx; + int error; + + mtx_assert(&reqlist->xbb->lock, MA_OWNED); - invcount = 0; - for (i = 0; i < reqlist->nr_segments; i++) { + xbb_sg = reqlist->xbb_sgs; + first_unmap = unmap = reqlist->xbb->xbb_completion_grant_vector.unmap; + for (seg_idx = 0; seg_idx < reqlist->nr_segments; seg_idx++, xbb_sg++) { - if (reqlist->gnt_handles[i] == GRANT_REF_INVALID) + if (xbb_sg->gnt_handle == GRANT_REF_INVALID) continue; - unmap[invcount].host_addr = xbb_get_gntaddr(reqlist, i, 0); - unmap[invcount].dev_bus_addr = 0; - unmap[invcount].handle = reqlist->gnt_handles[i]; - reqlist->gnt_handles[i] = GRANT_REF_INVALID; - invcount++; + unmap->host_addr = xbb_get_gntaddr(reqlist, seg_idx, 0); + unmap->dev_bus_addr = 0; + unmap->handle = xbb_sg->gnt_handle; + xbb_sg->gnt_handle = GRANT_REF_INVALID; + unmap++; } + DPRINTF("unmapped %ld segments\n", unmap - first_unmap); error = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, - unmap, invcount); + first_unmap, + unmap - first_unmap); KASSERT(error == 0, ("Grant table operation failed")); } +#endif /* !XBB_USE_BOUNCE_BUFFERS */ + +/** + * Record/initialize any per-segment grant data necessary to process + * an I/O request. + * + * \param xbb Per-instance xbb configuration structure. + * \param reqlist The request structure for the I/O being processed. + * \param xbb_sg Memoized per-segment data which will be accessible + * throughout the lifetime of the I/O. + * \param sg Request-ring per-segment data which is only accessible + * during the "pre phase" of I/O execution. + * \param seg_idx The reqlist relative index of this segment. 
+ */ +static void +xbb_grant_init_pre_io(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist, + struct xbb_sg *xbb_sg, struct blkif_request_segment *sg, + u_int seg_idx) +{ +#ifdef XBB_USE_BOUNCE_BUFFERS + struct gnttab_copy *copy; + + xbb_sg->gnt_ref = sg->gref; + if (reqlist->operation == BLKIF_OP_READ) + return; + + copy = &xbb->xbb_dispatch_grant_vector.copy[seg_idx]; + copy->source.domid = xbb->otherend_id; + copy->source.offset = xbb_sg->first_sect << 9; + copy->source.u.ref = xbb_sg->gnt_ref; + copy->dest.domid = DOMID_SELF; + copy->dest.offset = xbb_sg->first_sect << 9; + copy->dest.u.gmfn = xbb_get_gntmfn(reqlist, seg_idx); + copy->len = xbb_sg->nsect << 9; + copy->flags = GNTCOPY_source_gref; +#else + struct gnttab_map_grant_ref *map; + + map = &xbb->xbb_dispatch_grant_vector.map[seg_idx]; + map->host_addr = xbb_get_gntaddr(reqlist, seg_idx, /*sector*/0); + KASSERT(map->host_addr + PAGE_SIZE <= xbb->ring_config.gnt_addr, + ("Host address %#jx len %d overlaps ring address %#jx\n", + (uintmax_t)map->host_addr, PAGE_SIZE, + (uintmax_t)xbb->ring_config.gnt_addr)); + + map->flags = GNTMAP_host_map; + map->ref = sg->gref; + map->dom = xbb->otherend_id; + if (reqlist->operation != BLKIF_OP_READ) { + /* + * The guest's data is not modified on writes. + * We only need read-only access to its pages. + */ + map->flags |= GNTMAP_readonly; + } + DPRINTF("map[%d:%p] op %d, addr %lx, flags 0x%x, ref %x, dom %d\n", + seg_idx, map, reqlist->operation, map->host_addr, map->flags, + map->ref, map->dom); +#endif +} /** + * Execute any pre-I/O grant table actions required to execute an I/O request. + * + * \param xbb Per-instance xbb configuration structure. + * \param reqlist The request structure for the I/O being processed. 
+ */ +static int +xbb_grant_execute_pre_io(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) +{ +#ifdef XBB_USE_BOUNCE_BUFFERS + int error; + + if (reqlist->operation == BLKIF_OP_READ) + return (0); + + error = HYPERVISOR_grant_table_op(GNTTABOP_copy, + xbb->xbb_dispatch_grant_vector.copy, + reqlist->nr_segments); + if (error != 0) + printf("Copy-in returned %d\n", -error); + return (-error); +#else + struct gnttab_map_grant_ref *map; + struct xbb_sg *xbb_sg; + int error; + u_int seg_idx; + + error = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, + xbb->xbb_dispatch_grant_vector.map, + reqlist->nr_segments); + if (error != 0) { + error = -error; + panic("Grant table operation failed (%d)", error); + } + + DPRINTF("mapped %d segments\n", reqlist->nr_segments); + reqlist->flags |= XBB_REQLIST_MAPPED; + + xbb_sg = reqlist->xbb_sgs; + for (seg_idx = 0, map = xbb->xbb_dispatch_grant_vector.map; + seg_idx < reqlist->nr_segments; seg_idx++, map++, xbb_sg++){ + + if (unlikely(map->status != 0)) { + DPRINTF("invalid buffer -- could not map it (%d)\n", + map->status); + DPRINTF("Mapping(%d:%p): Host Addr 0x%lx, flags " + "0x%x ref 0x%x, dom %d\n", seg_idx, map, + map->host_addr, map->flags, map->ref, + map->dom); + reqlist->status = BLKIF_RSP_ERROR; + error = EFAULT; + } else + xbb_sg->gnt_handle = map->handle; + } + + return (error); +#endif +} + +/** + * Execute any post-I/O grant table actions required to execute an I/O request. + * + * \param xbb Per-instance xbb configuration structure. + * \param reqlist The request structure for the I/O being processed. 
+ */ +static void +xbb_grant_execute_post_io(struct xbb_softc *xbb, + struct xbb_xen_reqlist *reqlist) +{ +#ifdef XBB_USE_BOUNCE_BUFFERS + struct gnttab_copy *copy; + struct xbb_sg *xbb_sg; + u_int seg_idx; + int error; + + mtx_assert(&xbb->lock, MA_OWNED); + + if (reqlist->operation != BLKIF_OP_READ) + return; + + copy = xbb->xbb_completion_grant_vector.copy; + xbb_sg = reqlist->xbb_sgs; + for (seg_idx = 0; seg_idx < reqlist->nr_segments; + seg_idx++, copy++, xbb_sg++) { + + copy->source.domid = DOMID_SELF; + copy->source.offset = xbb_sg->first_sect << 9; + copy->source.u.gmfn = xbb_get_gntmfn(reqlist, seg_idx); + copy->dest.domid = xbb->otherend_id; + copy->dest.offset = xbb_sg->first_sect << 9; + copy->dest.u.ref = xbb_sg->gnt_ref; + copy->len = xbb_sg->nsect << 9; + copy->flags = GNTCOPY_dest_gref; + } + + error = HYPERVISOR_grant_table_op(GNTTABOP_copy, + xbb->xbb_completion_grant_vector.copy, + reqlist->nr_segments); + if (error != 0) { + printf("Copy-out returned %d\n", -error); + reqlist->status = BLKIF_RSP_ERROR; + } +#else + if (reqlist->flags & XBB_REQLIST_MAPPED) + xbb_unmap_reqlist(reqlist); +#endif +} + +/** * Allocate an internal transaction tracking structure from the free pool. * * \param xbb Per-instance xbb configuration structure. 
@@ -1129,7 +1313,7 @@ STAILQ_REMOVE_HEAD(&xbb->reqlist_free_stailq, links); reqlist->flags = XBB_REQLIST_NONE; - reqlist->kva = NULL; + reqlist->kva = 0; reqlist->status = BLKIF_RSP_OKAY; reqlist->residual_512b_sectors = 0; reqlist->num_children = 0; @@ -1153,15 +1337,17 @@ int wakeup) { - mtx_lock(&xbb->lock); + mtx_assert(&xbb->lock, MA_OWNED); if (wakeup) { wakeup = xbb->flags & XBBF_RESOURCE_SHORTAGE; xbb->flags &= ~XBBF_RESOURCE_SHORTAGE; } - if (reqlist->kva != NULL) + if (reqlist->kva != 0) { xbb_free_kva(xbb, reqlist->kva, reqlist->nr_segments); + reqlist->kva = 0; + } xbb_release_reqs(xbb, &reqlist->contig_req_list, reqlist->num_children); @@ -1177,8 +1363,6 @@ xbb_shutdown(xbb); } - mtx_unlock(&xbb->lock); - if (wakeup != 0) taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); } @@ -1269,16 +1453,16 @@ if (nreq != NULL) xbb_release_req(xbb, nreq); - mtx_unlock(&xbb->lock); - if (nreqlist != NULL) xbb_release_reqlist(xbb, nreqlist, /*wakeup*/ 0); + mtx_unlock(&xbb->lock); + return (1); } /** - * Create and transmit a response to a blkif request. + * Create and queue a response to a blkif request. * * \param xbb Per-instance xbb configuration structure. * \param req The request structure to which to respond. @@ -1286,20 +1470,28 @@ * in sys/xen/interface/io/blkif.h. */ static void -xbb_send_response(struct xbb_softc *xbb, struct xbb_xen_req *req, int status) +xbb_queue_response(struct xbb_softc *xbb, struct xbb_xen_req *req, int status) { blkif_response_t *resp; - int more_to_do; - int notify; - more_to_do = 0; + /* + * The mutex is required here, and should be held across this call + * until after the subsequent call to xbb_push_responses(). This + * is to guarantee that another context won't queue responses and + * push them while we're active. + * + * That could lead to the other end being notified of responses + * before the resources have been freed on this end. 
The other end + * would then be able to queue additional I/O, and we may run out + * of resources because we haven't freed them all yet. + */ + mtx_assert(&xbb->lock, MA_OWNED); /* * Place on the response ring for the relevant domain. * For now, only the spacing between entries is different * in the different ABIs, not the response entry layout. */ - mtx_lock(&xbb->lock); switch (xbb->abi) { case BLKIF_PROTOCOL_NATIVE: resp = RING_GET_RESPONSE(&xbb->rings.native, @@ -1323,31 +1515,32 @@ resp->operation = req->operation; resp->status = status; - xbb->rings.common.rsp_prod_pvt += BLKIF_SEGS_TO_BLOCKS(req->nr_pages); - RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbb->rings.common, notify); - - if (xbb->rings.common.rsp_prod_pvt == xbb->rings.common.req_cons) { - - /* - * Tail check for pending requests. Allows frontend to avoid - * notifications if requests are already in flight (lower - * overheads and promotes batching). - */ - RING_FINAL_CHECK_FOR_REQUESTS(&xbb->rings.common, more_to_do); - } else if (RING_HAS_UNCONSUMED_REQUESTS(&xbb->rings.common)) { - - more_to_do = 1; + if (status != BLKIF_RSP_OKAY) { + DPRINTF("Completing request with status %d\n", resp->status); + xbb->reqs_completed_with_error++; } - xbb->reqs_completed++; + xbb->rings.common.rsp_prod_pvt += BLKIF_SEGS_TO_BLOCKS(req->nr_pages); - mtx_unlock(&xbb->lock); + xbb->reqs_queued_for_completion++; +} - if (more_to_do) - taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); +/** + * Send queued responses to blkif requests. + * + * \param xbb Per-instance xbb configuration structure. + * \param notify Flag that is set to 1 if the other end should be + * notified via irq, 0 if the other end should not be + * notified. 
+ */ +static void +xbb_push_responses(struct xbb_softc *xbb, int *notify) +{ - if (notify) - notify_remote_via_irq(xbb->irq); + mtx_assert(&xbb->lock, MA_OWNED); + RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbb->rings.common, *notify); + xbb->reqs_completed += xbb->reqs_queued_for_completion; + xbb->reqs_queued_for_completion = 0; } /** @@ -1361,23 +1554,28 @@ { struct xbb_xen_req *nreq; off_t sectors_sent; + int notify; sectors_sent = 0; - if (reqlist->flags & XBB_REQLIST_MAPPED) - xbb_unmap_reqlist(reqlist); + mtx_lock(&xbb->lock); + + xbb_grant_execute_post_io(xbb, reqlist); /* - * All I/O is done, send the response. A lock should not be - * necessary here because the request list is complete, and - * therefore this is the only context accessing this request - * right now. The functions we call do their own locking if - * necessary. + * All I/O is done, send the response. A lock is not necessary + * to protect the request list, because all requests have + * completed. Therefore this is the only context accessing this + * reqlist right now. However, in order to make sure that no one + * else queues responses onto the queue or pushes them to the other + * side while we're active, we need to hold the lock across the + * calls to xbb_queue_response() and xbb_push_responses(). */ STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { off_t cur_sectors_sent; - xbb_send_response(xbb, nreq, reqlist->status); + /* Put this response on the ring, but don't push yet */ + xbb_queue_response(xbb, nreq, reqlist->status); /* We don't report bytes sent if there is an error. 
*/ if (reqlist->status == BLKIF_RSP_OKAY) @@ -1412,6 +1610,13 @@ /*then*/&reqlist->ds_t0); xbb_release_reqlist(xbb, reqlist, /*wakeup*/ 1); + + xbb_push_responses(xbb, ¬ify); + + mtx_unlock(&xbb->lock); + + if (notify) + xen_intr_signal(xbb->xen_intr_handle); } /** @@ -1465,17 +1670,6 @@ } } -#ifdef XBB_USE_BOUNCE_BUFFERS - if (bio->bio_cmd == BIO_READ) { - vm_offset_t kva_offset; - - kva_offset = (vm_offset_t)bio->bio_data - - (vm_offset_t)reqlist->bounce; - memcpy((uint8_t *)reqlist->kva + kva_offset, - bio->bio_data, bio->bio_bcount); - } -#endif /* XBB_USE_BOUNCE_BUFFERS */ - /* * Decrement the pending count for the request list. When we're * done with the requests, send status back for all of them. @@ -1504,7 +1698,6 @@ xbb_dispatch_io(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist) { struct xbb_sg *xbb_sg; - struct gnttab_map_grant_ref *map; struct blkif_request_segment *sg; struct blkif_request_segment *last_block_sg; struct xbb_xen_req *nreq; @@ -1514,8 +1707,8 @@ int nr_sects; int total_sects; int operation; + int error; uint8_t bio_flags; - int error; reqlist->ds_tag_type = DEVSTAT_TAG_SIMPLE; bio_flags = 0; @@ -1527,10 +1720,10 @@ * request list. If not, tell xbb_run_queue() so it can go to * sleep until we have more KVA. */ - reqlist->kva = NULL; + reqlist->kva = 0; if (reqlist->nr_segments != 0) { reqlist->kva = xbb_get_kva(xbb, reqlist->nr_segments); - if (reqlist->kva == NULL) { + if (reqlist->kva == 0) { /* * If we're out of KVA, return ENOMEM. 
*/ @@ -1543,7 +1736,7 @@ switch (reqlist->operation) { case BLKIF_OP_WRITE_BARRIER: - bio_flags |= BIO_ORDERED; + bio_flags |= BIO_ORDERED; reqlist->ds_tag_type = DEVSTAT_TAG_ORDERED; /* FALLTHROUGH */ case BLKIF_OP_WRITE: @@ -1579,8 +1772,9 @@ if (xbb->flush_interval != 0) { if (++(xbb->flush_count) < xbb->flush_interval) { goto send_response; - } else + } else { xbb->flush_count = 0; + } } operation = BIO_FLUSH; @@ -1596,14 +1790,13 @@ } reqlist->xbb = xbb; - xbb_sg = xbb->xbb_sgs; - map = xbb->maps; + xbb_sg = reqlist->xbb_sgs; seg_idx = 0; STAILQ_FOREACH(nreq, &reqlist->contig_req_list, links) { - blkif_request_t *ring_req; - RING_IDX req_ring_idx; - u_int req_seg_idx; + blkif_request_t *ring_req; + RING_IDX req_ring_idx; + u_int req_seg_idx; ring_req = nreq->ring_req; req_ring_idx = nreq->req_ring_idx; @@ -1615,7 +1808,6 @@ req_seg_idx = 0; sg = NULL; - /* Check that number of segments is sane. */ if (unlikely(nseg == 0) || unlikely(nseg > xbb->max_request_segments)) { DPRINTF("Bad number of segments in request (%d)\n", @@ -1639,9 +1831,8 @@ xbb_sg->first_sect = sg->first_sect; xbb_sg->last_sect = sg->last_sect; - xbb_sg->nsect = - (int8_t)(sg->last_sect - - sg->first_sect + 1); + xbb_sg->nsect = (int8_t) + (sg->last_sect - sg->first_sect + 1); if ((sg->last_sect >= (PAGE_SIZE >> 9)) || (xbb_sg->nsect <= 0)) { @@ -1650,22 +1841,9 @@ } nr_sects += xbb_sg->nsect; - map->host_addr = xbb_get_gntaddr(reqlist, - seg_idx, /*sector*/0); - KASSERT(map->host_addr + PAGE_SIZE <= - xbb->ring_config.gnt_addr, - ("Host address %#jx len %d overlaps " - "ring address %#jx\n", - (uintmax_t)map->host_addr, PAGE_SIZE, - (uintmax_t)xbb->ring_config.gnt_addr)); - - map->flags = GNTMAP_host_map; - map->ref = sg->gref; - map->dom = xbb->otherend_id; - if (operation == BIO_WRITE) - map->flags |= GNTMAP_readonly; + xbb_grant_init_pre_io(xbb, reqlist, xbb_sg, + sg, seg_idx); sg++; - map++; xbb_sg++; seg_idx++; req_seg_idx++; @@ -1724,29 +1902,10 @@ } } - error = 
HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, - xbb->maps, reqlist->nr_segments); + error = xbb_grant_execute_pre_io(xbb, reqlist); if (error != 0) - panic("Grant table operation failed (%d)", error); - - reqlist->flags |= XBB_REQLIST_MAPPED; - - for (seg_idx = 0, map = xbb->maps; seg_idx < reqlist->nr_segments; - seg_idx++, map++){ - - if (unlikely(map->status != 0)) { - DPRINTF("invalid buffer -- could not remap " - "it (%d)\n", map->status); - DPRINTF("Mapping(%d): Host Addr 0x%lx, flags " - "0x%x ref 0x%x, dom %d\n", seg_idx, - map->host_addr, map->flags, map->ref, - map->dom); - reqlist->status = BLKIF_RSP_ERROR; - goto send_response; - } + goto send_response; - reqlist->gnt_handles[seg_idx] = map->handle; - } if (reqlist->starting_sector_number + total_sects > xbb->media_num_sectors) { @@ -1762,11 +1921,7 @@ do_dispatch: - error = xbb->dispatch_io(xbb, - reqlist, - operation, - bio_flags); - + error = xbb->dispatch_io(xbb, reqlist, operation, bio_flags); if (error != 0) { reqlist->status = BLKIF_RSP_ERROR; goto send_response; @@ -1953,7 +2108,7 @@ * we've already consumed all necessary data out * of the version of the request in the ring buffer * (for native mode). We must update the consumer - * index before issueing back-end I/O so there is + * index before issuing back-end I/O so there is * no possibility that it will complete and a * response be generated before we make room in * the queue for that response. @@ -1968,22 +2123,27 @@ cur_operation = ring_req->operation; } - /* Check for I/O to dispatch */ reqlist = STAILQ_FIRST(&xbb->reqlist_pending_stailq); if (reqlist == NULL) { + int more_to_do; + /* - * We're out of work to do, put the task queue to - * sleep. + * We're out of work to do. Notify the frontend + * of its need to signal us if it produces more + * work, while protecting against the race of new + * work appearing during the setup of this + * interrupt request. 
*/ + RING_FINAL_CHECK_FOR_REQUESTS(&xbb->rings.common, + more_to_do); + if (more_to_do) + continue; + + /* Sleep until the next interrupt. */ break; } - /* - * Grab the first request off the queue and attempt - * to dispatch it. - */ STAILQ_REMOVE_HEAD(&xbb->reqlist_pending_stailq, links); - retval = xbb_dispatch_io(xbb, reqlist); if (retval != 0) { /* @@ -2026,14 +2186,16 @@ * \param arg Callback argument registerd during event channel * binding - the xbb_softc for this instance. */ -static void -xbb_intr(void *arg) +static int +xbb_filter(void *arg) { struct xbb_softc *xbb; - /* Defer to kernel thread. */ + /* Defer to taskqueue thread. */ xbb = (struct xbb_softc *)arg; taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); + + return (FILTER_HANDLED); } SDT_PROVIDER_DEFINE(xbb); @@ -2106,7 +2268,7 @@ return (0); } - xbb_sg = xbb->xbb_sgs; + xbb_sg = reqlist->xbb_sgs; bio = NULL; nseg = reqlist->nr_segments; @@ -2144,6 +2306,7 @@ bio = bios[nbio++] = g_new_bio(); if (unlikely(bio == NULL)) { + DPRINTF("bio alloc failed\n"); error = ENOMEM; goto fail_free_bios; } @@ -2151,8 +2314,8 @@ bio->bio_flags |= bio_flags; bio->bio_dev = dev_data->cdev; bio->bio_offset = bio_offset; - bio->bio_data = xbb_reqlist_ioaddr(reqlist, seg_idx, - xbb_sg->first_sect); + bio->bio_data = xbb_reqlist_vaddr(reqlist, seg_idx, + xbb_sg->first_sect); bio->bio_done = xbb_bio_done; bio->bio_caller1 = reqlist; bio->bio_pblkno = bio_offset >> xbb->sector_size_shift; @@ -2184,17 +2347,6 @@ for (bio_idx = 0; bio_idx < nbio; bio_idx++) { -#ifdef XBB_USE_BOUNCE_BUFFERS - vm_offset_t kva_offset; - - kva_offset = (vm_offset_t)bios[bio_idx]->bio_data - - (vm_offset_t)reqlist->bounce; - if (operation == BIO_WRITE) { - memcpy(bios[bio_idx]->bio_data, - (uint8_t *)reqlist->kva + kva_offset, - bios[bio_idx]->bio_bcount); - } -#endif if (operation == BIO_READ) { SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, read, device_get_unit(xbb->dev), @@ -2246,10 +2398,6 @@ struct uio xuio; struct xbb_sg *xbb_sg; struct 
iovec *xiovec; -#ifdef XBB_USE_BOUNCE_BUFFERS - void **p_vaddr; - int saved_uio_iovcnt; -#endif /* XBB_USE_BOUNCE_BUFFERS */ int vfs_is_locked; int error; @@ -2295,7 +2443,7 @@ xuio.uio_segflg = UIO_SYSSPACE; xuio.uio_iov = file_data->xiovecs; xuio.uio_iovcnt = 0; - xbb_sg = xbb->xbb_sgs; + xbb_sg = reqlist->xbb_sgs; nseg = reqlist->nr_segments; for (xiovec = NULL, seg_idx = 0; seg_idx < nseg; seg_idx++, xbb_sg++) { @@ -2310,20 +2458,8 @@ if (xiovec == NULL) { xiovec = &file_data->xiovecs[xuio.uio_iovcnt]; - xiovec->iov_base = xbb_reqlist_ioaddr(reqlist, + xiovec->iov_base = xbb_reqlist_vaddr(reqlist, seg_idx, xbb_sg->first_sect); -#ifdef XBB_USE_BOUNCE_BUFFERS - /* - * Store the address of the incoming - * buffer at this particular offset - * as well, so we can do the copy - * later without having to do more - * work to recalculate this address. - */ - p_vaddr = &file_data->xiovecs_vaddr[xuio.uio_iovcnt]; - *p_vaddr = xbb_reqlist_vaddr(reqlist, seg_idx, - xbb_sg->first_sect); -#endif /* XBB_USE_BOUNCE_BUFFERS */ xiovec->iov_len = 0; xuio.uio_iovcnt++; } @@ -2343,29 +2479,6 @@ xuio.uio_td = curthread; -#ifdef XBB_USE_BOUNCE_BUFFERS - saved_uio_iovcnt = xuio.uio_iovcnt; - - if (operation == BIO_WRITE) { - /* Copy the write data to the local buffer. */ - for (seg_idx = 0, p_vaddr = file_data->xiovecs_vaddr, - xiovec = xuio.uio_iov; seg_idx < xuio.uio_iovcnt; - seg_idx++, xiovec++, p_vaddr++) { - - memcpy(xiovec->iov_base, *p_vaddr, xiovec->iov_len); - } - } else { - /* - * We only need to save off the iovecs in the case of a - * read, because the copy for the read happens after the - * VOP_READ(). (The uio will get modified in that call - * sequence.) 
- */ - memcpy(file_data->saved_xiovecs, xuio.uio_iov, - xuio.uio_iovcnt * sizeof(xuio.uio_iov[0])); - } -#endif /* XBB_USE_BOUNCE_BUFFERS */ - vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount); switch (operation) { case BIO_READ: @@ -2444,27 +2557,6 @@ } VFS_UNLOCK_GIANT(vfs_is_locked); -#ifdef XBB_USE_BOUNCE_BUFFERS - /* We only need to copy here for read operations */ - if (operation == BIO_READ) { - - for (seg_idx = 0, p_vaddr = file_data->xiovecs_vaddr, - xiovec = file_data->saved_xiovecs; - seg_idx < saved_uio_iovcnt; seg_idx++, - xiovec++, p_vaddr++) { - - /* - * Note that we have to use the copy of the - * io vector we made above. uiomove() modifies - * the uio and its referenced vector as uiomove - * performs the copy, so we can't rely on any - * state from the original uio. - */ - memcpy(*p_vaddr, xiovec->iov_base, xiovec->iov_len); - } - } -#endif /* XBB_USE_BOUNCE_BUFFERS */ - bailout_send_response: if (error != 0) @@ -2800,11 +2892,17 @@ #endif } xbb->kva = 0; - xbb->gnt_base_addr = 0; - if (xbb->kva_free != NULL) { - free(xbb->kva_free, M_XENBLOCKBACK); - xbb->kva_free = NULL; + xbb->kva_gnt_base = 0; + if (xbb->reqlist_kva_free != NULL) { + free(xbb->reqlist_kva_free, M_XENBLOCKBACK); + xbb->reqlist_kva_free = NULL; } +#ifdef XBB_USE_BOUNCE_BUFFERS + free((uint8_t *)xbb->reqlist_kva, M_XENBLOCKBACK); + free(xbb->reqlist_gmfns, M_XENBLOCKBACK); + xbb->reqlist_kva = 0; + xbb->reqlist_gmfns = NULL; +#endif /* XBB_USE_BOUNCE_BUFFERS */ } /** @@ -2825,10 +2923,7 @@ if ((xbb->flags & XBBF_RING_CONNECTED) == 0) return (0); - if (xbb->irq != 0) { - unbind_from_irqhandler(xbb->irq); - xbb->irq = 0; - } + xen_intr_unbind(&xbb->xen_intr_handle); mtx_unlock(&xbb->lock); taskqueue_drain(xbb->io_taskqueue, &xbb->io_task); @@ -2870,15 +2965,9 @@ /* There is one request list for ever allocated request. 
*/ for (i = 0, reqlist = xbb->request_lists; i < xbb->max_requests; i++, reqlist++){ -#ifdef XBB_USE_BOUNCE_BUFFERS - if (reqlist->bounce != NULL) { - free(reqlist->bounce, M_XENBLOCKBACK); - reqlist->bounce = NULL; - } -#endif - if (reqlist->gnt_handles != NULL) { - free(reqlist->gnt_handles, M_XENBLOCKBACK); - reqlist->gnt_handles = NULL; + if (reqlist->xbb_sgs != NULL) { + free(reqlist->xbb_sgs, M_XENBLOCKBACK); + reqlist->xbb_sgs = NULL; } } free(xbb->request_lists, M_XENBLOCKBACK); @@ -2914,7 +3003,7 @@ xbb->ring_config.va = xbb->kva + (xbb->kva_size - (xbb->ring_config.ring_pages * PAGE_SIZE)); - xbb->ring_config.gnt_addr = xbb->gnt_base_addr + xbb->ring_config.gnt_addr = xbb->kva_gnt_base + (xbb->kva_size - (xbb->ring_config.ring_pages * PAGE_SIZE)); @@ -2980,15 +3069,15 @@ xbb->flags |= XBBF_RING_CONNECTED; - error = - bind_interdomain_evtchn_to_irqhandler(xbb->otherend_id, - xbb->ring_config.evtchn, - device_get_nameunit(xbb->dev), - xbb_intr, /*arg*/xbb, - INTR_TYPE_BIO | INTR_MPSAFE, - &xbb->irq); + error = xen_intr_bind_remote_port(xbb->dev, + xbb->otherend_id, + xbb->ring_config.evtchn, + xbb_filter, + /*ithread_handler*/NULL, + /*arg*/xbb, + INTR_TYPE_BIO | INTR_MPSAFE, + &xbb->xen_intr_handle); if (error) { - (void)xbb_disconnect(xbb); xenbus_dev_fatal(xbb->dev, error, "binding event channel"); return (error); } @@ -3000,7 +3089,7 @@ /* Needed to make bit_alloc() macro work */ #define calloc(count, size) malloc((count)*(size), M_XENBLOCKBACK, \ - M_NOWAIT|M_ZERO); + M_WAITOK|M_ZERO); /** * Size KVA and pseudo-physical address allocations based on negotiated @@ -3015,29 +3104,47 @@ static int xbb_alloc_communication_mem(struct xbb_softc *xbb) { + + xbb->kva_size = xbb->ring_config.ring_pages * PAGE_SIZE; + xbb->reqlist_kva_pages = xbb->max_requests * xbb->max_request_segments; xbb->reqlist_kva_size = xbb->reqlist_kva_pages * PAGE_SIZE; - xbb->kva_size = xbb->reqlist_kva_size + - (xbb->ring_config.ring_pages * PAGE_SIZE); + xbb->reqlist_kva_free = 
bit_alloc(xbb->reqlist_kva_pages); + if (xbb->reqlist_kva_free == NULL) + return (ENOMEM); + +#ifdef XBB_USE_BOUNCE_BUFFERS + /* Requests are copied into a separate malloc backed pool. */ + xbb->reqlist_kva = (vm_offset_t)malloc(xbb->reqlist_kva_size, + M_XENBLOCKBACK, M_WAITOK); + xbb->reqlist_gmfns = malloc(xbb->reqlist_kva_pages * sizeof(xen_pfn_t), + M_XENBLOCKBACK, M_WAITOK); +#ifdef XENHVM + { + vm_offset_t kva; + xen_pfn_t *pgmfn; - xbb->kva_free = bit_alloc(xbb->reqlist_kva_pages); - if (xbb->kva_free == NULL) - return (ENOMEM); + for (kva = xbb->reqlist_kva, pgmfn = xbb->reqlist_gmfns; + kva < xbb->reqlist_kva + xbb->reqlist_kva_size; + kva += PAGE_SIZE, pgmfn++) + *pgmfn = pmap_kextract(kva) >> PAGE_SHIFT; + } +#endif /* XENHVM */ +#else /* !XBB_USE_BOUNCE_BUFFERS */ + /* Requests are mapped into the global kva pool. */ + xbb->kva_size += xbb->reqlist_kva_size; +#endif /* !XBB_USE_BOUNCE_BUFFERS */ DPRINTF("%s: kva_size = %d, reqlist_kva_size = %d\n", device_get_nameunit(xbb->dev), xbb->kva_size, xbb->reqlist_kva_size); -#ifndef XENHVM - xbb->kva = kmem_alloc_nofault(kernel_map, xbb->kva_size); - if (xbb->kva == 0) - return (ENOMEM); - xbb->gnt_base_addr = xbb->kva; -#else /* XENHVM */ + +#ifdef XENHVM /* * Reserve a range of pseudo physical memory that we can map * into kva. These pages will only be backed by machine - * pages ("real memory") during the lifetime of front-end requests - * via grant table operations. + * pages ("real memory") during the lifetime of grant table + * operations. 
*/ xbb->pseudo_phys_res_id = 0; xbb->pseudo_phys_res = bus_alloc_resource(xbb->dev, SYS_RES_MEMORY, @@ -3049,12 +3156,21 @@ return (ENOMEM); } xbb->kva = (vm_offset_t)rman_get_virtual(xbb->pseudo_phys_res); - xbb->gnt_base_addr = rman_get_start(xbb->pseudo_phys_res); -#endif /* XENHVM */ + xbb->kva_gnt_base = rman_get_start(xbb->pseudo_phys_res); +#else /* !XENHVM */ + xbb->kva = kmem_alloc_nofault(kernel_map, xbb->kva_size); + if (xbb->kva == 0) + return (ENOMEM); + xbb->kva_gnt_base = xbb->kva; +#endif /* !XENHVM */ + +#ifndef XBB_USE_BOUNCE_BUFFERS + xbb->reqlist_kva = xbb->kva; +#endif - DPRINTF("%s: kva: %#jx, gnt_base_addr: %#jx\n", + DPRINTF("%s: kva: %#jx, kva_gnt_base: %#jx\n", device_get_nameunit(xbb->dev), (uintmax_t)xbb->kva, - (uintmax_t)xbb->gnt_base_addr); + (uintmax_t)xbb->kva_gnt_base); return (0); } @@ -3244,7 +3360,7 @@ * Allocate request book keeping datastructures. */ xbb->requests = malloc(xbb->max_requests * sizeof(*xbb->requests), - M_XENBLOCKBACK, M_NOWAIT|M_ZERO); + M_XENBLOCKBACK, M_WAITOK|M_ZERO); if (xbb->requests == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request structures"); @@ -3272,7 +3388,7 @@ * in flight request. 
*/ xbb->request_lists = malloc(xbb->max_requests * - sizeof(*xbb->request_lists), M_XENBLOCKBACK, M_NOWAIT|M_ZERO); + sizeof(*xbb->request_lists), M_XENBLOCKBACK, M_WAITOK|M_ZERO); if (xbb->request_lists == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request list structures"); @@ -3282,35 +3398,26 @@ STAILQ_INIT(&xbb->reqlist_free_stailq); STAILQ_INIT(&xbb->reqlist_pending_stailq); for (i = 0; i < xbb->max_requests; i++) { - int seg; +#ifndef XBB_USE_BOUNCE_BUFFERS + u_int seg; +#endif reqlist = &xbb->request_lists[i]; - reqlist->xbb = xbb; - -#ifdef XBB_USE_BOUNCE_BUFFERS - reqlist->bounce = malloc(xbb->max_reqlist_size, - M_XENBLOCKBACK, M_NOWAIT); - if (reqlist->bounce == NULL) { - xenbus_dev_fatal(xbb->dev, ENOMEM, - "Unable to allocate request " - "bounce buffers"); - return (ENOMEM); - } -#endif /* XBB_USE_BOUNCE_BUFFERS */ - - reqlist->gnt_handles = malloc(xbb->max_reqlist_segments * - sizeof(*reqlist->gnt_handles), - M_XENBLOCKBACK, M_NOWAIT|M_ZERO); - if (reqlist->gnt_handles == NULL) { + reqlist->xbb_sgs = malloc(xbb->max_reqlist_segments * + sizeof(*reqlist->xbb_sgs), + M_XENBLOCKBACK, M_WAITOK|M_ZERO); + if (reqlist->xbb_sgs == NULL) { xenbus_dev_fatal(xbb->dev, ENOMEM, "Unable to allocate request " "grant references"); return (ENOMEM); } +#ifndef XBB_USE_BOUNCE_BUFFERS for (seg = 0; seg < xbb->max_reqlist_segments; seg++) - reqlist->gnt_handles[seg] = GRANT_REF_INVALID; + reqlist->xbb_sgs[seg].gnt_handle = GRANT_REF_INVALID; +#endif /* !XBB_USE_BOUNCE_BUFFERS */ STAILQ_INSERT_TAIL(&xbb->reqlist_free_stailq, reqlist, links); } @@ -3383,10 +3490,12 @@ * \param xbb Per-instance xbb configuration structure. 
*/ static void -xbb_connect(struct xbb_softc *xbb) +xbb_connect(void *context, int pending) { - int error; + struct xbb_softc *xbb; + int error; + xbb = (struct xbb_softc *)context; if (xenbus_get_state(xbb->dev) == XenbusStateConnected) return; @@ -3441,10 +3550,8 @@ if (xbb_publish_backend_info(xbb) != 0) { /* * If we can't publish our data, we cannot participate - * in this connection, and waiting for a front-end state - * change will not help the situation. + * in this connection. */ - (void)xbb_disconnect(xbb); return; } @@ -3614,6 +3721,16 @@ "how many I/O requests have been completed"); SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "reqs_queued_for_completion", CTLFLAG_RW, + &xbb->reqs_queued_for_completion, + "how many I/O requests queued but not yet pushed"); + + SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, + "reqs_completed_with_error", CTLFLAG_RW, + &xbb->reqs_completed_with_error, + "how many I/O requests completed with error status"); + + SYSCTL_ADD_UQUAD(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO, "forced_dispatch", CTLFLAG_RW, &xbb->forced_dispatch, "how many I/O dispatches were forced"); @@ -3678,7 +3795,8 @@ xbb = device_get_softc(dev); xbb->dev = dev; xbb->otherend_id = xenbus_get_otherend_id(dev); - TASK_INIT(&xbb->io_task, /*priority*/0, xbb_run_queue, xbb); + TASK_INIT(&xbb->connect_task, /*priority*/0, xbb_connect, xbb); + TASK_INIT(&xbb->io_task, /*priority*/1, xbb_run_queue, xbb); mtx_init(&xbb->lock, device_get_nameunit(dev), NULL, MTX_DEF); /* @@ -3805,9 +3923,10 @@ * Create a taskqueue for doing work that must occur from a * thread context. 
*/ - xbb->io_taskqueue = taskqueue_create(device_get_nameunit(dev), M_NOWAIT, - taskqueue_thread_enqueue, - /*context*/&xbb->io_taskqueue); + xbb->io_taskqueue = taskqueue_create_fast(device_get_nameunit(dev), + M_NOWAIT, + taskqueue_thread_enqueue, + /*contxt*/&xbb->io_taskqueue); if (xbb->io_taskqueue == NULL) { xbb_attach_failed(xbb, error, "Unable to create taskqueue"); return (ENOMEM); @@ -3952,7 +4071,7 @@ break; case XenbusStateInitialised: case XenbusStateConnected: - xbb_connect(xbb); + taskqueue_enqueue(xbb->io_taskqueue, &xbb->connect_task); break; case XenbusStateClosing: case XenbusStateClosed: ==== //depot/vendor/FreeBSD/stable/9/sys/dev/xen/blkfront/blkfront.c#7 (text) - //SpectraBSD/stable/sys/dev/xen/blkfront/blkfront.c#2 (text) ==== content @@ -28,7 +28,7 @@ */ #include -__FBSDID("$FreeBSD: stable/9/sys/dev/xen/blkfront/blkfront.c 249132 2013-04-05 08:22:11Z mav $"); +__FBSDID("$FreeBSD: stable/9/sys/dev/xen/blkfront/blkfront.c 237873 2012-07-01 05:13:50Z ken $"); #include #include @@ -57,7 +57,6 @@ #include #include -#include #include #include #include @@ -747,7 +746,7 @@ } error = xs_printf(XST_NIL, node_path, "event-channel", - "%u", irq_to_evtchn_port(sc->irq)); + "%u", xen_intr_port(sc->xen_intr_handle)); if (error) { xenbus_dev_fatal(sc->xb_dev, error, "writing %s/event-channel", @@ -823,13 +822,13 @@ } } - error = bind_listening_port_to_irqhandler( - xenbus_get_otherend_id(sc->xb_dev), - "xbd", (driver_intr_t *)blkif_int, sc, - INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq); + error = xen_intr_alloc_and_bind_local_port( + sc->xb_dev, xenbus_get_otherend_id(sc->xb_dev), + /*filter*/NULL, blkif_int, /*arg*/sc, + INTR_TYPE_BIO | INTR_MPSAFE | INTR_ENTROPY, &sc->xen_intr_handle); if (error) { xenbus_dev_fatal(sc->xb_dev, error, - "bind_evtchn_to_irqhandler failed"); + "xen_intr_alloc_and_bind_local_port failed"); return (error); } @@ -975,7 +974,7 @@ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify); if (notify) - notify_remote_via_irq(sc->irq); + 
xen_intr_signal(sc->xen_intr_handle); } static void @@ -1388,10 +1387,7 @@ xb_initq_complete(sc); } - if (sc->irq) { - unbind_from_irqhandler(sc->irq); - sc->irq = 0; - } + xen_intr_unbind(&sc->xen_intr_handle); } static int ==== //depot/vendor/FreeBSD/stable/9/sys/dev/xen/blkfront/block.h#3 (text) - //SpectraBSD/stable/sys/dev/xen/blkfront/block.h#1 (text) ==== content @@ -172,7 +172,7 @@ uint32_t max_request_size; grant_ref_t ring_ref[XBF_MAX_RING_PAGES]; blkif_front_ring_t ring; - unsigned int irq; + xen_intr_handle_t xen_intr_handle; struct gnttab_free_callback callback; TAILQ_HEAD(,xb_command) cm_free; TAILQ_HEAD(,xb_command) cm_ready; ==== //depot/vendor/FreeBSD/stable/9/sys/dev/xen/control/control.c#3 (text) - //SpectraBSD/stable/sys/dev/xen/control/control.c#2 (text) ==== content @@ -128,6 +128,7 @@ #include #include +#include #include #include @@ -242,6 +243,7 @@ xencons_suspend(); gnttab_suspend(); + intr_suspend(); max_pfn = HYPERVISOR_shared_info->arch.max_pfn; @@ -282,7 +284,7 @@ HYPERVISOR_shared_info->arch.max_pfn = max_pfn; gnttab_resume(); - irq_resume(); + intr_resume(); local_irq_enable(); xencons_resume(); @@ -352,13 +354,11 @@ * Prevent any races with evtchn_interrupt() handler. */ disable_intr(); - irq_suspend(); + intr_suspend(); suspend_cancelled = HYPERVISOR_suspend(0); - if (suspend_cancelled) - irq_resume(); - else - xenpci_resume(); + + intr_resume(); /* * Re-enable interrupts and put the scheduler back to normal. ==== //depot/vendor/FreeBSD/stable/9/sys/dev/xen/netback/netback.c#3 (text) - //SpectraBSD/stable/sys/dev/xen/netback/netback.c#1 (text) ==== content @@ -75,14 +75,12 @@ #include #include -#include -#include #include #include #include -#include +#include #include #include #include @@ -433,8 +431,8 @@ /** Xen device handle.*/ long handle; - /** IRQ mapping for the communication ring event channel. */ - int irq; + /** Handle to the communication ring event channel. 
*/ + xen_intr_handle_t xen_intr_handle; /** * \brief Cached value of the front-end's domain id. @@ -647,10 +645,7 @@ int error; int i; - if (xnb->irq != 0) { - unbind_from_irqhandler(xnb->irq); - xnb->irq = 0; - } + xen_intr_unbind(&xnb->xen_intr_handle); /* * We may still have another thread currently processing requests. We @@ -773,13 +768,13 @@ xnb->flags |= XNBF_RING_CONNECTED; - error = - bind_interdomain_evtchn_to_irqhandler(xnb->otherend_id, - xnb->evtchn, - device_get_nameunit(xnb->dev), - xnb_intr, /*arg*/xnb, - INTR_TYPE_BIO | INTR_MPSAFE, - &xnb->irq); + error = xen_intr_bind_remote_port(xnb->dev, + xnb->otherend_id, + xnb->evtchn, + /*filter*/NULL, + xnb_intr, /*arg*/xnb, + INTR_TYPE_BIO | INTR_MPSAFE, + &xnb->xen_intr_handle); if (error != 0) { (void)xnb_disconnect(xnb); xenbus_dev_fatal(xnb->dev, error, "binding event channel"); @@ -1448,7 +1443,7 @@ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify); if (notify != 0) - notify_remote_via_irq(xnb->irq); + xen_intr_signal(xnb->xen_intr_handle); txb->sring->req_event = txb->req_cons + 1; xen_mb(); @@ -2361,7 +2356,7 @@ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify); if ((notify != 0) || (out_of_space != 0)) - notify_remote_via_irq(xnb->irq); + xen_intr_signal(xnb->xen_intr_handle); rxb->sring->req_event = req_prod_local + 1; xen_mb(); } while (rxb->sring->req_prod != req_prod_local) ; ==== //depot/vendor/FreeBSD/stable/9/sys/dev/xen/netfront/netfront.c#4 (text) - //SpectraBSD/stable/sys/dev/xen/netfront/netfront.c#2 (text) ==== content @@ -25,7 +25,7 @@ */ #include -__FBSDID("$FreeBSD: stable/9/sys/dev/xen/netfront/netfront.c 248078 2013-03-09 00:39:54Z marius $"); +__FBSDID("$FreeBSD: stable/9/sys/dev/xen/netfront/netfront.c 246007 2013-01-27 23:02:33Z marius $"); #include "opt_inet.h" @@ -81,7 +81,6 @@ #include #include #include -#include #include #include #include @@ -256,8 +255,7 @@ struct mtx rx_lock; struct mtx sc_lock; - u_int handle; - u_int irq; + xen_intr_handle_t xen_intr_handle; u_int 
copying_receiver; u_int carrier; u_int maxfrags; @@ -546,7 +544,8 @@ goto abort_transaction; } err = xs_printf(xst, node, - "event-channel", "%u", irq_to_evtchn_port(info->irq)); + "event-channel", "%u", + xen_intr_port(info->xen_intr_handle)); if (err) { message = "writing event-channel"; goto abort_transaction; @@ -608,7 +607,6 @@ info->rx_ring_ref = GRANT_REF_INVALID; info->rx.sring = NULL; info->tx.sring = NULL; - info->irq = 0; txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); if (!txs) { @@ -635,12 +633,13 @@ if (error) goto fail; - error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev), - "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq); + error = xen_intr_alloc_and_bind_local_port(dev, + xenbus_get_otherend_id(dev), /*filter*/NULL, xn_intr, info, + INTR_TYPE_NET | INTR_MPSAFE | INTR_ENTROPY, &info->xen_intr_handle); if (error) { xenbus_dev_fatal(dev, error, - "bind_evtchn_to_irqhandler failed"); + "xen_intr_alloc_and_bind_local_port failed"); goto fail; } @@ -819,13 +818,13 @@ */ batch_target = sc->rx_target - (req_prod - sc->rx.rsp_cons); for (i = mbufq_len(&sc->xn_rx_batch); i < batch_target; i++) { - MGETHDR(m_new, M_NOWAIT, MT_DATA); + MGETHDR(m_new, M_DONTWAIT, MT_DATA); if (m_new == NULL) { printf("%s: MGETHDR failed\n", __func__); goto no_mbuf; } - m_cljget(m_new, M_NOWAIT, MJUMPAGESIZE); + m_cljget(m_new, M_DONTWAIT, MJUMPAGESIZE); if ((m_new->m_flags & M_EXT) == 0) { printf("%s: m_cljget failed\n", __func__); m_freem(m_new); @@ -960,7 +959,7 @@ push: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify); if (notify) - notify_remote_via_irq(sc->irq); + xen_intr_signal(sc->xen_intr_handle); } static void @@ -1506,7 +1505,7 @@ * the Linux network stack. 
*/ if (nfrags > sc->maxfrags) { - m = m_defrag(m_head, M_NOWAIT); + m = m_defrag(m_head, M_DONTWAIT); if (!m) { /* * Defrag failed, so free the mbuf and @@ -1678,7 +1677,7 @@ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify); if (notify) - notify_remote_via_irq(sc->irq); + xen_intr_signal(sc->xen_intr_handle); if (RING_FULL(&sc->tx)) { sc->tx_full = 1; @@ -1960,7 +1959,7 @@ * packets. */ netfront_carrier_on(np); - notify_remote_via_irq(np->irq); + xen_intr_signal(np->xen_intr_handle); XN_TX_LOCK(np); xn_txeof(np); XN_TX_UNLOCK(np); @@ -2049,8 +2048,9 @@ return (err); } -/** Create a network device. - * @param handle device handle +/** + * Create a network device. + * @param dev Newbus device representing this virtual NIC. */ int create_netdev(device_t dev) @@ -2118,7 +2118,6 @@ ifp->if_watchdog = xn_watchdog; #endif ifp->if_init = xn_ifinit; - ifp->if_mtu = ETHERMTU; ifp->if_snd.ifq_maxlen = NET_TX_RING_SIZE - 1; ifp->if_hwassist = XN_CSUM_FEATURES; @@ -2190,10 +2189,7 @@ free_ring(&info->tx_ring_ref, &info->tx.sring); free_ring(&info->rx_ring_ref, &info->rx.sring); - if (info->irq) - unbind_from_irqhandler(info->irq); - - info->irq = 0; + xen_intr_unbind(&info->xen_intr_handle); } static void ==== //depot/vendor/FreeBSD/stable/9/sys/dev/xen/xenpci/xenpci.c#2 (text) - //SpectraBSD/stable/sys/dev/xen/xenpci/xenpci.c#1 (text) ==== content @@ -32,9 +32,6 @@ #include #include #include -#include -#include -#include #include #include @@ -42,30 +39,18 @@ #include #include + #include #include -#include -#include -#include -#include +#include #include #include -#include -#include -#include -#include +#include -#include +extern void xen_intr_handle_upcall(struct trapframe *trap_frame); -/* - * These variables are used by the rest of the kernel to access the - * hypervisor. 
- */ -char *hypercall_stubs; -shared_info_t *HYPERVISOR_shared_info; -static vm_paddr_t shared_info_pa; static device_t nexus; /* @@ -73,103 +58,28 @@ */ static devclass_t xenpci_devclass; -/* - * Return the CPUID base address for Xen functions. - */ -static uint32_t -xenpci_cpuid_base(void) +static int +xenpci_intr_filter(void *trap_frame) { - uint32_t base, regs[4]; - - for (base = 0x40000000; base < 0x40010000; base += 0x100) { - do_cpuid(base, regs); - if (!memcmp("XenVMMXenVMM", ®s[1], 12) - && (regs[0] - base) >= 2) - return (base); - } - return (0); + xen_intr_handle_upcall(trap_frame); + return (FILTER_HANDLED); } -/* - * Allocate and fill in the hypcall page. - */ static int -xenpci_init_hypercall_stubs(device_t dev, struct xenpci_softc * scp) +xenpci_irq_init(device_t device, struct xenpci_softc *scp) { - uint32_t base, regs[4]; - int i; + int error; - base = xenpci_cpuid_base(); - if (!base) { - device_printf(dev, "Xen platform device but not Xen VMM\n"); - return (EINVAL); - } - - if (bootverbose) { - do_cpuid(base + 1, regs); - device_printf(dev, "Xen version %d.%d.\n", - regs[0] >> 16, regs[0] & 0xffff); - } - - /* - * Find the hypercall pages. - */ - do_cpuid(base + 2, regs); - - hypercall_stubs = malloc(regs[0] * PAGE_SIZE, M_TEMP, M_WAITOK); - - for (i = 0; i < regs[0]; i++) { - wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); - } - - return (0); + error = BUS_SETUP_INTR(device_get_parent(device), device, + scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC, + xenpci_intr_filter, NULL, /*trap_frame*/NULL, + &scp->intr_cookie); + if (error != 0) + xen_hvm_set_callback(device); + return (error); } /* - * After a resume, re-initialise the hypercall page. 
- */ -static void -xenpci_resume_hypercall_stubs(device_t dev, struct xenpci_softc * scp) -{ - uint32_t base, regs[4]; - int i; - - base = xenpci_cpuid_base(); - - do_cpuid(base + 2, regs); - for (i = 0; i < regs[0]; i++) { - wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); - } -} - -/* - * Tell the hypervisor how to contact us for event channel callbacks. - */ -static void -xenpci_set_callback(device_t dev) -{ - int irq; - uint64_t callback; - struct xen_hvm_param xhp; - - irq = pci_get_irq(dev); - if (irq < 16) { - callback = irq; - } else { - callback = (pci_get_intpin(dev) - 1) & 3; - callback |= pci_get_slot(dev) << 11; - callback |= 1ull << 56; - } - - xhp.domid = DOMID_SELF; - xhp.index = HVM_PARAM_CALLBACK_IRQ; - xhp.value = callback; - if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp)) - panic("Can't set evtchn callback"); -} - - -/* * Deallocate anything allocated by xenpci_allocate_resources. */ static int @@ -293,35 +203,6 @@ } /* - * Called very early in the resume sequence - reinitialise the various - * bits of Xen machinery including the hypercall page and the shared - * info page. - */ -void -xenpci_resume() -{ - device_t dev = devclass_get_device(xenpci_devclass, 0); - struct xenpci_softc *scp = device_get_softc(dev); - struct xen_add_to_physmap xatp; - - xenpci_resume_hypercall_stubs(dev, scp); - - xatp.domid = DOMID_SELF; - xatp.idx = 0; - xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = shared_info_pa >> PAGE_SHIFT; - if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) - panic("HYPERVISOR_memory_op failed"); - - pmap_kenter((vm_offset_t) HYPERVISOR_shared_info, shared_info_pa); - - xenpci_set_callback(dev); - - gnttab_resume(); - irq_resume(); -} - -/* * Probe - just check device ID. 
*/ static int @@ -341,11 +222,9 @@ static int xenpci_attach(device_t dev) { - int error; struct xenpci_softc *scp = device_get_softc(dev); - struct xen_add_to_physmap xatp; - vm_offset_t shared_va; devclass_t dc; + int error; /* * Find and record nexus0. Since we are not really on the @@ -365,33 +244,10 @@ goto errexit; } - error = xenpci_init_hypercall_stubs(dev, scp); - if (error) { - device_printf(dev, "xenpci_init_hypercall_stubs failed(%d).\n", - error); - goto errexit; - } - - setup_xen_features(); - - xenpci_alloc_space_int(scp, PAGE_SIZE, &shared_info_pa); - - xatp.domid = DOMID_SELF; - xatp.idx = 0; - xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = shared_info_pa >> PAGE_SHIFT; - if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) - panic("HYPERVISOR_memory_op failed"); - - shared_va = kmem_alloc_nofault(kernel_map, PAGE_SIZE); - pmap_kenter(shared_va, shared_info_pa); - HYPERVISOR_shared_info = (void *) shared_va; - /* * Hook the irq up to evtchn */ xenpci_irq_init(dev, scp); - xenpci_set_callback(dev); return (bus_generic_attach(dev)); @@ -431,13 +287,42 @@ return (xenpci_deallocate_resources(dev)); } +static int +xenpci_suspend(device_t dev) +{ + struct xenpci_softc *scp = device_get_softc(dev); + device_t parent = device_get_parent(dev); + + if (scp->intr_cookie != NULL) { + if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq, + scp->intr_cookie) != 0) + printf("intr teardown failed.. 
continuing\n"); + scp->intr_cookie = NULL; + } + + return (bus_generic_suspend(dev)); +} + +static int +xenpci_resume(device_t dev) +{ + struct xenpci_softc *scp = device_get_softc(dev); + device_t parent = device_get_parent(dev); + + BUS_SETUP_INTR(parent, dev, scp->res_irq, + INTR_MPSAFE|INTR_TYPE_MISC, xenpci_intr_filter, NULL, + /*trap_frame*/NULL, &scp->intr_cookie); + xen_hvm_set_callback(dev); + return (bus_generic_resume(dev)); +} + static device_method_t xenpci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xenpci_probe), DEVMETHOD(device_attach, xenpci_attach), DEVMETHOD(device_detach, xenpci_detach), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), + DEVMETHOD(device_suspend, xenpci_suspend), + DEVMETHOD(device_resume, xenpci_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), ==== //depot/vendor/FreeBSD/stable/9/sys/dev/xen/xenpci/xenpcivar.h#1 (text) - //SpectraBSD/stable/sys/dev/xen/xenpci/xenpcivar.h#1 (text) ==== content @@ -38,7 +38,4 @@ vm_paddr_t phys_next; /* next page from mem range */ }; -extern int xenpci_irq_init(device_t device, struct xenpci_softc *scp); extern int xenpci_alloc_space(size_t sz, vm_paddr_t *pa); -extern void xenpci_resume(void); -extern void xen_suspend(void); ==== //depot/vendor/FreeBSD/stable/9/sys/i386/i386/apic_vector.s#3 (text) - //SpectraBSD/stable/sys/i386/i386/apic_vector.s#2 (text) ==== content @@ -138,6 +138,25 @@ MEXITCOUNT jmp doreti +#ifdef XENHVM +/* + * Xen event channel upcall interrupt handler. + * Only used when the hypervisor supports direct vector callbacks. + */ + .text + SUPERALIGN_TEXT +IDTVEC(xen_intr_upcall) + PUSH_FRAME + SET_KERNEL_SREGS + cld + FAKE_MCOUNT(TF_EIP(%esp)) + pushl %esp + call xen_intr_handle_upcall + add $4, %esp + MEXITCOUNT + jmp doreti +#endif + #ifdef SMP /* * Global address space TLB shootdown. 
==== //depot/vendor/FreeBSD/stable/9/sys/i386/i386/machdep.c#11 (text) - //SpectraBSD/stable/sys/i386/i386/machdep.c#3 (text) ==== content @@ -1946,6 +1946,9 @@ #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), #endif +#ifdef XENHVM + IDTVEC(xen_intr_upcall), +#endif IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB @@ -2934,6 +2937,10 @@ setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif +#ifdef XENHVM + setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386TGT, SEL_UPL, + GSEL(GCODE_SEL, SEL_KPL)); +#endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; ==== //depot/vendor/FreeBSD/stable/9/sys/i386/include/apicvar.h#3 (text) - //SpectraBSD/stable/sys/i386/include/apicvar.h#2 (text) ==== content @@ -218,6 +218,7 @@ u_int apic_idt_to_irq(u_int apic_id, u_int vector); void apic_register_enumerator(struct apic_enumerator *enumerator); u_int apic_cpuid(u_int apic_id); +void evtchn_handle_upcall(struct trapframe *trap_frame); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); ==== //depot/vendor/FreeBSD/stable/9/sys/i386/include/intr_machdep.h#6 (text) - //SpectraBSD/stable/sys/i386/include/intr_machdep.h#2 (text) ==== content @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $FreeBSD: stable/9/sys/i386/include/intr_machdep.h 247877 2013-03-06 09:22:45Z avg $ + * $FreeBSD: stable/9/sys/i386/include/intr_machdep.h 235260 2012-05-11 04:10:23Z attilio $ */ #ifndef __MACHINE_INTR_MACHDEP_H__ @@ -44,12 +44,24 @@ * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. - * IRQ values beyond 256 are used by MSI. We leave 255 unused to avoid - * confusion since 255 is used in PCI to indicate an invalid IRQ. + * IRQ values from 256 to 767 are used by MSI. 
When running under the Xen + * Hypervisor, IRQ values from 768 to 1791 are available for binding to + * event channel events. We leave 255 unused to avoid confusion since 255 is + * used in PCI to indicate an invalid IRQ. */ #define NUM_MSI_INTS 512 #define FIRST_MSI_INT 256 -#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS) +#ifdef XENHVM +#include +#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS +#define FIRST_EVTCHN_INT \ + (FIRST_MSI_INT + NUM_MSI_INTS) +#define LAST_EVTCHN_INT \ + (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) +#else +#define NUM_EVTCHN_INTS 0 +#endif +#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) /* * Default base address for MSI messages on x86 platforms. @@ -94,7 +106,7 @@ int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); - TAILQ_ENTRY(pic) pics; + STAILQ_ENTRY(pic) pics; }; /* Flags for pic_disable_source() */ ==== //depot/vendor/FreeBSD/stable/9/sys/i386/include/pcpu.h#3 (text) - //SpectraBSD/stable/sys/i386/include/pcpu.h#2 (text) ==== content @@ -76,14 +76,7 @@ int pc_virq_to_irq[NR_VIRQS]; \ int pc_ipi_to_irq[NR_IPIS] -#elif defined(XENHVM) - -#define PCPU_XEN_FIELDS \ - ; \ - unsigned int pc_last_processed_l1i; \ - unsigned int pc_last_processed_l2i - -#else /* !XEN && !XENHVM */ +#else /* !XEN */ #define PCPU_XEN_FIELDS ==== //depot/vendor/FreeBSD/stable/9/sys/i386/include/segments.h#4 (text) - //SpectraBSD/stable/sys/i386/include/segments.h#2 (text) ==== content @@ -208,6 +208,7 @@ #define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. 
*/ #define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ #define IDT_DTRACE_RET 0x92 /* DTrace pid provider Interrupt Vector */ +#define IDT_EVTCHN 0x93 /* Xen HVM Event Channel Interrupt Vector */ /* * Entries in the Global Descriptor Table (GDT) ==== //depot/vendor/FreeBSD/stable/9/sys/sys/kernel.h#3 (text) - //SpectraBSD/stable/sys/sys/kernel.h#1 (text) ==== content @@ -102,6 +102,11 @@ SI_SUB_VM = 0x1000000, /* virtual memory system init*/ SI_SUB_KMEM = 0x1800000, /* kernel memory*/ SI_SUB_KVM_RSRC = 0x1A00000, /* kvm operational limits*/ + SI_SUB_HYPERVISOR = 0x1A40000, /* + * Hypervisor detection and + * virtualization support + * setup. + */ SI_SUB_WITNESS = 0x1A80000, /* witness initialization */ SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */ SI_SUB_LOCK = 0x1B00000, /* various locks */ ==== //depot/vendor/FreeBSD/stable/9/sys/x86/x86/local_apic.c#4 (text) - //SpectraBSD/stable/sys/x86/x86/local_apic.c#1 (text) ==== content @@ -90,6 +90,7 @@ #define IRQ_TIMER (NUM_IO_INTS + 1) #define IRQ_SYSCALL (NUM_IO_INTS + 2) #define IRQ_DTRACE_RET (NUM_IO_INTS + 3) +#define IRQ_EVTCHN (NUM_IO_INTS + 4) /* * Support for local APICs. 
Local APICs manage interrupts on each @@ -310,6 +311,9 @@ #ifdef KDTRACE_HOOKS lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = IRQ_DTRACE_RET; #endif +#ifdef XENHVM + lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; +#endif #ifdef SMP @@ -1139,6 +1143,10 @@ if (irq == IRQ_DTRACE_RET) continue; #endif +#ifdef XENHVM + if (irq == IRQ_EVTCHN) + continue; +#endif db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); ==== - //SpectraBSD/stable/sys/x86/xen/hvm.c#1 ==== ==== - //SpectraBSD/stable/sys/x86/xen/xen_intr.c#1 ==== ==== //depot/vendor/FreeBSD/stable/9/sys/xen/evtchn.h#2 (text) - //SpectraBSD/stable/sys/xen/evtchn.h#1 (text) ==== content @@ -1,94 +1,87 @@ /****************************************************************************** * evtchn.h * - * Communication via Xen event channels. - * Also definitions for the device that demuxes notifications to userspace. + * Interface to /dev/xen/evtchn. + * + * Copyright (c) 2003-2005, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
* - * Copyright (c) 2004, K A Fraser + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. * - * $FreeBSD: stable/9/sys/xen/evtchn.h 196322 2009-08-17 14:38:59Z jhb $ + * $FreeBSD$ */ -#ifndef __ASM_EVTCHN_H__ -#define __ASM_EVTCHN_H__ -#include -#include -#include -#include +#ifndef __XEN_EVTCHN_H__ +#define __XEN_EVTCHN_H__ /* - * LOW-LEVEL DEFINITIONS + * Bind a fresh port to VIRQ @virq. */ +#define IOCTL_EVTCHN_BIND_VIRQ \ + _IOWR('E', 4, struct ioctl_evtchn_bind_virq) +struct ioctl_evtchn_bind_virq { + unsigned int virq; + unsigned int port; +}; /* - * Unlike notify_remote_via_evtchn(), this is safe to use across - * save/restore. Notifications on a broken connection are silently dropped. + * Bind a fresh port to remote <@remote_domain, @remote_port>. */ -void notify_remote_via_irq(int irq); +#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ + _IOWR('E', 5, struct ioctl_evtchn_bind_interdomain) +struct ioctl_evtchn_bind_interdomain { + unsigned int remote_domain, remote_port; + unsigned int port; +}; - -/* Entry point for notifications into Linux subsystems. */ -void evtchn_do_upcall(struct trapframe *frame); - -/* Entry point for notifications into the userland character device. */ -void evtchn_device_upcall(int port); +/* + * Allocate a fresh port for binding to @remote_domain. 
+ */ +#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ + _IOWR('E', 6, struct ioctl_evtchn_bind_unbound_port) +struct ioctl_evtchn_bind_unbound_port { + unsigned int remote_domain; + unsigned int port; +}; -void mask_evtchn(int port); - -void unmask_evtchn(int port); - -#ifdef SMP -void rebind_evtchn_to_cpu(int port, unsigned int cpu); -#else -#define rebind_evtchn_to_cpu(port, cpu) ((void)0) -#endif - -static inline -int test_and_set_evtchn_mask(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - return synch_test_and_set_bit(port, s->evtchn_mask); -} - -static inline void -clear_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - synch_clear_bit(port, &s->evtchn_pending[0]); -} - -static inline void -notify_remote_via_evtchn(int port) -{ - struct evtchn_send send = { .port = port }; - (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); -} - /* - * Use these to access the event channel underlying the IRQ handle returned - * by bind_*_to_irqhandler(). + * Unbind previously allocated @port. */ -int irq_to_evtchn_port(int irq); - -void ipi_pcpu(unsigned int cpu, int vector); +#define IOCTL_EVTCHN_UNBIND \ + _IOW('E', 7, struct ioctl_evtchn_unbind) +struct ioctl_evtchn_unbind { + unsigned int port; +}; /* - * CHARACTER-DEVICE DEFINITIONS + * Send event to previously allocated @port. */ +#define IOCTL_EVTCHN_NOTIFY \ + _IOW('E', 8, struct ioctl_evtchn_notify) +struct ioctl_evtchn_notify { + unsigned int port; +}; -#define PORT_NORMAL 0x0000 -#define PORT_EXCEPTION 0x8000 -#define PORTIDX_MASK 0x7fff +/* Clear and reinitialise the event buffer. Clear error condition. */ +#define IOCTL_EVTCHN_RESET \ + _IO('E', 9) -/* /dev/xen/evtchn resides at device number major=10, minor=200 */ -#define EVTCHN_MINOR 200 - -/* /dev/xen/evtchn ioctls: */ -/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ -#define EVTCHN_RESET _IO('E', 1) -/* EVTCHN_BIND: Bind to the specified event-channel port. 
*/ -#define EVTCHN_BIND _IO('E', 2) -/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ -#define EVTCHN_UNBIND _IO('E', 3) - -#endif /* __ASM_EVTCHN_H__ */ +#endif /* __XEN_EVTCHN_H__ */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/evtchn/evtchn_dev.c#2 (text) - //SpectraBSD/stable/sys/xen/evtchn/evtchn_dev.c#1 (text) ==== content @@ -22,28 +22,23 @@ #include #include #include +#include #include -#include #include -#include #include #include -#include + #include +#include +#include typedef struct evtchn_sotfc { struct selinfo ev_rsel; } evtchn_softc_t; - -#ifdef linuxcrap -/* NB. This must be shared amongst drivers if more things go in /dev/xen */ -static devfs_handle_t xen_dev_dir; -#endif - /* Only one process may open /dev/xen/evtchn at any time. */ static unsigned long evtchn_dev_inuse; @@ -72,12 +67,12 @@ void -evtchn_device_upcall(int port) +evtchn_device_upcall(evtchn_port_t port) { mtx_lock(&upcall_lock); - mask_evtchn(port); - clear_evtchn(port); + evtchn_mask_port(port); + evtchn_clear_port(port); if ( ring != NULL ) { if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) { @@ -208,7 +203,7 @@ mtx_lock_spin(&lock); for ( i = 0; i < (count/2); i++ ) if ( test_bit(kbuf[i], &bound_ports[0]) ) - unmask_evtchn(kbuf[i]); + evtchn_unmask_port(kbuf[i]); mtx_unlock_spin(&lock); rc = count; @@ -224,6 +219,7 @@ { int rc = 0; +#ifdef NOTYET mtx_lock_spin(&lock); switch ( cmd ) @@ -249,6 +245,7 @@ } mtx_unlock_spin(&lock); +#endif return rc; } @@ -309,7 +306,7 @@ mtx_lock_spin(&lock); for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) if ( synch_test_and_clear_bit(i, &bound_ports[0]) ) - mask_evtchn(i); + evtchn_mask_port(i); mtx_unlock_spin(&lock); evtchn_dev_inuse = 0; @@ -318,15 +315,14 @@ } static struct cdevsw evtchn_devsw = { - d_version: D_VERSION, - d_open: evtchn_open, - d_close: evtchn_close, - d_read: evtchn_read, - d_write: evtchn_write, - d_ioctl: evtchn_ioctl, - d_poll: evtchn_poll, - d_name: "evtchn", - d_flags: 0, + .d_version = D_VERSION, + .d_open 
= evtchn_open, + .d_close = evtchn_close, + .d_read = evtchn_read, + .d_write = evtchn_write, + .d_ioctl = evtchn_ioctl, + .d_poll = evtchn_poll, + .d_name = "evtchn", }; @@ -353,34 +349,6 @@ evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK); bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t)); - /* XXX I don't think we need any of this rubbish */ -#if 0 - if ( err != 0 ) - { - printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); - return err; - } - - /* (DEVFS) create directory '/dev/xen'. */ - xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); - - /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ - pos = devfs_generate_path(evtchn_miscdev.devfs_handle, - &link_dest[3], - sizeof(link_dest) - 3); - if ( pos >= 0 ) - strncpy(&link_dest[pos], "../", 3); - /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ - (void)devfs_mk_symlink(xen_dev_dir, - "evtchn", - DEVFS_FL_DEFAULT, - &link_dest[pos], - &symlink_handle, - NULL); - - /* (DEVFS) automatically destroy the symlink with its destination. 
*/ - devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); -#endif if (bootverbose) printf("Event-channel device installed.\n"); ==== - //SpectraBSD/stable/sys/xen/evtchn/evtchnvar.h#1 ==== ==== - //SpectraBSD/stable/sys/xen/hvm.h#1 ==== ==== - //SpectraBSD/stable/sys/xen/interface/arch-arm.h#1 ==== ==== - //SpectraBSD/stable/sys/xen/interface/arch-arm/hvm/save.h#1 ==== ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/arch-ia64.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/arch-ia64.h#1 (text) ==== content @@ -49,10 +49,11 @@ #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name #define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) #define uint64_aligned_t uint64_t -#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) +#define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) #endif +#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) #ifndef __ASSEMBLY__ typedef unsigned long xen_pfn_t; @@ -66,7 +67,7 @@ /* Maximum number of virtual CPUs in multi-processor guests. */ /* WARNING: before changing this, check that shared_info fits on a page */ -#define MAX_VIRT_CPUS 64 +#define XEN_LEGACY_MAX_VCPUS 64 /* IO ports location for PV. */ #define IO_PORTS_PADDR 0x00000ffffc000000UL @@ -198,6 +199,15 @@ unsigned long rrs[8]; // region registers unsigned long krs[8]; // kernel registers unsigned long tmp[16]; // temp registers (e.g. for hyperprivops) + + /* itc paravirtualization + * vAR.ITC = mAR.ITC + itc_offset + * itc_last is one which was lastly passed to + * the guest OS in order to prevent it from + * going backwords. + */ + unsigned long itc_offset; + unsigned long itc_last; }; }; }; @@ -392,6 +402,7 @@ #define VGCF_EXTRA_REGS (1UL << 1) /* Set extra regs. */ #define VGCF_SET_CR_IRR (1UL << 2) /* Set cr_irr[0:3]. 
*/ #define VGCF_online (1UL << 3) /* make this vcpu online */ +#define VGCF_SET_AR_ITC (1UL << 4) /* set pv ar.itc. itc_offset, itc_last */ unsigned long flags; /* VGCF_* flags */ struct vcpu_guest_context_regs regs; @@ -453,6 +464,11 @@ /* unexpose the foreign domain's p2m table into privileged domain */ #define IA64_DOM0VP_unexpose_foreign_p2m 13 +/* get memmap_info and memmap. It is possible to map the page directly + by foreign page mapping, but there is a race between writer. + This hypercall avoids such race. */ +#define IA64_DOM0VP_get_memmap 14 + // flags for page assignement to pseudo physical address space #define _ASSIGN_readonly 0 #define ASSIGN_readonly (1UL << _ASSIGN_readonly) ==== - //SpectraBSD/stable/sys/xen/interface/arch-ia64/debug_op.h#1 ==== ==== - //SpectraBSD/stable/sys/xen/interface/arch-ia64/hvm/memmap.h#1 ==== ==== - //SpectraBSD/stable/sys/xen/interface/arch-ia64/hvm/save.h#1 ==== ==== - //SpectraBSD/stable/sys/xen/interface/arch-ia64/sioemu.h#1 ==== ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/arch-x86/cpuid.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/arch-x86/cpuid.h#1 (text) ==== content @@ -24,7 +24,7 @@ * Copyright (c) 2007 Citrix Systems, Inc. 
* * Authors: - * Keir Fraser + * Keir Fraser */ #ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/arch-x86/hvm/save.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/arch-x86/hvm/save.h#1 (text) ==== content @@ -38,7 +38,7 @@ uint32_t version; /* File format version */ uint64_t changeset; /* Version of Xen that saved this file */ uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ - uint32_t pad0; + uint32_t gtsc_khz; /* Guest's TSC frequency in kHz */ }; DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); @@ -46,6 +46,8 @@ /* * Processor + * + * Compat: Pre-3.4 didn't have msr_tsc_aux */ struct hvm_hw_cpu { @@ -123,9 +125,116 @@ uint32_t tr_arbytes; uint32_t ldtr_arbytes; - uint32_t sysenter_cs; - uint32_t padding0; + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + + /* msr for em64t */ + uint64_t shadow_gs; + + /* msr content saved/restored. */ + uint64_t msr_flags; + uint64_t msr_lstar; + uint64_t msr_star; + uint64_t msr_cstar; + uint64_t msr_syscall_mask; + uint64_t msr_efer; + uint64_t msr_tsc_aux; + + /* guest's idea of what rdtsc() would return */ + uint64_t tsc; + + /* pending event, if any */ + union { + uint32_t pending_event; + struct { + uint8_t pending_vector:8; + uint8_t pending_type:3; + uint8_t pending_error_valid:1; + uint32_t pending_reserved:19; + uint8_t pending_valid:1; + }; + }; + /* error code for pending event */ + uint32_t error_code; +}; + +struct hvm_hw_cpu_compat { + uint8_t fpu_regs[512]; + + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; + + uint32_t 
cs_sel; + uint32_t ds_sel; + uint32_t es_sel; + uint32_t fs_sel; + uint32_t gs_sel; + uint32_t ss_sel; + uint32_t tr_sel; + uint32_t ldtr_sel; + + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t es_limit; + uint32_t fs_limit; + uint32_t gs_limit; + uint32_t ss_limit; + uint32_t tr_limit; + uint32_t ldtr_limit; + uint32_t idtr_limit; + uint32_t gdtr_limit; + + uint64_t cs_base; + uint64_t ds_base; + uint64_t es_base; + uint64_t fs_base; + uint64_t gs_base; + uint64_t ss_base; + uint64_t tr_base; + uint64_t ldtr_base; + uint64_t idtr_base; + uint64_t gdtr_base; + + uint32_t cs_arbytes; + uint32_t ds_arbytes; + uint32_t es_arbytes; + uint32_t fs_arbytes; + uint32_t gs_arbytes; + uint32_t ss_arbytes; + uint32_t tr_arbytes; + uint32_t ldtr_arbytes; + uint64_t sysenter_cs; uint64_t sysenter_esp; uint64_t sysenter_eip; @@ -139,6 +248,7 @@ uint64_t msr_cstar; uint64_t msr_syscall_mask; uint64_t msr_efer; + /*uint64_t msr_tsc_aux; COMPAT */ /* guest's idea of what rdtsc() would return */ uint64_t tsc; @@ -158,8 +268,22 @@ uint32_t error_code; }; -DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu); +static inline int _hvm_hw_fix_cpu(void *h) { + struct hvm_hw_cpu *new=h; + struct hvm_hw_cpu_compat *old=h; + + /* If we copy from the end backwards, we should + * be able to do the modification in-place */ + new->error_code=old->error_code; + new->pending_event=old->pending_event; + new->tsc=old->tsc; + new->msr_tsc_aux=0; + + return 0; +} +DECLARE_HVM_SAVE_TYPE_COMPAT(CPU, 2, struct hvm_hw_cpu, \ + struct hvm_hw_cpu_compat, _hvm_hw_fix_cpu); /* * PIC @@ -220,12 +344,7 @@ * IO-APIC */ -#ifdef __ia64__ -#define VIOAPIC_IS_IOSAPIC 1 -#define VIOAPIC_NUM_PINS 24 -#else #define VIOAPIC_NUM_PINS 48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. 
*/ -#endif struct hvm_hw_vioapic { uint64_t base_address; @@ -244,13 +363,8 @@ uint8_t trig_mode:1; uint8_t mask:1; uint8_t reserve:7; -#if !VIOAPIC_IS_IOSAPIC uint8_t reserved[4]; uint8_t dest_id; -#else - uint8_t reserved[3]; - uint16_t dest_id; -#endif } fields; } redirtbl[VIOAPIC_NUM_PINS]; }; @@ -266,6 +380,7 @@ uint64_t apic_base_msr; uint32_t disabled; /* VLAPIC_xx_DISABLED */ uint32_t timer_divisor; + uint64_t tdt_msr; }; DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic); @@ -287,7 +402,7 @@ * Indexed by: device*4 + INTx#. */ union { - DECLARE_BITMAP(i, 32*4); + unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */ uint64_t pad[2]; }; }; @@ -300,7 +415,7 @@ * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). */ union { - DECLARE_BITMAP(i, 16); + unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */ uint64_t pad[1]; }; }; @@ -421,9 +536,54 @@ DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr); +/* + * The save area of XSAVE/XRSTOR. + */ + +struct hvm_hw_cpu_xsave { + uint64_t xfeature_mask; + uint64_t xcr0; /* Updated by XSETBV */ + uint64_t xcr0_accum; /* Updated by XSETBV */ + struct { + struct { char x[512]; } fpu_sse; + + struct { + uint64_t xstate_bv; /* Updated by XRSTOR */ + uint64_t reserved[7]; + } xsave_hdr; /* The 64-byte header */ + + struct { char x[0]; } ymm; /* YMM */ + } save_area; +}; + +#define CPU_XSAVE_CODE 16 + +/* + * Viridian hypervisor context. 
+ */ + +struct hvm_viridian_domain_context { + uint64_t hypercall_gpa; + uint64_t guest_os_id; +}; + +DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context); + +struct hvm_viridian_vcpu_context { + uint64_t apic_assist; +}; + +DECLARE_HVM_SAVE_TYPE(VIRIDIAN_VCPU, 17, struct hvm_viridian_vcpu_context); + +struct hvm_vmce_vcpu { + uint64_t caps; +}; + +DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu); + /* * Largest type-code in use */ -#define HVM_SAVE_CODE_MAX 14 +#define HVM_SAVE_CODE_MAX 18 #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/arch-x86/xen-mca.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/arch-x86/xen-mca.h#1 (text) ==== content @@ -56,13 +56,20 @@ /* Hypercall */ #define __HYPERVISOR_mca __HYPERVISOR_arch_0 -#define XEN_MCA_INTERFACE_VERSION 0x03000001 +/* + * The xen-unstable repo has interface version 0x03000001; out interface + * is incompatible with that and any future minor revisions, so we + * choose a different version number range that is numerically less + * than that used in xen-unstable. + */ +#define XEN_MCA_INTERFACE_VERSION 0x01ecc003 -/* IN: Dom0 calls hypercall from MC event handler. */ -#define XEN_MC_CORRECTABLE 0x0 -/* IN: Dom0/DomU calls hypercall from MC trap handler. */ -#define XEN_MC_TRAP 0x1 -/* XEN_MC_CORRECTABLE and XEN_MC_TRAP are mutually exclusive. 
*/ +/* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */ +#define XEN_MC_NONURGENT 0x0001 +/* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */ +#define XEN_MC_URGENT 0x0002 +/* IN: Dom0 acknowledges previosly-fetched telemetry */ +#define XEN_MC_ACK 0x0004 /* OUT: All is ok */ #define XEN_MC_OK 0x0 @@ -97,6 +104,7 @@ #define MC_TYPE_GLOBAL 0 #define MC_TYPE_BANK 1 #define MC_TYPE_EXTENDED 2 +#define MC_TYPE_RECOVERY 3 struct mcinfo_common { uint16_t type; /* structure type */ @@ -106,19 +114,24 @@ #define MC_FLAG_CORRECTABLE (1 << 0) #define MC_FLAG_UNCORRECTABLE (1 << 1) - +#define MC_FLAG_RECOVERABLE (1 << 2) +#define MC_FLAG_POLLED (1 << 3) +#define MC_FLAG_RESET (1 << 4) +#define MC_FLAG_CMCI (1 << 5) +#define MC_FLAG_MCE (1 << 6) /* contains global x86 mc information */ struct mcinfo_global { struct mcinfo_common common; /* running domain at the time in error (most likely the impacted one) */ uint16_t mc_domid; + uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ uint32_t mc_socketid; /* physical socket of the physical core */ uint16_t mc_coreid; /* physical impacted core */ uint16_t mc_core_threadid; /* core thread of physical core */ - uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ + uint32_t mc_apicid; + uint32_t mc_flags; uint64_t mc_gstatus; /* global status */ - uint32_t mc_flags; }; /* contains bank local x86 mc information */ @@ -132,6 +145,8 @@ uint64_t mc_addr; /* bank address, only valid * if addr bit is set in mc_status */ uint64_t mc_misc; + uint64_t mc_ctrl2; + uint64_t mc_tsc; }; @@ -150,21 +165,121 @@ * multiple times. */ uint32_t mc_msrs; /* Number of msr with valid values. */ - struct mcinfo_msr mc_msr[5]; + /* + * Currently Intel extended MSR (32/64) include all gp registers + * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be + * useful at present. So expand this array to 16/32 to leave room. + */ + struct mcinfo_msr mc_msr[sizeof(void *) * 4]; +}; + +/* Recovery Action flags. 
Giving recovery result information to DOM0 */ + +/* Xen takes successful recovery action, the error is recovered */ +#define REC_ACTION_RECOVERED (0x1 << 0) +/* No action is performed by XEN */ +#define REC_ACTION_NONE (0x1 << 1) +/* It's possible DOM0 might take action ownership in some case */ +#define REC_ACTION_NEED_RESET (0x1 << 2) + +/* Different Recovery Action types, if the action is performed successfully, + * REC_ACTION_RECOVERED flag will be returned. + */ + +/* Page Offline Action */ +#define MC_ACTION_PAGE_OFFLINE (0x1 << 0) +/* CPU offline Action */ +#define MC_ACTION_CPU_OFFLINE (0x1 << 1) +/* L3 cache disable Action */ +#define MC_ACTION_CACHE_SHRINK (0x1 << 2) + +/* Below interface used between XEN/DOM0 for passing XEN's recovery action + * information to DOM0. + * usage Senario: After offlining broken page, XEN might pass its page offline + * recovery action result to DOM0. DOM0 will save the information in + * non-volatile memory for further proactive actions, such as offlining the + * easy broken page earlier when doing next reboot. 
+*/ +struct page_offline_action +{ + /* Params for passing the offlined page number to DOM0 */ + uint64_t mfn; + uint64_t status; +}; + +struct cpu_offline_action +{ + /* Params for passing the identity of the offlined CPU to DOM0 */ + uint32_t mc_socketid; + uint16_t mc_coreid; + uint16_t mc_core_threadid; +}; + +#define MAX_UNION_SIZE 16 +struct mcinfo_recovery +{ + struct mcinfo_common common; + uint16_t mc_bank; /* bank nr */ + uint8_t action_flags; + uint8_t action_types; + union { + struct page_offline_action page_retire; + struct cpu_offline_action cpu_offline; + uint8_t pad[MAX_UNION_SIZE]; + } action_info; }; + #define MCINFO_HYPERCALLSIZE 1024 #define MCINFO_MAXSIZE 768 +#define MCINFO_FLAGS_UNCOMPLETE 0x1 struct mc_info { /* Number of mcinfo_* entries in mi_data */ uint32_t mi_nentries; - - uint8_t mi_data[MCINFO_MAXSIZE - sizeof(uint32_t)]; + uint32_t flags; + uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8]; }; typedef struct mc_info mc_info_t; +DEFINE_XEN_GUEST_HANDLE(mc_info_t); +#define __MC_MSR_ARRAYSIZE 8 +#define __MC_NMSRS 1 +#define MC_NCAPS 7 /* 7 CPU feature flag words */ +#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */ +#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */ +#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */ +#define MC_CAPS_LINUX 3 /* Linux-defined */ +#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */ +#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ +#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ +struct mcinfo_logical_cpu { + uint32_t mc_cpunr; + uint32_t mc_chipid; + uint16_t mc_coreid; + uint16_t mc_threadid; + uint32_t mc_apicid; + uint32_t mc_clusterid; + uint32_t mc_ncores; + uint32_t mc_ncores_active; + uint32_t mc_nthreads; + int32_t mc_cpuid_level; + uint32_t mc_family; + uint32_t mc_vendor; + uint32_t mc_model; + uint32_t mc_step; + char mc_vendorid[16]; + char mc_brandid[64]; + uint32_t mc_cpu_caps[MC_NCAPS]; + uint32_t mc_cache_size; + uint32_t 
mc_cache_alignment; + int32_t mc_nmsrvals; + struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; +}; +typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); + /* * OS's should use these instead of writing their own lookup function @@ -181,12 +296,12 @@ * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); */ #define x86_mcinfo_first(_mi) \ - (struct mcinfo_common *)((_mi)->mi_data) + ((struct mcinfo_common *)(_mi)->mi_data) /* Prototype: * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); */ #define x86_mcinfo_next(_mic) \ - (struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size) + ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)) /* Prototype: * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); @@ -228,14 +343,15 @@ #define XEN_MC_fetch 1 struct xen_mc_fetch { /* IN/OUT variables. */ - uint32_t flags; - -/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */ -/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA, XEN_MC_NOMATCH */ + uint32_t flags; /* IN: XEN_MC_NONURGENT, XEN_MC_URGENT, + XEN_MC_ACK if ack'ing an earlier fetch */ + /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, + XEN_MC_NODATA, XEN_MC_NOMATCH */ + uint32_t _pad0; + uint64_t fetch_id; /* OUT: id for ack, IN: id we are ack'ing */ /* OUT variables. */ - uint32_t fetch_idx; /* only useful for Dom0 for the notify hypercall */ - struct mc_info mc_info; + XEN_GUEST_HANDLE(mc_info_t) data; }; typedef struct xen_mc_fetch xen_mc_fetch_t; DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t); @@ -250,7 +366,6 @@ uint16_t mc_domid; /* The unprivileged domain to notify. */ uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify. * Usually echo'd value from the fetch hypercall. */ - uint32_t fetch_idx; /* echo'd value from the fetch hypercall. */ /* IN/OUT variables. 
*/ uint32_t flags; @@ -261,14 +376,60 @@ typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); +#define XEN_MC_physcpuinfo 3 +struct xen_mc_physcpuinfo { + /* IN/OUT */ + uint32_t ncpus; + uint32_t _pad0; + /* OUT */ + XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; +}; + +#define XEN_MC_msrinject 4 +#define MC_MSRINJ_MAXMSRS 8 +struct xen_mc_msrinject { + /* IN */ + uint32_t mcinj_cpunr; /* target processor id */ + uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ + uint32_t mcinj_count; /* 0 .. count-1 in array are valid */ + uint32_t _pad0; + struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; +}; + +/* Flags for mcinj_flags above; bits 16-31 are reserved */ +#define MC_MSRINJ_F_INTERPOSE 0x1 +#define XEN_MC_mceinject 5 +struct xen_mc_mceinject { + unsigned int mceinj_cpunr; /* target processor id */ +}; + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#define XEN_MC_inject_v2 6 +#define XEN_MC_INJECT_TYPE_MASK 0x7 +#define XEN_MC_INJECT_TYPE_MCE 0x0 +#define XEN_MC_INJECT_TYPE_CMCI 0x1 + +#define XEN_MC_INJECT_CPU_BROADCAST 0x8 + +struct xen_mc_inject_v2 { + uint32_t flags; + struct xenctl_cpumap cpumap; +}; +#endif + struct xen_mc { uint32_t cmd; uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ union { struct xen_mc_fetch mc_fetch; struct xen_mc_notifydomain mc_notifydomain; - uint8_t pad[MCINFO_HYPERCALLSIZE]; + struct xen_mc_physcpuinfo mc_physcpuinfo; + struct xen_mc_msrinject mc_msrinject; + struct xen_mc_mceinject mc_mceinject; +#if defined(__XEN__) || defined(__XEN_TOOLS__) + struct xen_mc_inject_v2 mc_inject_v2; +#endif } u; }; typedef struct xen_mc xen_mc_t; ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/arch-x86/xen-x86_32.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/arch-x86/xen-x86_32.h#1 (text) ==== content @@ -39,16 +39,7 @@ * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) */ -#if __XEN_INTERFACE_VERSION__ < 0x00030203 /* - * Legacy hypercall 
interface: - * As above, except the entry sequence to the hypervisor is: - * mov $hypercall-number*32,%eax ; int $0x82 - */ -#define TRAP_INSTR "int $0x82" -#endif - -/* * These flat segments are in the Xen-private section of every GDT. Since these * are also present in the initial GDT, many OSes will be able to avoid * installing their own GDT. @@ -111,8 +102,8 @@ __guest_handle_ ## name; \ typedef struct { union { type *p; uint64_aligned_t q; }; } \ __guest_handle_64_ ## name -#undef set_xen_guest_handle -#define set_xen_guest_handle(hnd, val) \ +#undef set_xen_guest_handle_raw +#define set_xen_guest_handle_raw(hnd, val) \ do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ (hnd).p = val; \ } while ( 0 ) ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/arch-x86/xen-x86_64.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/arch-x86/xen-x86_64.h#1 (text) ==== content @@ -36,16 +36,6 @@ * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) */ -#if __XEN_INTERFACE_VERSION__ < 0x00030203 -/* - * Legacy hypercall interface: - * As above, except the entry sequence to the hypervisor is: - * mov $hypercall-number*32,%eax ; syscall - * Clobbered: %rcx, %r11, argument registers (as above) - */ -#define TRAP_INSTR "syscall" -#endif - /* * 64-bit segment selectors * These flat segments are in the Xen-private section of every GDT. 
Since these ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/arch-x86/xen.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/arch-x86/xen.h#1 (text) ==== content @@ -32,8 +32,7 @@ #define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ typedef struct { type *p; } __guest_handle_ ## name #else -#error "using old handle" -#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ typedef type * __guest_handle_ ## name #endif @@ -43,10 +42,11 @@ #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name #define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) -#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) +#define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) #endif +#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) #if defined(__i386__) #include @@ -63,6 +63,11 @@ * SEGMENT DESCRIPTOR TABLES */ /* + * ` enum neg_errnoval + * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries); + * ` + */ +/* * A number of GDT entries are reserved by Xen. These are not situated at the * start of the GDT because some stupid OSes export hard-coded selector values * in their ABI. These hard-coded values are always near the start of the GDT, @@ -72,15 +77,28 @@ #define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) -/* Maximum number of virtual CPUs in multi-processor guests. */ -#define MAX_VIRT_CPUS 32 +/* Maximum number of virtual CPUs in legacy multi-processor guests. */ +#define XEN_LEGACY_MAX_VCPUS 32 #ifndef __ASSEMBLY__ typedef unsigned long xen_ulong_t; /* + * ` enum neg_errnoval + * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp); + * ` + * Sets the stack segment and pointer for the current vcpu. 
+ */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]); + * ` + */ +/* * Send an array of these to HYPERVISOR_set_trap_table(). + * Terminate the array with a sentinel entry, with traps[].address==0. * The privilege level specifies which modes may enter a trap via a software * interrupt. On x86/64, since rings 1 and 2 are unavailable, we allocate * privilege levels as follows: @@ -147,7 +165,7 @@ unsigned int event_callback_cs; /* compat CS of event cb */ unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */ }; - } u; + }; #else unsigned long syscall_callback_eip; #endif @@ -175,6 +193,24 @@ #endif /* !__ASSEMBLY__ */ /* + * ` enum neg_errnoval + * ` HYPERVISOR_fpu_taskswitch(int set); + * ` + * Sets (if set!=0) or clears (if set==0) CR0.TS. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_debugreg(int regno, unsigned long value); + * + * ` unsigned long + * ` HYPERVISOR_get_debugreg(int regno); + * For 0<=reg<=7, returns the debug register value. + * For other values of reg, returns ((unsigned long)-EINVAL). + * (Unfortunately, this interface is defective.) + */ + +/* * Prefix forces emulation of some non-trapping instructions. * Currently only CPUID. */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/arch-x86_64.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/arch-x86_64.h#1 (text) ==== content @@ -24,4 +24,20 @@ * Copyright (c) 2004-2006, K A Fraser */ -#include "arch-x86/xen.h" +#include + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_callbacks(unsigned long event_selector, + * ` unsigned long event_address, + * ` unsigned long failsafe_selector, + * ` unsigned long failsafe_address); + * ` + * Register for callbacks on events. When an event (from an event + * channel) occurs, event_address is used as the value of eip. + * + * A similar callback occurs if the segment selectors are invalid. + * failsafe_address is used as the value of eip. 
+ * + * On x86_64, event_selector and failsafe_selector are ignored (???). + */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/domctl.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/domctl.h#1 (text) ==== content @@ -32,41 +32,38 @@ #error "domctl operations are intended for use by node control tools only" #endif -#include "xen.h" +#include +#include -#define XEN_DOMCTL_INTERFACE_VERSION 0x00000005 +#define XEN_DOMCTL_INTERFACE_VERSION 0x00000008 -struct xenctl_cpumap { - XEN_GUEST_HANDLE_64(uint8_t) bitmap; - uint32_t nr_cpus; -}; - /* * NB. xen_domctl.domain is an IN/OUT parameter for this operation. * If it is specified as zero, an id is auto-allocated and returned. */ -#define XEN_DOMCTL_createdomain 1 +/* XEN_DOMCTL_createdomain */ struct xen_domctl_createdomain { /* IN parameters */ uint32_t ssidref; xen_domain_handle_t handle; /* Is this an HVM guest (as opposed to a PV guest)? */ -#define _XEN_DOMCTL_CDF_hvm_guest 0 -#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest) +#define _XEN_DOMCTL_CDF_hvm_guest 0 +#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest) /* Use hardware-assisted paging if available? */ -#define _XEN_DOMCTL_CDF_hap 1 -#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap) +#define _XEN_DOMCTL_CDF_hap 1 +#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap) + /* Should domain memory integrity be verifed by tboot during Sx? */ +#define _XEN_DOMCTL_CDF_s3_integrity 2 +#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity) + /* Disable out-of-sync shadow page tables? 
*/ +#define _XEN_DOMCTL_CDF_oos_off 3 +#define XEN_DOMCTL_CDF_oos_off (1U<<_XEN_DOMCTL_CDF_oos_off) uint32_t flags; }; typedef struct xen_domctl_createdomain xen_domctl_createdomain_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t); -#define XEN_DOMCTL_destroydomain 2 -#define XEN_DOMCTL_pausedomain 3 -#define XEN_DOMCTL_unpausedomain 4 -#define XEN_DOMCTL_resumedomain 27 - -#define XEN_DOMCTL_getdomaininfo 5 +/* XEN_DOMCTL_getdomaininfo */ struct xen_domctl_getdomaininfo { /* OUT variables. */ domid_t domain; /* Also echoed in domctl.domain */ @@ -91,34 +88,34 @@ /* Being debugged. */ #define _XEN_DOMINF_debugged 6 #define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged) - /* CPU to which this domain is bound. */ -#define XEN_DOMINF_cpumask 255 -#define XEN_DOMINF_cpushift 8 /* XEN_DOMINF_shutdown guest-supplied code. */ #define XEN_DOMINF_shutdownmask 255 #define XEN_DOMINF_shutdownshift 16 uint32_t flags; /* XEN_DOMINF_* */ uint64_aligned_t tot_pages; uint64_aligned_t max_pages; + uint64_aligned_t shr_pages; + uint64_aligned_t paged_pages; uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */ uint64_aligned_t cpu_time; uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */ uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ uint32_t ssidref; xen_domain_handle_t handle; + uint32_t cpupool; }; typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t); -#define XEN_DOMCTL_getmemlist 6 +/* XEN_DOMCTL_getmemlist */ struct xen_domctl_getmemlist { /* IN variables. */ /* Max entries to write to output buffer. */ uint64_aligned_t max_pfns; /* Start index in guest's page list. */ uint64_aligned_t start_pfn; - XEN_GUEST_HANDLE_64(uint64_t) buffer; + XEN_GUEST_HANDLE_64(uint64) buffer; /* OUT variables. 
*/ uint64_aligned_t num_pfns; }; @@ -126,7 +123,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t); -#define XEN_DOMCTL_getpageframeinfo 7 +/* XEN_DOMCTL_getpageframeinfo */ #define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28 #define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28) @@ -137,6 +134,8 @@ #define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28) #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31) #define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */ +#define XEN_DOMCTL_PFINFO_XALLOC (0xeU<<28) /* allocate-only page */ +#define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28) #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28) struct xen_domctl_getpageframeinfo { @@ -150,21 +149,29 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t); -#define XEN_DOMCTL_getpageframeinfo2 8 +/* XEN_DOMCTL_getpageframeinfo2 */ struct xen_domctl_getpageframeinfo2 { /* IN variables. */ uint64_aligned_t num; /* IN/OUT variables. */ - XEN_GUEST_HANDLE_64(uint32_t) array; + XEN_GUEST_HANDLE_64(uint32) array; }; typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); +/* XEN_DOMCTL_getpageframeinfo3 */ +struct xen_domctl_getpageframeinfo3 { + /* IN variables. */ + uint64_aligned_t num; + /* IN/OUT variables. */ + XEN_GUEST_HANDLE_64(xen_pfn_t) array; +}; + /* * Control shadow pagetables operation */ -#define XEN_DOMCTL_shadow_op 10 +/* XEN_DOMCTL_shadow_op */ /* Disable shadow mode. */ #define XEN_DOMCTL_SHADOW_OP_OFF 0 @@ -229,7 +236,7 @@ uint32_t mb; /* Shadow memory allocation in MB */ /* OP_PEEK / OP_CLEAN */ - XEN_GUEST_HANDLE_64(uint8_t) dirty_bitmap; + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */ struct xen_domctl_shadow_op_stats stats; }; @@ -237,7 +244,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t); -#define XEN_DOMCTL_max_mem 11 +/* XEN_DOMCTL_max_mem */ struct xen_domctl_max_mem { /* IN variables. 
*/ uint64_aligned_t max_memkb; @@ -246,8 +253,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t); -#define XEN_DOMCTL_setvcpucontext 12 -#define XEN_DOMCTL_getvcpucontext 13 +/* XEN_DOMCTL_setvcpucontext */ +/* XEN_DOMCTL_getvcpucontext */ struct xen_domctl_vcpucontext { uint32_t vcpu; /* IN */ XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ @@ -256,7 +263,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t); -#define XEN_DOMCTL_getvcpuinfo 14 +/* XEN_DOMCTL_getvcpuinfo */ struct xen_domctl_getvcpuinfo { /* IN variables. */ uint32_t vcpu; @@ -272,8 +279,8 @@ /* Get/set which physical cpus a vcpu can execute on. */ -#define XEN_DOMCTL_setvcpuaffinity 9 -#define XEN_DOMCTL_getvcpuaffinity 25 +/* XEN_DOMCTL_setvcpuaffinity */ +/* XEN_DOMCTL_getvcpuaffinity */ struct xen_domctl_vcpuaffinity { uint32_t vcpu; /* IN */ struct xenctl_cpumap cpumap; /* IN/OUT */ @@ -282,7 +289,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t); -#define XEN_DOMCTL_max_vcpus 15 +/* XEN_DOMCTL_max_vcpus */ struct xen_domctl_max_vcpus { uint32_t max; /* maximum number of vcpus */ }; @@ -290,10 +297,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t); -#define XEN_DOMCTL_scheduler_op 16 +/* XEN_DOMCTL_scheduler_op */ /* Scheduler types. */ #define XEN_SCHEDULER_SEDF 4 #define XEN_SCHEDULER_CREDIT 5 +#define XEN_SCHEDULER_CREDIT2 6 +#define XEN_SCHEDULER_ARINC653 7 /* Set or get info? 
*/ #define XEN_DOMCTL_SCHEDOP_putinfo 0 #define XEN_DOMCTL_SCHEDOP_getinfo 1 @@ -312,13 +321,16 @@ uint16_t weight; uint16_t cap; } credit; + struct xen_domctl_sched_credit2 { + uint16_t weight; + } credit2; } u; }; typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t); -#define XEN_DOMCTL_setdomainhandle 17 +/* XEN_DOMCTL_setdomainhandle */ struct xen_domctl_setdomainhandle { xen_domain_handle_t handle; }; @@ -326,7 +338,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t); -#define XEN_DOMCTL_setdebugging 18 +/* XEN_DOMCTL_setdebugging */ struct xen_domctl_setdebugging { uint8_t enable; }; @@ -334,7 +346,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t); -#define XEN_DOMCTL_irq_permission 19 +/* XEN_DOMCTL_irq_permission */ struct xen_domctl_irq_permission { uint8_t pirq; uint8_t allow_access; /* flag to specify enable/disable of IRQ access */ @@ -343,7 +355,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t); -#define XEN_DOMCTL_iomem_permission 20 +/* XEN_DOMCTL_iomem_permission */ struct xen_domctl_iomem_permission { uint64_aligned_t first_mfn;/* first page (physical page number) in range */ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ @@ -353,7 +365,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t); -#define XEN_DOMCTL_ioport_permission 21 +/* XEN_DOMCTL_ioport_permission */ struct xen_domctl_ioport_permission { uint32_t first_port; /* first port int range */ uint32_t nr_ports; /* size of port range */ @@ -363,7 +375,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t); -#define XEN_DOMCTL_hypercall_init 22 +/* XEN_DOMCTL_hypercall_init */ struct xen_domctl_hypercall_init { uint64_aligned_t gmfn; /* GMFN to be initialised */ }; @@ -371,7 +383,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t); -#define XEN_DOMCTL_arch_setup 23 +/* XEN_DOMCTL_arch_setup */ #define _XEN_DOMAINSETUP_hvm_guest 0 #define XEN_DOMAINSETUP_hvm_guest 
(1UL<<_XEN_DOMAINSETUP_hvm_guest) #define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */ @@ -391,35 +403,33 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t); -#define XEN_DOMCTL_settimeoffset 24 +/* XEN_DOMCTL_settimeoffset */ struct xen_domctl_settimeoffset { int32_t time_offset_seconds; /* applied to domain wallclock time */ }; typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); - -#define XEN_DOMCTL_gethvmcontext 33 -#define XEN_DOMCTL_sethvmcontext 34 +/* XEN_DOMCTL_gethvmcontext */ +/* XEN_DOMCTL_sethvmcontext */ typedef struct xen_domctl_hvmcontext { uint32_t size; /* IN/OUT: size of buffer / bytes filled */ - XEN_GUEST_HANDLE_64(uint8_t) buffer; /* IN/OUT: data, or call - * gethvmcontext with NULL - * buffer to get size - * req'd */ + XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call + * gethvmcontext with NULL + * buffer to get size req'd */ } xen_domctl_hvmcontext_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t); -#define XEN_DOMCTL_set_address_size 35 -#define XEN_DOMCTL_get_address_size 36 +/* XEN_DOMCTL_set_address_size */ +/* XEN_DOMCTL_get_address_size */ typedef struct xen_domctl_address_size { uint32_t size; } xen_domctl_address_size_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t); -#define XEN_DOMCTL_real_mode_area 26 +/* XEN_DOMCTL_real_mode_area */ struct xen_domctl_real_mode_area { uint32_t log; /* log2 of Real Mode Area size */ }; @@ -427,10 +437,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); -#define XEN_DOMCTL_sendtrigger 28 +/* XEN_DOMCTL_sendtrigger */ #define XEN_DOMCTL_SENDTRIGGER_NMI 0 #define XEN_DOMCTL_SENDTRIGGER_RESET 1 #define XEN_DOMCTL_SENDTRIGGER_INIT 2 +#define XEN_DOMCTL_SENDTRIGGER_POWER 3 +#define XEN_DOMCTL_SENDTRIGGER_SLEEP 4 struct xen_domctl_sendtrigger { uint32_t trigger; /* IN */ uint32_t vcpu; /* IN */ @@ -440,19 +452,19 @@ /* Assign PCI device to HVM guest. Sets up IOMMU structures. 
*/ -#define XEN_DOMCTL_assign_device 37 -#define XEN_DOMCTL_test_assign_device 45 -#define XEN_DOMCTL_deassign_device 47 +/* XEN_DOMCTL_assign_device */ +/* XEN_DOMCTL_test_assign_device */ +/* XEN_DOMCTL_deassign_device */ struct xen_domctl_assign_device { - uint32_t machine_bdf; /* machine PCI ID of assigned device */ + uint32_t machine_sbdf; /* machine PCI ID of assigned device */ }; typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); -/* Retrieve sibling devices infomation of machine_bdf */ -#define XEN_DOMCTL_get_device_group 50 +/* Retrieve sibling devices infomation of machine_sbdf */ +/* XEN_DOMCTL_get_device_group */ struct xen_domctl_get_device_group { - uint32_t machine_bdf; /* IN */ + uint32_t machine_sbdf; /* IN */ uint32_t max_sdevs; /* IN */ uint32_t num_sdevs; /* OUT */ XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */ @@ -461,12 +473,13 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t); /* Pass-through interrupts: bind real irq -> hvm devfn. */ -#define XEN_DOMCTL_bind_pt_irq 38 -#define XEN_DOMCTL_unbind_pt_irq 48 +/* XEN_DOMCTL_bind_pt_irq */ +/* XEN_DOMCTL_unbind_pt_irq */ typedef enum pt_irq_type_e { PT_IRQ_TYPE_PCI, PT_IRQ_TYPE_ISA, PT_IRQ_TYPE_MSI, + PT_IRQ_TYPE_MSI_TRANSLATE, } pt_irq_type_t; struct xen_domctl_bind_pt_irq { uint32_t machine_irq; @@ -485,6 +498,7 @@ struct { uint8_t gvec; uint32_t gflags; + uint64_aligned_t gtable; } msi; } u; }; @@ -493,7 +507,7 @@ /* Bind machine I/O address range -> HVM address range. */ -#define XEN_DOMCTL_memory_mapping 39 +/* XEN_DOMCTL_memory_mapping */ #define DPCI_ADD_MAPPING 1 #define DPCI_REMOVE_MAPPING 0 struct xen_domctl_memory_mapping { @@ -508,7 +522,7 @@ /* Bind machine I/O port range -> HVM I/O port range. 
*/ -#define XEN_DOMCTL_ioport_mapping 40 +/* XEN_DOMCTL_ioport_mapping */ struct xen_domctl_ioport_mapping { uint32_t first_gport; /* first guest IO port*/ uint32_t first_mport; /* first machine IO port */ @@ -522,7 +536,7 @@ /* * Pin caching type of RAM space for x86 HVM domU. */ -#define XEN_DOMCTL_pin_mem_cacheattr 41 +/* XEN_DOMCTL_pin_mem_cacheattr */ /* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */ #define XEN_DOMCTL_MEM_CACHEATTR_UC 0 #define XEN_DOMCTL_MEM_CACHEATTR_WC 1 @@ -532,20 +546,20 @@ #define XEN_DOMCTL_MEM_CACHEATTR_UCM 7 struct xen_domctl_pin_mem_cacheattr { uint64_aligned_t start, end; - unsigned int type; /* XEN_DOMCTL_MEM_CACHEATTR_* */ + uint32_t type; /* XEN_DOMCTL_MEM_CACHEATTR_* */ }; typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t); -#define XEN_DOMCTL_set_ext_vcpucontext 42 -#define XEN_DOMCTL_get_ext_vcpucontext 43 +/* XEN_DOMCTL_set_ext_vcpucontext */ +/* XEN_DOMCTL_get_ext_vcpucontext */ struct xen_domctl_ext_vcpucontext { /* IN: VCPU that this call applies to. 
*/ uint32_t vcpu; /* * SET: Size of struct (IN) - * GET: Size of struct (OUT) + * GET: Size of struct (OUT, up to 128 bytes) */ uint32_t size; #if defined(__i386__) || defined(__x86_64__) @@ -557,6 +571,7 @@ uint16_t sysenter_callback_cs; uint8_t syscall32_disables_events; uint8_t sysenter_disables_events; + uint64_aligned_t mcg_cap; #endif }; typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t; @@ -565,7 +580,7 @@ /* * Set optimizaton features for a domain */ -#define XEN_DOMCTL_set_opt_feature 44 +/* XEN_DOMCTL_set_opt_feature */ struct xen_domctl_set_opt_feature { #if defined(__ia64__) struct xen_ia64_opt_feature optf; @@ -580,7 +595,7 @@ /* * Set the target domain for a domain */ -#define XEN_DOMCTL_set_target 46 +/* XEN_DOMCTL_set_target */ struct xen_domctl_set_target { domid_t target; }; @@ -589,19 +604,19 @@ #if defined(__i386__) || defined(__x86_64__) # define XEN_CPUID_INPUT_UNUSED 0xFFFFFFFF -# define XEN_DOMCTL_set_cpuid 49 +/* XEN_DOMCTL_set_cpuid */ struct xen_domctl_cpuid { - unsigned int input[2]; - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; + uint32_t input[2]; + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; }; typedef struct xen_domctl_cpuid xen_domctl_cpuid_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t); #endif -#define XEN_DOMCTL_subscribe 29 +/* XEN_DOMCTL_subscribe */ struct xen_domctl_subscribe { uint32_t port; /* IN */ }; @@ -612,12 +627,285 @@ * Define the maximum machine address size which should be allocated * to a guest. */ -#define XEN_DOMCTL_set_machine_address_size 51 -#define XEN_DOMCTL_get_machine_address_size 52 +/* XEN_DOMCTL_set_machine_address_size */ +/* XEN_DOMCTL_get_machine_address_size */ + +/* + * Do not inject spurious page faults into this domain. 
+ */ +/* XEN_DOMCTL_suppress_spurious_page_faults */ + +/* XEN_DOMCTL_debug_op */ +#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF 0 +#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON 1 +struct xen_domctl_debug_op { + uint32_t op; /* IN */ + uint32_t vcpu; /* IN */ +}; +typedef struct xen_domctl_debug_op xen_domctl_debug_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t); + +/* + * Request a particular record from the HVM context + */ +/* XEN_DOMCTL_gethvmcontext_partial */ +typedef struct xen_domctl_hvmcontext_partial { + uint32_t type; /* IN: Type of record required */ + uint32_t instance; /* IN: Instance of that type */ + XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */ +} xen_domctl_hvmcontext_partial_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t); + +/* XEN_DOMCTL_disable_migrate */ +typedef struct xen_domctl_disable_migrate { + uint32_t disable; /* IN: 1: disable migration and restore */ +} xen_domctl_disable_migrate_t; + + +/* XEN_DOMCTL_gettscinfo */ +/* XEN_DOMCTL_settscinfo */ +struct xen_guest_tsc_info { + uint32_t tsc_mode; + uint32_t gtsc_khz; + uint32_t incarnation; + uint32_t pad; + uint64_aligned_t elapsed_nsec; +}; +typedef struct xen_guest_tsc_info xen_guest_tsc_info_t; +DEFINE_XEN_GUEST_HANDLE(xen_guest_tsc_info_t); +typedef struct xen_domctl_tsc_info { + XEN_GUEST_HANDLE_64(xen_guest_tsc_info_t) out_info; /* OUT */ + xen_guest_tsc_info_t info; /* IN */ +} xen_domctl_tsc_info_t; + +/* XEN_DOMCTL_gdbsx_guestmemio guest mem io */ +struct xen_domctl_gdbsx_memio { + /* IN */ + uint64_aligned_t pgd3val;/* optional: init_mm.pgd[3] value */ + uint64_aligned_t gva; /* guest virtual address */ + uint64_aligned_t uva; /* user buffer virtual address */ + uint32_t len; /* number of bytes to read/write */ + uint8_t gwr; /* 0 = read from guest. 
1 = write to guest */ + /* OUT */ + uint32_t remain; /* bytes remaining to be copied */ +}; + +/* XEN_DOMCTL_gdbsx_pausevcpu */ +/* XEN_DOMCTL_gdbsx_unpausevcpu */ +struct xen_domctl_gdbsx_pauseunp_vcpu { /* pause/unpause a vcpu */ + uint32_t vcpu; /* which vcpu */ +}; + +/* XEN_DOMCTL_gdbsx_domstatus */ +struct xen_domctl_gdbsx_domstatus { + /* OUT */ + uint8_t paused; /* is the domain paused */ + uint32_t vcpu_id; /* any vcpu in an event? */ + uint32_t vcpu_ev; /* if yes, what event? */ +}; + +/* + * Memory event operations + */ + +/* XEN_DOMCTL_mem_event_op */ + +/* + * Domain memory paging + * Page memory in and out. + * Domctl interface to set up and tear down the + * pager<->hypervisor interface. Use XENMEM_paging_op* + * to perform per-page operations. + */ +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING 1 + +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE 1 + +/* + * Access permissions. + * + * As with paging, use the domctl for teardown/setup of the + * helper<->hypervisor interface. + * + * There are HVM hypercalls to set the per-page access permissions of every + * page in a domain. When one of these permissions--independent, read, + * write, and execute--is violated, the VCPU is paused and a memory event + * is sent with what happened. (See public/mem_event.h) . + * + * The memory event handler can then resume the VCPU and redo the access + * with a XENMEM_access_op_resume hypercall. + */ +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS 2 + +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE 1 + +/* + * Sharing ENOMEM helper. + * + * As with paging, use the domctl for teardown/setup of the + * helper<->hypervisor interface. + * + * If setup, this ring is used to communicate failed allocations + * in the unshare path. XENMEM_sharing_op_resume is used to wake up + * vcpus that could not unshare. 
+ * + * Note that shring can be turned on (as per the domctl below) + * *without* this ring being setup. + */ +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING 3 + +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DISABLE 1 + +/* Use for teardown/setup of helper<->hypervisor interface for paging, + * access and sharing.*/ +struct xen_domctl_mem_event_op { + uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */ + uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */ + + uint32_t port; /* OUT: event channel for ring */ +}; +typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t); + +/* + * Memory sharing operations + */ +/* XEN_DOMCTL_mem_sharing_op. + * The CONTROL sub-domctl is used for bringup/teardown. */ +#define XEN_DOMCTL_MEM_SHARING_CONTROL 0 + +struct xen_domctl_mem_sharing_op { + uint8_t op; /* XEN_DOMCTL_MEM_SHARING_* */ + + union { + uint8_t enable; /* CONTROL */ + } u; +}; +typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t); + +struct xen_domctl_audit_p2m { + /* OUT error counts */ + uint64_t orphans; + uint64_t m2p_bad; + uint64_t p2m_bad; +}; +typedef struct xen_domctl_audit_p2m xen_domctl_audit_p2m_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_audit_p2m_t); + +struct xen_domctl_set_virq_handler { + uint32_t virq; /* IN */ +}; +typedef struct xen_domctl_set_virq_handler xen_domctl_set_virq_handler_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_virq_handler_t); + +#if defined(__i386__) || defined(__x86_64__) +/* XEN_DOMCTL_setvcpuextstate */ +/* XEN_DOMCTL_getvcpuextstate */ +struct xen_domctl_vcpuextstate { + /* IN: VCPU that this call applies to. 
*/ + uint32_t vcpu; + /* + * SET: xfeature support mask of struct (IN) + * GET: xfeature support mask of struct (IN/OUT) + * xfeature mask is served as identifications of the saving format + * so that compatible CPUs can have a check on format to decide + * whether it can restore. + */ + uint64_aligned_t xfeature_mask; + /* + * SET: Size of struct (IN) + * GET: Size of struct (IN/OUT) + */ + uint64_aligned_t size; + XEN_GUEST_HANDLE_64(uint64) buffer; +}; +typedef struct xen_domctl_vcpuextstate xen_domctl_vcpuextstate_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t); +#endif +/* XEN_DOMCTL_set_access_required: sets whether a memory event listener + * must be present to handle page access events: if false, the page + * access will revert to full permissions if no one is listening; + * */ +struct xen_domctl_set_access_required { + uint8_t access_required; +}; +typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t); struct xen_domctl { uint32_t cmd; +#define XEN_DOMCTL_createdomain 1 +#define XEN_DOMCTL_destroydomain 2 +#define XEN_DOMCTL_pausedomain 3 +#define XEN_DOMCTL_unpausedomain 4 +#define XEN_DOMCTL_getdomaininfo 5 +#define XEN_DOMCTL_getmemlist 6 +#define XEN_DOMCTL_getpageframeinfo 7 +#define XEN_DOMCTL_getpageframeinfo2 8 +#define XEN_DOMCTL_setvcpuaffinity 9 +#define XEN_DOMCTL_shadow_op 10 +#define XEN_DOMCTL_max_mem 11 +#define XEN_DOMCTL_setvcpucontext 12 +#define XEN_DOMCTL_getvcpucontext 13 +#define XEN_DOMCTL_getvcpuinfo 14 +#define XEN_DOMCTL_max_vcpus 15 +#define XEN_DOMCTL_scheduler_op 16 +#define XEN_DOMCTL_setdomainhandle 17 +#define XEN_DOMCTL_setdebugging 18 +#define XEN_DOMCTL_irq_permission 19 +#define XEN_DOMCTL_iomem_permission 20 +#define XEN_DOMCTL_ioport_permission 21 +#define XEN_DOMCTL_hypercall_init 22 +#define XEN_DOMCTL_arch_setup 23 +#define XEN_DOMCTL_settimeoffset 24 +#define XEN_DOMCTL_getvcpuaffinity 25 +#define XEN_DOMCTL_real_mode_area 
26 +#define XEN_DOMCTL_resumedomain 27 +#define XEN_DOMCTL_sendtrigger 28 +#define XEN_DOMCTL_subscribe 29 +#define XEN_DOMCTL_gethvmcontext 33 +#define XEN_DOMCTL_sethvmcontext 34 +#define XEN_DOMCTL_set_address_size 35 +#define XEN_DOMCTL_get_address_size 36 +#define XEN_DOMCTL_assign_device 37 +#define XEN_DOMCTL_bind_pt_irq 38 +#define XEN_DOMCTL_memory_mapping 39 +#define XEN_DOMCTL_ioport_mapping 40 +#define XEN_DOMCTL_pin_mem_cacheattr 41 +#define XEN_DOMCTL_set_ext_vcpucontext 42 +#define XEN_DOMCTL_get_ext_vcpucontext 43 +#define XEN_DOMCTL_set_opt_feature 44 +#define XEN_DOMCTL_test_assign_device 45 +#define XEN_DOMCTL_set_target 46 +#define XEN_DOMCTL_deassign_device 47 +#define XEN_DOMCTL_unbind_pt_irq 48 +#define XEN_DOMCTL_set_cpuid 49 +#define XEN_DOMCTL_get_device_group 50 +#define XEN_DOMCTL_set_machine_address_size 51 +#define XEN_DOMCTL_get_machine_address_size 52 +#define XEN_DOMCTL_suppress_spurious_page_faults 53 +#define XEN_DOMCTL_debug_op 54 +#define XEN_DOMCTL_gethvmcontext_partial 55 +#define XEN_DOMCTL_mem_event_op 56 +#define XEN_DOMCTL_mem_sharing_op 57 +#define XEN_DOMCTL_disable_migrate 58 +#define XEN_DOMCTL_gettscinfo 59 +#define XEN_DOMCTL_settscinfo 60 +#define XEN_DOMCTL_getpageframeinfo3 61 +#define XEN_DOMCTL_setvcpuextstate 62 +#define XEN_DOMCTL_getvcpuextstate 63 +#define XEN_DOMCTL_set_access_required 64 +#define XEN_DOMCTL_audit_p2m 65 +#define XEN_DOMCTL_set_virq_handler 66 +#define XEN_DOMCTL_gdbsx_guestmemio 1000 +#define XEN_DOMCTL_gdbsx_pausevcpu 1001 +#define XEN_DOMCTL_gdbsx_unpausevcpu 1002 +#define XEN_DOMCTL_gdbsx_domstatus 1003 uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ domid_t domain; union { @@ -626,6 +914,7 @@ struct xen_domctl_getmemlist getmemlist; struct xen_domctl_getpageframeinfo getpageframeinfo; struct xen_domctl_getpageframeinfo2 getpageframeinfo2; + struct xen_domctl_getpageframeinfo3 getpageframeinfo3; struct xen_domctl_vcpuaffinity vcpuaffinity; struct xen_domctl_shadow_op 
shadow_op; struct xen_domctl_max_mem max_mem; @@ -641,8 +930,11 @@ struct xen_domctl_hypercall_init hypercall_init; struct xen_domctl_arch_setup arch_setup; struct xen_domctl_settimeoffset settimeoffset; + struct xen_domctl_disable_migrate disable_migrate; + struct xen_domctl_tsc_info tsc_info; struct xen_domctl_real_mode_area real_mode_area; struct xen_domctl_hvmcontext hvmcontext; + struct xen_domctl_hvmcontext_partial hvmcontext_partial; struct xen_domctl_address_size address_size; struct xen_domctl_sendtrigger sendtrigger; struct xen_domctl_get_device_group get_device_group; @@ -655,9 +947,19 @@ struct xen_domctl_set_opt_feature set_opt_feature; struct xen_domctl_set_target set_target; struct xen_domctl_subscribe subscribe; + struct xen_domctl_debug_op debug_op; + struct xen_domctl_mem_event_op mem_event_op; + struct xen_domctl_mem_sharing_op mem_sharing_op; #if defined(__i386__) || defined(__x86_64__) struct xen_domctl_cpuid cpuid; + struct xen_domctl_vcpuextstate vcpuextstate; #endif + struct xen_domctl_set_access_required access_required; + struct xen_domctl_audit_p2m audit_p2m; + struct xen_domctl_set_virq_handler set_virq_handler; + struct xen_domctl_gdbsx_memio gdbsx_guest_memio; + struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; + struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; uint8_t pad[128]; } u; }; ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/elfnote.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/elfnote.h#1 (text) ==== content @@ -162,9 +162,39 @@ #define XEN_ELFNOTE_SUSPEND_CANCEL 14 /* + * The (non-default) location the initial phys-to-machine map should be + * placed at by the hypervisor (Dom0) or the tools (DomU). + * The kernel must be prepared for this mapping to be established using + * large pages, despite such otherwise not being available to guests. + * The kernel must also be able to handle the page table pages used for + * this mapping not being accessible through the initial mapping. 
+ * (Only x86-64 supports this at present.) + */ +#define XEN_ELFNOTE_INIT_P2M 15 + +/* + * Whether or not the guest can deal with being passed an initrd not + * mapped through its initial page tables. + */ +#define XEN_ELFNOTE_MOD_START_PFN 16 + +/* + * The features supported by this kernel (numeric). + * + * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a + * kernel to specify support for features that older hypervisors don't + * know about. The set of features 4.2 and newer hypervisors will + * consider supported by the kernel is the combination of the sets + * specified through this and the string note. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_SUPPORTED_FEATURES 17 + +/* * The number of the highest elfnote defined. */ -#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUSPEND_CANCEL +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES /* * System information exported through crash notes. ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/elfstructs.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/elfstructs.h#1 (text) ==== identical ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/event_channel.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/event_channel.h#1 (text) ==== content @@ -1,8 +1,8 @@ /****************************************************************************** * event_channel.h - * + * * Event channels between domains. - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the @@ -28,14 +28,54 @@ #define __XEN_PUBLIC_EVENT_CHANNEL_H__ /* - * Prototype for this hypercall is: - * int event_channel_op(int cmd, void *args) - * @cmd == EVTCHNOP_??? (event-channel operation). - * @args == Operation-specific extra arguments (NULL if none). 
+ * `incontents 150 evtchn Event Channels + * + * Event channels are the basic primitive provided by Xen for event + * notifications. An event is the Xen equivalent of a hardware + * interrupt. They essentially store one bit of information, the event + * of interest is signalled by transitioning this bit from 0 to 1. + * + * Notifications are received by a guest via an upcall from Xen, + * indicating when an event arrives (setting the bit). Further + * notifications are masked until the bit is cleared again (therefore, + * guests must check the value of the bit after re-enabling event + * delivery to ensure no missed notifications). + * + * Event notifications can be masked by setting a flag; this is + * equivalent to disabling interrupts and can be used to ensure + * atomicity of certain operations in the guest kernel. + * + * Event channels are represented by the evtchn_* fields in + * struct shared_info and struct vcpu_info. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args) + * ` + * @cmd == EVTCHNOP_* (event-channel operation). + * @args == struct evtchn_* Operation-specific extra arguments (NULL if none). */ +/* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */ +#define EVTCHNOP_bind_interdomain 0 +#define EVTCHNOP_bind_virq 1 +#define EVTCHNOP_bind_pirq 2 +#define EVTCHNOP_close 3 +#define EVTCHNOP_send 4 +#define EVTCHNOP_status 5 +#define EVTCHNOP_alloc_unbound 6 +#define EVTCHNOP_bind_ipi 7 +#define EVTCHNOP_bind_vcpu 8 +#define EVTCHNOP_unmask 9 +#define EVTCHNOP_reset 10 +/* ` } */ + +#ifndef __XEN_EVTCHN_PORT_DEFINED__ typedef uint32_t evtchn_port_t; DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); +#define __XEN_EVTCHN_PORT_DEFINED__ 1 +#endif /* * EVTCHNOP_alloc_unbound: Allocate a port in domain and mark as @@ -45,7 +85,6 @@ * 1. If the caller is unprivileged then must be DOMID_SELF. * 2. may be DOMID_SELF, allowing loopback connections. 
*/ -#define EVTCHNOP_alloc_unbound 6 struct evtchn_alloc_unbound { /* IN parameters */ domid_t dom, remote_dom; @@ -61,9 +100,8 @@ * domain. A fresh port is allocated in the calling domain and returned as * . * NOTES: - * 2. may be DOMID_SELF, allowing loopback connections. + * 1. may be DOMID_SELF, allowing loopback connections. */ -#define EVTCHNOP_bind_interdomain 0 struct evtchn_bind_interdomain { /* IN parameters. */ domid_t remote_dom; @@ -85,10 +123,9 @@ * The allocated event channel is bound to the specified vcpu and the * binding cannot be changed. */ -#define EVTCHNOP_bind_virq 1 struct evtchn_bind_virq { /* IN parameters. */ - uint32_t virq; + uint32_t virq; /* enum virq */ uint32_t vcpu; /* OUT parameters. */ evtchn_port_t port; @@ -96,12 +133,11 @@ typedef struct evtchn_bind_virq evtchn_bind_virq_t; /* - * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ . + * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ ). * NOTES: * 1. A physical IRQ may be bound to at most one event channel per domain. * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. */ -#define EVTCHNOP_bind_pirq 2 struct evtchn_bind_pirq { /* IN parameters. */ uint32_t pirq; @@ -118,7 +154,6 @@ * 1. The allocated event channel is bound to the specified vcpu. The binding * may not be changed. */ -#define EVTCHNOP_bind_ipi 7 struct evtchn_bind_ipi { uint32_t vcpu; /* OUT parameters. */ @@ -131,7 +166,6 @@ * interdomain then the remote end is placed in the unbound state * (EVTCHNSTAT_unbound), awaiting a new connection. */ -#define EVTCHNOP_close 3 struct evtchn_close { /* IN parameters. */ evtchn_port_t port; @@ -142,7 +176,6 @@ * EVTCHNOP_send: Send an event to the remote end of the channel whose local * endpoint is . */ -#define EVTCHNOP_send 4 struct evtchn_send { /* IN parameters. */ evtchn_port_t port; @@ -157,7 +190,6 @@ * 2. Only a sufficiently-privileged domain may obtain the status of an event * channel for which is not DOMID_SELF. 
*/ -#define EVTCHNOP_status 5 struct evtchn_status { /* IN parameters */ domid_t dom; @@ -174,13 +206,13 @@ union { struct { domid_t dom; - } unbound; /* EVTCHNSTAT_unbound */ + } unbound; /* EVTCHNSTAT_unbound */ struct { domid_t dom; evtchn_port_t port; - } interdomain; /* EVTCHNSTAT_interdomain */ - uint32_t pirq; /* EVTCHNSTAT_pirq */ - uint32_t virq; /* EVTCHNSTAT_virq */ + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ } u; }; typedef struct evtchn_status evtchn_status_t; @@ -197,7 +229,6 @@ * the channel is allocated (a port that is freed and subsequently reused * has its binding reset to vcpu0). */ -#define EVTCHNOP_bind_vcpu 8 struct evtchn_bind_vcpu { /* IN parameters. */ evtchn_port_t port; @@ -209,7 +240,6 @@ * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver * a notification to the appropriate VCPU if an event is pending. */ -#define EVTCHNOP_unmask 9 struct evtchn_unmask { /* IN parameters. */ evtchn_port_t port; @@ -222,7 +252,6 @@ * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. */ -#define EVTCHNOP_reset 10 struct evtchn_reset { /* IN parameters. */ domid_t dom; @@ -230,11 +259,13 @@ typedef struct evtchn_reset evtchn_reset_t; /* - * Argument to event_channel_op_compat() hypercall. Superceded by new - * event_channel_op() hypercall since 0x00030202. + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op) + * ` + * Superceded by new event_channel_op() hypercall since 0x00030202. 
*/ struct evtchn_op { - uint32_t cmd; /* EVTCHNOP_* */ + uint32_t cmd; /* enum event_channel_op */ union { struct evtchn_alloc_unbound alloc_unbound; struct evtchn_bind_interdomain bind_interdomain; ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/features.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/features.h#1 (text) ==== content @@ -59,6 +59,27 @@ /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ #define XENFEAT_mmu_pt_update_preserve_ad 5 +/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */ +#define XENFEAT_highmem_assist 6 + +/* + * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel + * available pte bits. + */ +#define XENFEAT_gnttab_map_avail_bits 7 + +/* x86: Does this Xen host support the HVM callback vector type? */ +#define XENFEAT_hvm_callback_vector 8 + +/* x86: pvclock algorithm is safe to use on HVM */ +#define XENFEAT_hvm_safe_pvclock 9 + +/* x86: pirq can be used by HVM guests */ +#define XENFEAT_hvm_pirqs 10 + +/* operation as Dom0 is supported */ +#define XENFEAT_dom0 11 + #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/grant_table.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/grant_table.h#1 (text) ==== content @@ -1,9 +1,9 @@ /****************************************************************************** * grant_table.h - * + * * Interface for granting foreign access to page frames, and receiving * page-ownership transfers. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the @@ -28,6 +28,28 @@ #ifndef __XEN_PUBLIC_GRANT_TABLE_H__ #define __XEN_PUBLIC_GRANT_TABLE_H__ +/* + * `incontents 150 gnttab Grant Tables + * + * Xen's grant tables provide a generic mechanism to memory sharing + * between domains. This shared memory interface underpins the split + * device drivers for block and network IO. + * + * Each domain has its own grant table. This is a data structure that + * is shared with Xen; it allows the domain to tell Xen what kind of + * permissions other domains have on its pages. Entries in the grant + * table are identified by grant references. A grant reference is an + * integer, which indexes into the grant table. It acts as a + * capability which the grantee can use to perform operations on the + * granter’s memory. + * + * This capability-based system allows shared-memory communications + * between unprivileged domains. A grant reference also encapsulates + * the details of a shared page, removing the need for a domain to + * know the real machine address of a page it is sharing. This makes + * it possible to share memory correctly with domains running in + * fully virtualised memory. + */ /*********************************** * GRANT TABLE REPRESENTATION @@ -35,11 +57,12 @@ /* Some rough guidelines on accessing and updating grant-table entries * in a concurrency-safe manner. For more information, Linux contains a - * reference implementation for guest OSes (arch/xen/kernel/grant_table.c). - * + * reference implementation for guest OSes (drivers/xen/grant_table.c, see + * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD + * * NB. WMB is a no-op on current-generation x86 processors. However, a * compiler barrier will still be required. 
- * + * * Introducing a valid entry into the grant table: * 1. Write ent->domid. * 2. Write ent->frame: @@ -48,7 +71,7 @@ * frame, or zero if none. * 3. Write memory barrier (WMB). * 4. Write ent->flags, inc. valid type. - * + * * Invalidating an unused GTF_permit_access entry: * 1. flags = ent->flags. * 2. Observe that !(flags & (GTF_reading|GTF_writing)). @@ -60,7 +83,7 @@ * This cannot be done directly. Request assistance from the domain controller * which can set a timeout on the use of a grant entry and take necessary * action. (NB. This is not yet implemented!). - * + * * Invalidating an unused GTF_accept_transfer entry: * 1. flags = ent->flags. * 2. Observe that !(flags & GTF_transfer_committed). [*] @@ -78,18 +101,34 @@ * * Changing a GTF_permit_access from writable to read-only: * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. - * + * * Changing a GTF_permit_access from read-only to writable: * Use SMP-safe bit-setting instruction. */ /* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +#define GRANT_REF_INVALID 0xffffffff + +/* * A grant table comprises a packed array of grant entries in one or more * page frames shared between Xen and a guest. * [XEN]: This field is written by Xen and read by the sharing guest. * [GST]: This field is written by the guest and read by Xen. */ -struct grant_entry { + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2. + */ +#if __XEN_INTERFACE_VERSION__ < 0x0003020a +#define grant_entry_v1 grant_entry +#define grant_entry_v1_t grant_entry_t +#endif +struct grant_entry_v1 { /* GTF_xxx: various type and flag information. [XEN,GST] */ uint16_t flags; /* The domain being granted foreign privileges. 
[GST] */ @@ -100,7 +139,14 @@ */ uint32_t frame; }; -typedef struct grant_entry grant_entry_t; +typedef struct grant_entry_v1 grant_entry_v1_t; + +/* The first few grant table entries will be preserved across grant table + * version changes and may be pre-populated at domain creation by tools. + */ +#define GNTTAB_NR_RESERVED_ENTRIES 8 +#define GNTTAB_RESERVED_CONSOLE 0 +#define GNTTAB_RESERVED_XENSTORE 1 /* * Type of grant entry. @@ -108,10 +154,13 @@ * GTF_permit_access: Allow @domid to map/access @frame. * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. */ #define GTF_invalid (0U<<0) #define GTF_permit_access (1U<<0) #define GTF_accept_transfer (2U<<0) +#define GTF_transitive (3U<<0) #define GTF_type_mask (3U<<0) /* @@ -120,6 +169,9 @@ * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] */ #define _GTF_readonly (2) #define GTF_readonly (1U<<_GTF_readonly) @@ -133,6 +185,8 @@ #define GTF_PCD (1U<<_GTF_PCD) #define _GTF_PAT (7) #define GTF_PAT (1U<<_GTF_PAT) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U<<_GTF_sub_page) /* * Subflags for GTF_accept_transfer: @@ -149,17 +203,114 @@ #define _GTF_transfer_completed (3) #define GTF_transfer_completed (1U<<_GTF_transfer_completed) +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. 
+ * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. + */ +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * Version 1 and version 2 grant entries share a common prefix. The + * fields of the prefix are documented as part of struct + * grant_entry_v1. + */ +struct grant_entry_header { + uint16_t flags; + domid_t domid; +}; +typedef struct grant_entry_header grant_entry_header_t; + +/* + * Version 2 of the grant entry structure. + */ +union grant_entry_v2 { + grant_entry_header_t hdr; + + /* + * This member is used for V1-style full page grants, where either: + * + * -- hdr.type is GTF_accept_transfer, or + * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. + * + * In that case, the frame field has the same semantics as the + * field of the same name in the V1 entry structure. + */ + struct { + grant_entry_header_t hdr; + uint32_t pad0; + uint64_t frame; + } full_page; + /* + * If the grant type is GTF_grant_access and GTF_sub_page is set, + * @domid is allowed to access bytes [@page_off,@page_off+@length) + * in frame @frame. + */ + struct { + grant_entry_header_t hdr; + uint16_t page_off; + uint16_t length; + uint64_t frame; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use the + * grant @gref in domain @trans_domid, as if it was the local + * domain. Obviously, the transitive access must be compatible + * with the original grant. + * + * The current version of Xen does not allow transitive grants + * to be mapped. 
+ */ + struct { + grant_entry_header_t hdr; + domid_t trans_domid; + uint16_t pad0; + grant_ref_t gref; + } transitive; + + uint32_t __spacer[4]; /* Pad to a power of two */ +}; +typedef union grant_entry_v2 grant_entry_v2_t; + +typedef uint16_t grant_status_t; + +#endif /* __XEN_INTERFACE_VERSION__ */ + /*********************************** * GRANT TABLE QUERIES AND USES */ -/* - * Reference to a grant entry in a specified domain's grant table. +/* ` enum neg_errnoval + * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd, + * ` void *args, + * ` unsigned int count) + * ` + * + * @args points to an array of a per-command data structure. The array + * has @count members */ -typedef uint32_t grant_ref_t; -#define GRANT_REF_INVALID 0xffffffff +/* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */ +#define GNTTABOP_map_grant_ref 0 +#define GNTTABOP_unmap_grant_ref 1 +#define GNTTABOP_setup_table 2 +#define GNTTABOP_dump_table 3 +#define GNTTABOP_transfer 4 +#define GNTTABOP_copy 5 +#define GNTTABOP_query_size 6 +#define GNTTABOP_unmap_and_replace 7 +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +#define GNTTABOP_set_version 8 +#define GNTTABOP_get_status_frames 9 +#define GNTTABOP_get_version 10 +#define GNTTABOP_swap_grant_ref 11 +#endif /* __XEN_INTERFACE_VERSION__ */ +/* ` } */ /* * Handle to track a mapping created via a grant reference. @@ -177,13 +328,12 @@ * 2. If GNTMAP_host_map is specified then a mapping will be added at * either a host virtual address in the current address space, or at * a PTE at the specified machine address. The type of mapping to - * perform is selected through the GNTMAP_contains_pte flag, and the + * perform is selected through the GNTMAP_contains_pte flag, and the * address is specified in . * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a * host mapping is destroyed by other means then it is *NOT* guaranteed * to be accounted to the correct grant reference! 
*/ -#define GNTTABOP_map_grant_ref 0 struct gnttab_map_grant_ref { /* IN parameters. */ uint64_t host_addr; @@ -191,7 +341,7 @@ grant_ref_t ref; domid_t dom; /* OUT parameters. */ - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ grant_handle_t handle; uint64_t dev_bus_addr; }; @@ -209,14 +359,13 @@ * 3. After executing a batch of unmaps, it is guaranteed that no stale * mappings will remain in the device or host TLBs. */ -#define GNTTABOP_unmap_grant_ref 1 struct gnttab_unmap_grant_ref { /* IN parameters. */ uint64_t host_addr; uint64_t dev_bus_addr; grant_handle_t handle; /* OUT parameters. */ - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ }; typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); @@ -230,13 +379,12 @@ * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. * 3. Xen may not support more than a single grant-table page per domain. */ -#define GNTTABOP_setup_table 2 struct gnttab_setup_table { /* IN parameters. */ domid_t dom; uint32_t nr_frames; /* OUT parameters. */ - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ XEN_GUEST_HANDLE(ulong) frame_list; }; typedef struct gnttab_setup_table gnttab_setup_table_t; @@ -246,12 +394,11 @@ * GNTTABOP_dump_table: Dump the contents of the grant table to the * xen console. Debugging use only. */ -#define GNTTABOP_dump_table 3 struct gnttab_dump_table { /* IN parameters. */ domid_t dom; /* OUT parameters. */ - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ }; typedef struct gnttab_dump_table gnttab_dump_table_t; DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); @@ -260,11 +407,10 @@ * GNTTABOP_transfer_grant_ref: Transfer to a foreign domain. The * foreign domain has previously registered its interest in the transfer via * . 
- * + * * Note that, even if the transfer fails, the specified page no longer belongs * to the calling domain *unless* the error is GNTST_bad_page. */ -#define GNTTABOP_transfer 4 struct gnttab_transfer { /* IN parameters. */ xen_pfn_t mfn; @@ -299,9 +445,10 @@ #define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) #define _GNTCOPY_dest_gref (1) #define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) +#define _GNTCOPY_can_fail (2) +#define GNTCOPY_can_fail (1<<_GNTCOPY_can_fail) -#define GNTTABOP_copy 5 -typedef struct gnttab_copy { +struct gnttab_copy { /* IN parameters. */ struct { union { @@ -315,7 +462,8 @@ uint16_t flags; /* GNTCOPY_* */ /* OUT parameters. */ int16_t status; -} gnttab_copy_t; +}; +typedef struct gnttab_copy gnttab_copy_t; DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); /* @@ -325,14 +473,13 @@ * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. */ -#define GNTTABOP_query_size 6 struct gnttab_query_size { /* IN parameters. */ domid_t dom; /* OUT parameters. */ uint32_t nr_frames; uint32_t max_nr_frames; - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ }; typedef struct gnttab_query_size gnttab_query_size_t; DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); @@ -348,21 +495,87 @@ * 2. After executing a batch of unmaps, it is guaranteed that no stale * mappings will remain in the device or host TLBs. */ -#define GNTTABOP_unmap_and_replace 7 struct gnttab_unmap_and_replace { /* IN parameters. */ uint64_t host_addr; uint64_t new_addr; grant_handle_t handle; /* OUT parameters. */ - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ }; typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure. 
This operation can only be performed + * once in any given domain. It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. + */ +struct gnttab_set_version { + /* IN/OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_set_version gnttab_set_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t); + /* - * Bitfield values for update_pin_status.flags. + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for . In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpexch operations. + * specify the size of vector . + * The frame addresses are returned in the . + * Only addresses are returned, even if the table is larger. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. + */ +struct gnttab_get_status_frames { + /* IN parameters. */ + uint32_t nr_frames; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ + XEN_GUEST_HANDLE(uint64_t) frame_list; +}; +typedef struct gnttab_get_status_frames gnttab_get_status_frames_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t); + +/* + * GNTTABOP_get_version: Get the grant table version which is in + * effect for domain . + */ +struct gnttab_get_version { + /* IN parameters */ + domid_t dom; + uint16_t pad; + /* OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_get_version gnttab_get_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t); + +/* + * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries. 
+ */ +struct gnttab_swap_grant_ref { + /* IN parameters */ + grant_ref_t ref_a; + grant_ref_t ref_b; + /* OUT parameters */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t); + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/* + * Bitfield values for gnttab_map_grant_ref.flags. */ /* Map the grant entry for access by I/O devices. */ #define _GNTMAP_device_map (0) @@ -389,9 +602,20 @@ #define _GNTMAP_contains_pte (4) #define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) +#define _GNTMAP_can_fail (5) +#define GNTMAP_can_fail (1<<_GNTMAP_can_fail) + +/* + * Bits to be placed in guest kernel available PTE bits (architecture + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). + */ +#define _GNTMAP_guest_avail0 (16) +#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) + /* * Values for error status returns. All errors are -ve. */ +/* ` enum grant_status { */ #define GNTST_okay (0) /* Normal return. */ #define GNTST_general_error (-1) /* General undefined error. */ #define GNTST_bad_domain (-2) /* Unrecognsed domain id. */ @@ -404,6 +628,8 @@ #define GNTST_bad_page (-9) /* Specified page was invalid for op. */ #define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ #define GNTST_address_too_big (-11) /* transfer page address too large. */ +#define GNTST_eagain (-12) /* Operation not done; try again. 
*/ +/* ` } */ #define GNTTABOP_error_msgs { \ "okay", \ @@ -417,7 +643,8 @@ "permission denied", \ "bad page", \ "copy arguments cross page boundary", \ - "page address size too large" \ + "page address size too large", \ + "operation not done; try again" \ } #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/hvm/hvm_info_table.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/hvm/hvm_info_table.h#1 (text) ==== content @@ -29,13 +29,44 @@ #define HVM_INFO_OFFSET 0x800 #define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET) +/* Maximum we can support with current vLAPIC ID mapping. */ +#define HVM_MAX_VCPUS 128 + struct hvm_info_table { char signature[8]; /* "HVM INFO" */ uint32_t length; uint8_t checksum; - uint8_t acpi_enabled; + + /* Should firmware build APIC descriptors (APIC MADT / MP BIOS)? */ uint8_t apic_mode; + + /* How many CPUs does this domain have? */ uint32_t nr_vcpus; + + /* + * MEMORY MAP provided by HVM domain builder. + * Notes: + * 1. page_to_phys(x) = x << 12 + * 2. If a field is zero, the corresponding range does not exist. + */ + /* + * 0x0 to page_to_phys(low_mem_pgend)-1: + * RAM below 4GB (except for VGA hole 0xA0000-0xBFFFF) + */ + uint32_t low_mem_pgend; + /* + * page_to_phys(reserved_mem_pgstart) to 0xFFFFFFFF: + * Reserved for special memory mappings + */ + uint32_t reserved_mem_pgstart; + /* + * 0x100000000 to page_to_phys(high_mem_pgend)-1: + * RAM above 4GB + */ + uint32_t high_mem_pgend; + + /* Bitmap of which CPUs are online at boot time. */ + uint8_t vcpu_online[(HVM_MAX_VCPUS + 7)/8]; }; #endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/hvm/hvm_op.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/hvm/hvm_op.h#1 (text) ==== content @@ -21,6 +21,8 @@ #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ #define __XEN_PUBLIC_HVM_HVM_OP_H__ +#include + /* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. 
*/ #define HVMOP_set_param 0 #define HVMOP_get_param 1 @@ -73,6 +75,12 @@ /* Flushes all VCPU TLBs: @arg must be NULL. */ #define HVMOP_flush_tlbs 5 +typedef enum { + HVMMEM_ram_rw, /* Normal read/write guest RAM */ + HVMMEM_ram_ro, /* Read-only; writes are discarded */ + HVMMEM_mmio_dm, /* Reads and write go to the device model */ +} hvmmem_type_t; + /* Following tools-only interfaces may change in future. */ #if defined(__XEN__) || defined(__XEN_TOOLS__) @@ -106,25 +114,149 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); #define HVMOP_set_mem_type 8 -typedef enum { - HVMMEM_ram_rw, /* Normal read/write guest RAM */ - HVMMEM_ram_ro, /* Read-only; writes are discarded */ - HVMMEM_mmio_dm, /* Reads and write go to the device model */ -} hvmmem_type_t; /* Notify that a region of memory is to be treated in a specific way. */ struct xen_hvm_set_mem_type { /* Domain to be updated. */ domid_t domid; /* Memory type */ - hvmmem_type_t hvmmem_type; + uint16_t hvmmem_type; + /* Number of pages. */ + uint32_t nr; /* First pfn. */ uint64_aligned_t first_pfn; - /* Number of pages. */ - uint64_aligned_t nr; }; typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t); +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +/* Hint from PV drivers for pagetable destruction. */ +#define HVMOP_pagetable_dying 9 +struct xen_hvm_pagetable_dying { + /* Domain with a pagetable about to be destroyed. */ + domid_t domid; + uint16_t pad[3]; /* align next field on 8-byte boundary */ + /* guest physical address of the toplevel pagetable dying */ + uint64_t gpa; +}; +typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t); + +/* Get the current Xen time, in nanoseconds since system boot. 
*/ +#define HVMOP_get_time 10 +struct xen_hvm_get_time { + uint64_t now; /* OUT */ +}; +typedef struct xen_hvm_get_time xen_hvm_get_time_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_time_t); + +#define HVMOP_xentrace 11 +struct xen_hvm_xentrace { + uint16_t event, extra_bytes; + uint8_t extra[TRACE_EXTRA_MAX * sizeof(uint32_t)]; +}; +typedef struct xen_hvm_xentrace xen_hvm_xentrace_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t); + +/* Following tools-only interfaces may change in future. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#define HVMOP_set_mem_access 12 +typedef enum { + HVMMEM_access_n, + HVMMEM_access_r, + HVMMEM_access_w, + HVMMEM_access_rw, + HVMMEM_access_x, + HVMMEM_access_rx, + HVMMEM_access_wx, + HVMMEM_access_rwx, + HVMMEM_access_rx2rw, /* Page starts off as r-x, but automatically + * change to r-w on a write */ + HVMMEM_access_n2rwx, /* Log access: starts off as n, automatically + * goes to rwx, generating an event without + * pausing the vcpu */ + HVMMEM_access_default /* Take the domain default */ +} hvmmem_access_t; +/* Notify that a region of memory is to have specific access types */ +struct xen_hvm_set_mem_access { + /* Domain to be updated. */ + domid_t domid; + /* Memory type */ + uint16_t hvmmem_access; /* hvm_access_t */ + /* Number of pages, ignored on setting default access */ + uint32_t nr; + /* First pfn, or ~0ull to set the default access for new pages */ + uint64_aligned_t first_pfn; +}; +typedef struct xen_hvm_set_mem_access xen_hvm_set_mem_access_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_access_t); + +#define HVMOP_get_mem_access 13 +/* Get the specific access type for that region of memory */ +struct xen_hvm_get_mem_access { + /* Domain to be queried. */ + domid_t domid; + /* Memory type: OUT */ + uint16_t hvmmem_access; /* hvm_access_t */ + /* pfn, or ~0ull for default access for new pages. 
IN */ + uint64_aligned_t pfn; +}; +typedef struct xen_hvm_get_mem_access xen_hvm_get_mem_access_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_access_t); + +#define HVMOP_inject_trap 14 +/* Inject a trap into a VCPU, which will get taken up on the next + * scheduling of it. Note that the caller should know enough of the + * state of the CPU before injecting, to know what the effect of + * injecting the trap will be. + */ +struct xen_hvm_inject_trap { + /* Domain to be queried. */ + domid_t domid; + /* VCPU */ + uint32_t vcpuid; + /* Trap number */ + uint32_t trap; + /* Error code, or -1 to skip */ + uint32_t error_code; + /* CR2 for page faults */ + uint64_aligned_t cr2; +}; +typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#define HVMOP_get_mem_type 15 +/* Return hvmmem_type_t for the specified pfn. */ +struct xen_hvm_get_mem_type { + /* Domain to be queried. */ + domid_t domid; + /* OUT variable. */ + uint16_t mem_type; + uint16_t pad[2]; /* align next field on 8-byte boundary */ + /* IN variable. */ + uint64_t pfn; +}; +typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t); + +/* Following tools-only interfaces may change in future. 
*/ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +/* MSI injection for emulated devices */ +#define HVMOP_inject_msi 16 +struct xen_hvm_inject_msi { + /* Domain to be injected */ + domid_t domid; + /* Data -- lower 32 bits */ + uint32_t data; + /* Address (0xfeexxxxx) */ + uint64_t addr; +}; +typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t); #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/hvm/ioreq.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/hvm/ioreq.h#1 (text) ==== content @@ -43,32 +43,24 @@ * virq */ struct ioreq { - uint64_t addr; /* physical address */ - uint64_t size; /* size in bytes */ - uint64_t count; /* for rep prefixes */ - uint64_t data; /* data (or paddr of data) */ + uint64_t addr; /* physical address */ + uint64_t data; /* data (or paddr of data) */ + uint32_t count; /* for rep prefixes */ + uint32_t size; /* size in bytes */ + uint32_t vp_eport; /* evtchn for notifications to/from device model */ + uint16_t _pad0; uint8_t state:4; - uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr - * of the real data to use. */ - uint8_t dir:1; /* 1=read, 0=write */ + uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr + * of the real data to use. */ + uint8_t dir:1; /* 1=read, 0=write */ uint8_t df:1; - uint8_t pad:1; - uint8_t type; /* I/O type */ - uint8_t _pad0[6]; - uint64_t io_count; /* How many IO done on a vcpu */ + uint8_t _pad1:1; + uint8_t type; /* I/O type */ }; typedef struct ioreq ioreq_t; -struct vcpu_iodata { - struct ioreq vp_ioreq; - /* Event channel port, used for notifications to/from the device model. 
*/ - uint32_t vp_eport; - uint32_t _pad0; -}; -typedef struct vcpu_iodata vcpu_iodata_t; - struct shared_iopage { - struct vcpu_iodata vcpu_iodata[1]; + struct ioreq vcpu_ioreq[1]; }; typedef struct shared_iopage shared_iopage_t; @@ -108,11 +100,32 @@ }; #endif /* defined(__ia64__) */ -#define ACPI_PM1A_EVT_BLK_ADDRESS 0x0000000000001f40 -#define ACPI_PM1A_CNT_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04) -#define ACPI_PM_TMR_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08) -#define ACPI_GPE0_BLK_ADDRESS (ACPI_PM_TMR_BLK_ADDRESS + 0x20) -#define ACPI_GPE0_BLK_LEN 0x08 +/* + * ACPI Control/Event register locations. Location is controlled by a + * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION. + */ + +/* Version 0 (default): Traditional Xen locations. */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V0 (ACPI_PM_TMR_BLK_ADDRESS_V0 + 0x20) +#define ACPI_GPE0_BLK_LEN_V0 0x08 + +/* Version 1: Locations preferred by modern Qemu. */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V1 0xafe0 +#define ACPI_GPE0_BLK_LEN_V1 0x04 + +/* Compatibility definitions for the default location (version 0). 
*/ +#define ACPI_PM1A_EVT_BLK_ADDRESS ACPI_PM1A_EVT_BLK_ADDRESS_V0 +#define ACPI_PM1A_CNT_BLK_ADDRESS ACPI_PM1A_CNT_BLK_ADDRESS_V0 +#define ACPI_PM_TMR_BLK_ADDRESS ACPI_PM_TMR_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_ADDRESS ACPI_GPE0_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_LEN ACPI_GPE0_BLK_LEN_V0 + #endif /* _IOREQ_H_ */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/hvm/params.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/hvm/params.h#1 (text) ==== content @@ -33,6 +33,9 @@ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: * Domain = val[47:32], Bus = val[31:16], * DevFn = val[15: 8], IntX = val[ 1: 0] + * val[63:56] == 2: val[7:0] is a vector number, check for + * XENFEAT_hvm_callback_vector to know if this delivery + * method is available. * If val == 0 then CPU0 event-channel notifications are not delivered. */ #define HVM_PARAM_CALLBACK_IRQ 0 @@ -49,11 +52,19 @@ #define HVM_PARAM_IOREQ_PFN 5 #define HVM_PARAM_BUFIOREQ_PFN 6 +#define HVM_PARAM_BUFIOREQ_EVTCHN 26 #ifdef __ia64__ + #define HVM_PARAM_NVRAM_FD 7 #define HVM_PARAM_VHPT_SIZE 8 #define HVM_PARAM_BUFPIOREQ_PFN 9 + +#elif defined(__i386__) || defined(__x86_64__) + +/* Expose Viridian interfaces to this HVM guest? */ +#define HVM_PARAM_VIRIDIAN 9 + #endif /* @@ -93,32 +104,49 @@ /* ACPI S state: currently support S0 and S3 on x86. */ #define HVM_PARAM_ACPI_S_STATE 14 -#define HVM_NR_PARAMS 15 +/* TSS used on Intel when CR0.PE=0. */ +#define HVM_PARAM_VM86_TSS 15 + +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ +#define HVM_PARAM_VPT_ALIGN 16 + +/* Console debug shared memory ring and event channel */ +#define HVM_PARAM_CONSOLE_PFN 17 +#define HVM_PARAM_CONSOLE_EVTCHN 18 -#ifdef XENHVM -/** - * Retrieve an HVM setting from the hypervisor. - * - * \param index The index of the HVM parameter to retrieve. - * - * \return On error, 0. Otherwise the value of the requested parameter. +/* + * Select location of ACPI PM1a and TMR control blocks. 
Currently two locations + * are supported, specified by version 0 or 1 in this parameter: + * - 0: default, use the old addresses + * PM1A_EVT == 0x1f40; PM1A_CNT == 0x1f44; PM_TMR == 0x1f48 + * - 1: use the new default qemu addresses + * PM1A_EVT == 0xb000; PM1A_CNT == 0xb004; PM_TMR == 0xb008 + * You can find these address definitions in */ -static inline unsigned long -hvm_get_parameter(int index) -{ - struct xen_hvm_param xhv; - int error; +#define HVM_PARAM_ACPI_IOPORTS_LOCATION 19 + +/* Enable blocking memory events, async or sync (pause vcpu until response) + * onchangeonly indicates messages only on a change of value */ +#define HVM_PARAM_MEMORY_EVENT_CR0 20 +#define HVM_PARAM_MEMORY_EVENT_CR3 21 +#define HVM_PARAM_MEMORY_EVENT_CR4 22 +#define HVM_PARAM_MEMORY_EVENT_INT3 23 +#define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP 25 + +#define HVMPME_MODE_MASK (3 << 0) +#define HVMPME_mode_disabled 0 +#define HVMPME_mode_async 1 +#define HVMPME_mode_sync 2 +#define HVMPME_onchangeonly (1 << 2) + +/* Boolean: Enable nestedhvm (hvm only) */ +#define HVM_PARAM_NESTEDHVM 24 + +/* Params for the mem event rings */ +#define HVM_PARAM_PAGING_RING_PFN 27 +#define HVM_PARAM_ACCESS_RING_PFN 28 +#define HVM_PARAM_SHARING_RING_PFN 29 - xhv.domid = DOMID_SELF; - xhv.index = index; - error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); - if (error) { - printf("hvm_get_parameter: failed to get %d, error %d\n", - index, error); - return (0); - } - return (xhv.value); -} -#endif +#define HVM_NR_PARAMS 30 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/hvm/save.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/hvm/save.h#1 (text) ==== content @@ -61,13 +61,36 @@ * ugliness. 
*/ -#define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ - struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; } +#ifdef __XEN__ +# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \ + static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { return _fix(h); } \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}; \ + struct __HVM_SAVE_TYPE_COMPAT_##_x { _ctype t; } + +# include /* BUG() */ +# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ + static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { BUG(); return -1; } \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}; \ + struct __HVM_SAVE_TYPE_COMPAT_##_x { _type t; } +#else +# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];} + +# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];} +#endif #define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t) #define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x))) #define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c)) +#ifdef __XEN__ +# define HVM_SAVE_TYPE_COMPAT(_x) typeof (((struct __HVM_SAVE_TYPE_COMPAT_##_x *)(0))->t) +# define HVM_SAVE_LENGTH_COMPAT(_x) (sizeof (HVM_SAVE_TYPE_COMPAT(_x))) + +# define HVM_SAVE_HAS_COMPAT(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->cpt)-1) +# define HVM_SAVE_FIX_COMPAT(_x, _dst) __HVM_SAVE_FIX_COMPAT_##_x(_dst) +#endif /* * The series of save records is teminated by a zero-type, zero-length @@ -78,9 +101,11 @@ DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end); #if defined(__i386__) || defined(__x86_64__) -#include "../arch-x86/hvm/save.h" +#include * Default Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE) - * Maximum Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE * max-ring_pages) + * Maximum Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE * max-ring-pages) * * The maximum number of concurrent, 
logical requests that will be * issued by the frontend. ==== - //SpectraBSD/stable/sys/xen/interface/io/fsif.h#1 ==== ==== - //SpectraBSD/stable/sys/xen/interface/io/libxenvchan.h#1 ==== ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/io/netif.h#2 (text) - //SpectraBSD/stable/sys/xen/interface/io/netif.h#1 (text) ==== content @@ -173,6 +173,10 @@ #define _NETRXF_extra_info (3) #define NETRXF_extra_info (1U<<_NETRXF_extra_info) +/* GSO Prefix descriptor. */ +#define _NETRXF_gso_prefix (4) +#define NETRXF_gso_prefix (1U<<_NETRXF_gso_prefix) + struct netif_rx_response { uint16_t id; uint16_t offset; /* Offset in page of start of received packet */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/io/pciif.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/io/pciif.h#1 (text) ==== content @@ -29,15 +29,23 @@ /* xen_pci_sharedinfo flags */ #define _XEN_PCIF_active (0) -#define XEN_PCIF_active (1<<_XEN_PCI_active) +#define XEN_PCIF_active (1<<_XEN_PCIF_active) +#define _XEN_PCIB_AERHANDLER (1) +#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER) +#define _XEN_PCIB_active (2) +#define XEN_PCIB_active (1<<_XEN_PCIB_active) /* xen_pci_op commands */ -#define XEN_PCI_OP_conf_read (0) -#define XEN_PCI_OP_conf_write (1) -#define XEN_PCI_OP_enable_msi (2) -#define XEN_PCI_OP_disable_msi (3) -#define XEN_PCI_OP_enable_msix (4) -#define XEN_PCI_OP_disable_msix (5) +#define XEN_PCI_OP_conf_read (0) +#define XEN_PCI_OP_conf_write (1) +#define XEN_PCI_OP_enable_msi (2) +#define XEN_PCI_OP_disable_msi (3) +#define XEN_PCI_OP_enable_msix (4) +#define XEN_PCI_OP_disable_msix (5) +#define XEN_PCI_OP_aer_detected (6) +#define XEN_PCI_OP_aer_resume (7) +#define XEN_PCI_OP_aer_mmio (8) +#define XEN_PCI_OP_aer_slotreset (9) /* xen_pci_op error numbers */ #define XEN_PCI_ERR_success (0) @@ -82,10 +90,25 @@ struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC]; }; +/*used for pcie aer handling*/ +struct xen_pcie_aer_op +{ + + /* IN: what action to perform: XEN_PCI_OP_* */ + 
uint32_t cmd; + /*IN/OUT: return aer_op result or carry error_detected state as input*/ + int32_t err; + + /* IN: which device to touch */ + uint32_t domain; /* PCI Domain/Segment*/ + uint32_t bus; + uint32_t devfn; +}; struct xen_pci_sharedinfo { /* flags - XEN_PCIF_* */ uint32_t flags; struct xen_pci_op op; + struct xen_pcie_aer_op aer_op; }; #endif /* __XEN_PCI_COMMON_H__ */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/io/protocols.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/io/protocols.h#1 (text) ==== content @@ -26,7 +26,7 @@ #define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" #define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" #define XEN_IO_PROTO_ABI_IA64 "ia64-abi" -#define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi" +#define XEN_IO_PROTO_ABI_ARM "arm-abi" #if defined(__i386__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 @@ -34,8 +34,8 @@ # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 #elif defined(__ia64__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64 -#elif defined(__powerpc64__) -# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64 +#elif defined(__arm__) +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM #else # error arch fixup needed here #endif ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/io/ring.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/io/ring.h#1 (text) ==== content @@ -53,9 +53,15 @@ /* * Calculate size of a shared ring, given the total available space for the * ring and indexes (_sz), and the name tag of the request/response structure. - * A ring contains as many entries as will fit, rounded down to the nearest + * A ring contains as many entries as will fit, rounded down to the nearest * power of two (so we can mask with (size-1) to loop around). */ +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) +/* + * The same for passing in an actual pointer instead of a name tag. 
+ */ #define __RING_SIZE(_s, _sz) \ (__RD32(((_sz) - __RING_HEADER_SIZE(_s)) / sizeof((_s)->ring[0]))) @@ -113,7 +119,16 @@ struct __name##_sring { \ RING_IDX req_prod, req_event; \ RING_IDX rsp_prod, rsp_event; \ - uint8_t pad[48]; \ + union { \ + struct { \ + uint8_t smartpoll_active; \ + } netif; \ + struct { \ + uint8_t msg; \ + } tapif_user; \ + uint8_t pvt_pad[4]; \ + } private; \ + uint8_t __pad[44]; \ union __name##_sring_entry ring[1]; /* variable-length */ \ }; \ \ @@ -157,7 +172,8 @@ #define SHARED_RING_INIT(_s) do { \ (_s)->req_prod = (_s)->rsp_prod = 0; \ (_s)->req_event = (_s)->rsp_event = 1; \ - (void)memset((_s)->pad, 0, sizeof((_s)->pad)); \ + (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ } while(0) #define FRONT_RING_INIT(_r, _s, __size) do { \ ==== - //SpectraBSD/stable/sys/xen/interface/io/usbif.h#1 ==== ==== - //SpectraBSD/stable/sys/xen/interface/io/vscsiif.h#1 ==== ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/io/xs_wire.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/io/xs_wire.h#1 (text) ==== content @@ -47,7 +47,9 @@ XS_ERROR, XS_IS_DOMAIN_INTRODUCED, XS_RESUME, - XS_SET_TARGET + XS_SET_TARGET, + XS_RESTRICT, + XS_RESET_WATCHES }; #define XS_WRITE_NONE "NONE" @@ -60,6 +62,7 @@ int errnum; const char *errstring; }; +#ifdef EINVAL #define XSD_ERROR(x) { x, #x } /* LINTED: static unused */ static struct xsd_errors xsd_errors[] @@ -82,6 +85,7 @@ XSD_ERROR(EAGAIN), XSD_ERROR(EISCONN) }; +#endif struct xsd_sockmsg { ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/kexec.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/kexec.h#1 (text) ==== content @@ -155,27 +155,6 @@ unsigned long start; } xen_kexec_range_t; -/* vmcoreinfo stuff */ -#define VMCOREINFO_BYTES (4096) -#define VMCOREINFO_NOTE_NAME "VMCOREINFO_XEN" -void arch_crash_save_vmcoreinfo(void); -void vmcoreinfo_append_str(const char *fmt, ...) 
- __attribute__ ((format (printf, 1, 2))); -#define VMCOREINFO_PAGESIZE(value) \ - vmcoreinfo_append_str("PAGESIZE=%ld\n", value) -#define VMCOREINFO_SYMBOL(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) -#define VMCOREINFO_SYMBOL_ALIAS(alias, name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #alias, (unsigned long)&name) -#define VMCOREINFO_STRUCT_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%zu\n", #name, sizeof(struct name)) -#define VMCOREINFO_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(struct name, field)) -#define VMCOREINFO_OFFSET_ALIAS(name, field, alias) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #alias, \ - (unsigned long)offsetof(struct name, field)) - #endif /* _XEN_PUBLIC_KEXEC_H */ /* ==== - //SpectraBSD/stable/sys/xen/interface/mem_event.h#1 ==== ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/memory.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/memory.h#1 (text) ==== content @@ -27,6 +27,8 @@ #ifndef __XEN_PUBLIC_MEMORY_H__ #define __XEN_PUBLIC_MEMORY_H__ +#include + /* * Increase or decrease the specified domain's memory reservation. Returns the * number of extents successfully allocated or freed. @@ -48,6 +50,11 @@ /* NUMA node to allocate from. */ #define XENMEMF_node(x) (((x) + 1) << 8) #define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu) +/* Flag to populate physmap with populate-on-demand entries */ +#define XENMEMF_populate_on_demand (1<<16) +/* Flag to request allocation only from the node specified */ +#define XENMEMF_exact_node_request (1<<17) +#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request) #endif struct xen_memory_reservation { @@ -201,12 +208,18 @@ /* Which domain to change the mapping for. */ domid_t domid; + /* Number of pages to go through for gmfn_range */ + uint16_t size; + /* Source mapping space. 
*/ #define XENMAPSPACE_shared_info 0 /* shared info page */ #define XENMAPSPACE_grant_table 1 /* grant table page */ -#define XENMAPSPACE_mfn 2 /* usual MFN */ +#define XENMAPSPACE_gmfn 2 /* GMFN */ +#define XENMAPSPACE_gmfn_range 3 /* GMFN range */ unsigned int space; +#define XENMAPIDX_grant_table_status 0x80000000 + /* Index into source mapping space. */ xen_ulong_t idx; @@ -232,29 +245,8 @@ typedef struct xen_remove_from_physmap xen_remove_from_physmap_t; DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t); -/* - * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error - * code on failure. This call only works for auto-translated guests. - */ -#define XENMEM_translate_gpfn_list 8 -struct xen_translate_gpfn_list { - /* Which domain to translate for? */ - domid_t domid; - - /* Length of list. */ - xen_ulong_t nr_gpfns; - - /* List of GPFNs to translate. */ - XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list; - - /* - * Output list to contain MFN translations. May be the same as the input - * list (in which case each input GPFN is overwritten with the output MFN). 
- */ - XEN_GUEST_HANDLE(xen_pfn_t) mfn_list; -}; -typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t; -DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t); +/*** REMOVED ***/ +/*#define XENMEM_translate_gpfn_list 8*/ /* * Returns the pseudo-physical memory map as it was when the domain @@ -299,6 +291,114 @@ typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); +#define XENMEM_set_pod_target 16 +#define XENMEM_get_pod_target 17 +struct xen_pod_target { + /* IN */ + uint64_t target_pages; + /* OUT */ + uint64_t tot_pages; + uint64_t pod_cache_pages; + uint64_t pod_entries; + /* IN */ + domid_t domid; +}; +typedef struct xen_pod_target xen_pod_target_t; + +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif + +/* + * Get the number of MFNs saved through memory sharing. + * The call never fails. + */ +#define XENMEM_get_sharing_freed_pages 18 +#define XENMEM_get_sharing_shared_pages 19 + +#define XENMEM_paging_op 20 +#define XENMEM_paging_op_nominate 0 +#define XENMEM_paging_op_evict 1 +#define XENMEM_paging_op_prep 2 + +#define XENMEM_access_op 21 +#define XENMEM_access_op_resume 0 + +struct xen_mem_event_op { + uint8_t op; /* XENMEM_*_op_* */ + domid_t domain; + + + /* PAGING_PREP IN: buffer to immediately fill page in */ + uint64_aligned_t buffer; + /* Other OPs */ + uint64_aligned_t gfn; /* IN: gfn of page being operated on */ +}; +typedef struct xen_mem_event_op xen_mem_event_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t); + +#define XENMEM_sharing_op 22 +#define XENMEM_sharing_op_nominate_gfn 0 +#define XENMEM_sharing_op_nominate_gref 1 +#define XENMEM_sharing_op_share 2 +#define XENMEM_sharing_op_resume 3 +#define XENMEM_sharing_op_debug_gfn 4 +#define XENMEM_sharing_op_debug_mfn 5 +#define XENMEM_sharing_op_debug_gref 6 +#define XENMEM_sharing_op_add_physmap 7 +#define XENMEM_sharing_op_audit 8 + +#define 
XENMEM_SHARING_OP_S_HANDLE_INVALID (-10) +#define XENMEM_SHARING_OP_C_HANDLE_INVALID (-9) + +/* The following allows sharing of grant refs. This is useful + * for sharing utilities sitting as "filters" in IO backends + * (e.g. memshr + blktap(2)). The IO backend is only exposed + * to grant references, and this allows sharing of the grefs */ +#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG (1ULL << 62) + +#define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val) \ + (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val) +#define XENMEM_SHARING_OP_FIELD_IS_GREF(field) \ + ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG) +#define XENMEM_SHARING_OP_FIELD_GET_GREF(field) \ + ((field) & (~XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG)) + +struct xen_mem_sharing_op { + uint8_t op; /* XENMEM_sharing_op_* */ + domid_t domain; + + union { + struct mem_sharing_op_nominate { /* OP_NOMINATE_xxx */ + union { + uint64_aligned_t gfn; /* IN: gfn to nominate */ + uint32_t grant_ref; /* IN: grant ref to nominate */ + } u; + uint64_aligned_t handle; /* OUT: the handle */ + } nominate; + struct mem_sharing_op_share { /* OP_SHARE/ADD_PHYSMAP */ + uint64_aligned_t source_gfn; /* IN: the gfn of the source page */ + uint64_aligned_t source_handle; /* IN: handle to the source page */ + uint64_aligned_t client_gfn; /* IN: the client gfn */ + uint64_aligned_t client_handle; /* IN: handle to the client page */ + domid_t client_domain; /* IN: the client domain id */ + } share; + struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ + union { + uint64_aligned_t gfn; /* IN: gfn to debug */ + uint64_aligned_t mfn; /* IN: mfn to debug */ + uint32_t gref; /* IN: gref to debug */ + } u; + } debug; + } u; +}; +typedef struct xen_mem_sharing_op xen_mem_sharing_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + #endif /* __XEN_PUBLIC_MEMORY_H__ */ /* ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/physdev.h#1 (text) - 
//SpectraBSD/stable/sys/xen/interface/physdev.h#1 (text) ==== content @@ -41,6 +41,29 @@ DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); /* + * Register a shared page for the hypervisor to indicate whether the guest + * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly + * once the guest used this function in that the associated event channel + * will automatically get unmasked. The page registered is used as a bit + * array indexed by Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v1 17 +/* + * Register a shared page for the hypervisor to indicate whether the + * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to + * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of + * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by + * Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v2 28 +struct physdev_pirq_eoi_gmfn { + /* IN */ + xen_pfn_t gmfn; +}; +typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t); + +/* * Query the status of an IRQ line. * @arg == pointer to physdev_irq_status_query structure. 
*/ @@ -125,6 +148,7 @@ #define MAP_PIRQ_TYPE_MSI 0x0 #define MAP_PIRQ_TYPE_GSI 0x1 #define MAP_PIRQ_TYPE_UNKNOWN 0x2 +#define MAP_PIRQ_TYPE_MSI_SEG 0x3 #define PHYSDEVOP_map_pirq 13 struct physdev_map_pirq { @@ -135,7 +159,7 @@ int index; /* IN or OUT */ int pirq; - /* IN */ + /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */ int bus; /* IN */ int devfn; @@ -168,6 +192,31 @@ typedef struct physdev_manage_pci physdev_manage_pci_t; DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); +#define PHYSDEVOP_restore_msi 19 +struct physdev_restore_msi { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_restore_msi physdev_restore_msi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t); + +#define PHYSDEVOP_manage_pci_add_ext 20 +struct physdev_manage_pci_ext { + /* IN */ + uint8_t bus; + uint8_t devfn; + unsigned is_extfn; + unsigned is_virtfn; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +}; + +typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t); + /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. 
@@ -185,6 +234,82 @@ typedef struct physdev_op physdev_op_t; DEFINE_XEN_GUEST_HANDLE(physdev_op_t); +#define PHYSDEVOP_setup_gsi 21 +struct physdev_setup_gsi { + int gsi; + /* IN */ + uint8_t triggering; + /* IN */ + uint8_t polarity; + /* IN */ +}; + +typedef struct physdev_setup_gsi physdev_setup_gsi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t); + +/* leave PHYSDEVOP 22 free */ + +/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI + * the hypercall returns a free pirq */ +#define PHYSDEVOP_get_free_pirq 23 +struct physdev_get_free_pirq { + /* IN */ + int type; + /* OUT */ + uint32_t pirq; +}; + +typedef struct physdev_get_free_pirq physdev_get_free_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t); + +#define XEN_PCI_MMCFG_RESERVED 0x1 + +#define PHYSDEVOP_pci_mmcfg_reserved 24 +struct physdev_pci_mmcfg_reserved { + uint64_t address; + uint16_t segment; + uint8_t start_bus; + uint8_t end_bus; + uint32_t flags; +}; +typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t); + +#define XEN_PCI_DEV_EXTFN 0x1 +#define XEN_PCI_DEV_VIRTFN 0x2 +#define XEN_PCI_DEV_PXM 0x4 + +#define PHYSDEVOP_pci_device_add 25 +struct physdev_pci_device_add { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; + uint32_t flags; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint32_t optarr[]; +#elif defined(__GNUC__) + uint32_t optarr[0]; +#endif +}; +typedef struct physdev_pci_device_add physdev_pci_device_add_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); + +#define PHYSDEVOP_pci_device_remove 26 +#define PHYSDEVOP_restore_msi_ext 27 +struct physdev_pci_device { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_pci_device physdev_pci_device_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t); + /* * Notify that some PIRQ-bound event channels have been unmasked. 
* ** This command is obsolete since interface version 0x00030202 and is ** @@ -206,6 +331,12 @@ #define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi #define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared +#if __XEN_INTERFACE_VERSION__ < 0x00040200 +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v1 +#else +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v2 +#endif + #endif /* __XEN_PUBLIC_PHYSDEV_H__ */ /* ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/platform.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/platform.h#1 (text) ==== content @@ -114,10 +114,110 @@ typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); +#define XENPF_efi_runtime_call 49 +#define XEN_EFI_get_time 1 +#define XEN_EFI_set_time 2 +#define XEN_EFI_get_wakeup_time 3 +#define XEN_EFI_set_wakeup_time 4 +#define XEN_EFI_get_next_high_monotonic_count 5 +#define XEN_EFI_get_variable 6 +#define XEN_EFI_set_variable 7 +#define XEN_EFI_get_next_variable_name 8 +#define XEN_EFI_query_variable_info 9 +#define XEN_EFI_query_capsule_capabilities 10 +#define XEN_EFI_update_capsule 11 +struct xenpf_efi_runtime_call { + uint32_t function; + /* + * This field is generally used for per sub-function flags (defined + * below), except for the XEN_EFI_get_next_high_monotonic_count case, + * where it holds the single returned value. 
+ */ + uint32_t misc; + unsigned long status; + union { +#define XEN_EFI_GET_TIME_SET_CLEARS_NS 0x00000001 + struct { + struct xenpf_efi_time { + uint16_t year; + uint8_t month; + uint8_t day; + uint8_t hour; + uint8_t min; + uint8_t sec; + uint32_t ns; + int16_t tz; + uint8_t daylight; + } time; + uint32_t resolution; + uint32_t accuracy; + } get_time; + + struct xenpf_efi_time set_time; + +#define XEN_EFI_GET_WAKEUP_TIME_ENABLED 0x00000001 +#define XEN_EFI_GET_WAKEUP_TIME_PENDING 0x00000002 + struct xenpf_efi_time get_wakeup_time; + +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE 0x00000001 +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY 0x00000002 + struct xenpf_efi_time set_wakeup_time; + +#define XEN_EFI_VARIABLE_NON_VOLATILE 0x00000001 +#define XEN_EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002 +#define XEN_EFI_VARIABLE_RUNTIME_ACCESS 0x00000004 + struct { + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + unsigned long size; + XEN_GUEST_HANDLE(void) data; + struct xenpf_efi_guid { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; + } vendor_guid; + } get_variable, set_variable; + + struct { + unsigned long size; + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + struct xenpf_efi_guid vendor_guid; + } get_next_variable_name; + + struct { + uint32_t attr; + uint64_t max_store_size; + uint64_t remain_store_size; + uint64_t max_size; + } query_variable_info; + + struct { + XEN_GUEST_HANDLE(void) capsule_header_array; + unsigned long capsule_count; + uint64_t max_capsule_size; + unsigned int reset_type; + } query_capsule_capabilities; + + struct { + XEN_GUEST_HANDLE(void) capsule_header_array; + unsigned long capsule_count; + uint64_t sg_list; /* machine address */ + } update_capsule; + } u; +}; +typedef struct xenpf_efi_runtime_call xenpf_efi_runtime_call_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_efi_runtime_call_t); + #define XENPF_firmware_info 50 #define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ #define XEN_FW_DISK_MBR_SIGNATURE 2 /* 
from MBR offset 0x1b8 */ #define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ +#define XEN_FW_EFI_INFO 4 /* from EFI */ +#define XEN_FW_EFI_VERSION 0 +#define XEN_FW_EFI_CONFIG_TABLE 1 +#define XEN_FW_EFI_VENDOR 2 +#define XEN_FW_EFI_MEM_INFO 3 +#define XEN_FW_EFI_RT_VERSION 4 struct xenpf_firmware_info { /* IN variables. */ uint32_t type; @@ -148,6 +248,24 @@ /* must refer to 128-byte buffer */ XEN_GUEST_HANDLE(uint8) edid; } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + union xenpf_efi_info { + uint32_t version; + struct { + uint64_t addr; /* EFI_CONFIGURATION_TABLE */ + uint32_t nent; + } cfg; + struct { + uint32_t revision; + uint32_t bufsz; /* input, in bytes */ + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + } vendor; + struct { + uint64_t addr; + uint64_t size; + uint64_t attr; + uint32_t type; + } mem; + } efi_info; /* XEN_FW_EFI_INFO */ } u; }; typedef struct xenpf_firmware_info xenpf_firmware_info_t; @@ -210,6 +328,7 @@ #define XEN_PM_CX 0 #define XEN_PM_PX 1 #define XEN_PM_TX 2 +#define XEN_PM_PDC 3 /* Px sub info type */ #define XEN_PX_PCT 1 @@ -307,11 +426,88 @@ union { struct xen_processor_power power;/* Cx: _CST/_CSD */ struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ + XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */ } u; }; typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t; DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t); +#define XENPF_get_cpuinfo 55 +struct xenpf_pcpuinfo { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maxium cpu_id that is present */ + uint32_t max_present; +#define XEN_PCPU_FLAGS_ONLINE 1 + /* Correponding xen_cpuid is not present*/ +#define XEN_PCPU_FLAGS_INVALID 2 + uint32_t flags; + uint32_t apic_id; + uint32_t acpi_id; +}; +typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t); + +#define XENPF_get_cpu_version 48 +struct xenpf_pcpu_version { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maxium cpu_id that is present */ + 
uint32_t max_present; + char vendor_id[12]; + uint32_t family; + uint32_t model; + uint32_t stepping; +}; +typedef struct xenpf_pcpu_version xenpf_pcpu_version_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t); + +#define XENPF_cpu_online 56 +#define XENPF_cpu_offline 57 +struct xenpf_cpu_ol +{ + uint32_t cpuid; +}; +typedef struct xenpf_cpu_ol xenpf_cpu_ol_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_cpu_ol_t); + +#define XENPF_cpu_hotadd 58 +struct xenpf_cpu_hotadd +{ + uint32_t apic_id; + uint32_t acpi_id; + uint32_t pxm; +}; + +#define XENPF_mem_hotadd 59 +struct xenpf_mem_hotadd +{ + uint64_t spfn; + uint64_t epfn; + uint32_t pxm; + uint32_t flags; +}; + +#define XENPF_core_parking 60 + +#define XEN_CORE_PARKING_SET 1 +#define XEN_CORE_PARKING_GET 2 +struct xenpf_core_parking { + /* IN variables */ + uint32_t type; + /* IN variables: set cpu nums expected to be idled */ + /* OUT variables: get cpu nums actually be idled */ + uint32_t idle_nums; +}; +typedef struct xenpf_core_parking xenpf_core_parking_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t); + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_platform_op(const struct xen_platform_op*); + */ struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ @@ -322,11 +518,18 @@ struct xenpf_read_memtype read_memtype; struct xenpf_microcode_update microcode; struct xenpf_platform_quirk platform_quirk; + struct xenpf_efi_runtime_call efi_runtime_call; struct xenpf_firmware_info firmware_info; struct xenpf_enter_acpi_sleep enter_acpi_sleep; struct xenpf_change_freq change_freq; struct xenpf_getidletime getidletime; struct xenpf_set_processor_pminfo set_pminfo; + struct xenpf_pcpuinfo pcpu_info; + struct xenpf_pcpu_version pcpu_version; + struct xenpf_cpu_ol cpu_ol; + struct xenpf_cpu_hotadd cpu_add; + struct xenpf_mem_hotadd mem_add; + struct xenpf_core_parking core_parking; uint8_t pad[128]; } u; }; ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/sched.h#1 (text) - 
//SpectraBSD/stable/sys/xen/interface/sched.h#1 (text) ==== content @@ -99,6 +99,29 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); /* + * Latch a shutdown code, so that when the domain later shuts down it + * reports this code to the control tools. + * @arg == as for SCHEDOP_shutdown. + */ +#define SCHEDOP_shutdown_code 5 + +/* + * Setup, poke and destroy a domain watchdog timer. + * @arg == pointer to sched_watchdog structure. + * With id == 0, setup a domain watchdog timer to cause domain shutdown + * after timeout, returns watchdog id. + * With id != 0 and timeout == 0, destroy domain watchdog timer. + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. + */ +#define SCHEDOP_watchdog 6 +struct sched_watchdog { + uint32_t id; /* watchdog ID */ + uint32_t timeout; /* timeout */ +}; +typedef struct sched_watchdog sched_watchdog_t; +DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t); + +/* * Reason codes for SCHEDOP_shutdown. These may be interpreted by control * software to determine the appropriate action. For the most part, Xen does * not care about the shutdown code. @@ -107,6 +130,7 @@ #define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ +#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ #endif /* __XEN_PUBLIC_SCHED_H__ */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/sysctl.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/sysctl.h#1 (text) ==== content @@ -34,12 +34,12 @@ #include "xen.h" #include "domctl.h" -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000006 +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000009 /* * Read console content from Xen buffer ring. */ -#define XEN_SYSCTL_readconsole 1 +/* XEN_SYSCTL_readconsole */ struct xen_sysctl_readconsole { /* IN: Non-zero -> clear after reading. 
*/ uint8_t clear; @@ -60,7 +60,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); /* Get trace buffers machine base address */ -#define XEN_SYSCTL_tbuf_op 2 +/* XEN_SYSCTL_tbuf_op */ struct xen_sysctl_tbuf_op { /* IN variables */ #define XEN_SYSCTL_TBUFOP_get_info 0 @@ -75,7 +75,7 @@ uint32_t evt_mask; /* OUT variables */ uint64_aligned_t buffer_mfn; - uint32_t size; + uint32_t size; /* Also an IN variable! */ }; typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t); @@ -83,7 +83,7 @@ /* * Get physical information about the host machine */ -#define XEN_SYSCTL_physinfo 3 +/* XEN_SYSCTL_physinfo */ /* (x86) The platform supports HVM guests. */ #define _XEN_SYSCTL_PHYSCAP_hvm 0 #define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm) @@ -93,30 +93,16 @@ struct xen_sysctl_physinfo { uint32_t threads_per_core; uint32_t cores_per_socket; - uint32_t nr_cpus; - uint32_t nr_nodes; + uint32_t nr_cpus; /* # CPUs currently online */ + uint32_t max_cpu_id; /* Largest possible CPU ID on this host */ + uint32_t nr_nodes; /* # nodes currently online */ + uint32_t max_node_id; /* Largest possible node ID on this host */ uint32_t cpu_khz; uint64_aligned_t total_pages; uint64_aligned_t free_pages; uint64_aligned_t scrub_pages; uint32_t hw_cap[8]; - /* - * IN: maximum addressable entry in the caller-provided cpu_to_node array. - * OUT: largest cpu identifier in the system. - * If OUT is greater than IN then the cpu_to_node array is truncated! - */ - uint32_t max_cpu_id; - /* - * If not NULL, this array is filled with node identifier for each cpu. - * If a cpu has no node information (e.g., cpu not present) then the - * sentinel value ~0u is written. - * The size of this array is specified by the caller in @max_cpu_id. - * If the actual @max_cpu_id is smaller than the array then the trailing - * elements of the array will not be written by the sysctl. - */ - XEN_GUEST_HANDLE_64(uint32) cpu_to_node; - /* XEN_SYSCTL_PHYSCAP_??? 
*/ uint32_t capabilities; }; @@ -126,7 +112,7 @@ /* * Get the ID of the current scheduler. */ -#define XEN_SYSCTL_sched_id 4 +/* XEN_SYSCTL_sched_id */ struct xen_sysctl_sched_id { /* OUT variable */ uint32_t sched_id; @@ -135,7 +121,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t); /* Interface for controlling Xen software performance counters. */ -#define XEN_SYSCTL_perfc_op 5 +/* XEN_SYSCTL_perfc_op */ /* Sub-operations: */ #define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */ #define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. */ @@ -162,7 +148,7 @@ typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); -#define XEN_SYSCTL_getdomaininfolist 6 +/* XEN_SYSCTL_getdomaininfolist */ struct xen_sysctl_getdomaininfolist { /* IN variables. */ domid_t first_domain; @@ -175,7 +161,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); /* Inject debug keys into Xen. */ -#define XEN_SYSCTL_debug_keys 7 +/* XEN_SYSCTL_debug_keys */ struct xen_sysctl_debug_keys { /* IN variables. */ XEN_GUEST_HANDLE_64(char) keys; @@ -185,7 +171,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); /* Get physical CPU information. */ -#define XEN_SYSCTL_getcpuinfo 8 +/* XEN_SYSCTL_getcpuinfo */ struct xen_sysctl_cpuinfo { uint64_aligned_t idletime; }; @@ -201,7 +187,7 @@ typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); -#define XEN_SYSCTL_availheap 9 +/* XEN_SYSCTL_availheap */ struct xen_sysctl_availheap { /* IN variables. */ uint32_t min_bitwidth; /* Smallest address width (zero if don't care). 
*/ @@ -213,7 +199,7 @@ typedef struct xen_sysctl_availheap xen_sysctl_availheap_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t); -#define XEN_SYSCTL_get_pmstat 10 +/* XEN_SYSCTL_get_pmstat */ struct pm_px_val { uint64_aligned_t freq; /* Px core frequency */ uint64_aligned_t residency; /* Px residency time */ @@ -239,6 +225,13 @@ uint64_aligned_t idle_time; /* idle time from boot */ XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */ XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */ + uint64_aligned_t pc2; + uint64_aligned_t pc3; + uint64_aligned_t pc6; + uint64_aligned_t pc7; + uint64_aligned_t cc3; + uint64_aligned_t cc6; + uint64_aligned_t cc7; }; struct xen_sysctl_get_pmstat { @@ -262,7 +255,7 @@ typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t); -#define XEN_SYSCTL_cpu_hotplug 11 +/* XEN_SYSCTL_cpu_hotplug */ struct xen_sysctl_cpu_hotplug { /* IN variables */ uint32_t cpu; /* Physical cpu. */ @@ -273,14 +266,362 @@ typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t); +/* + * Get/set xen power management, include + * 1. 
cpufreq governors and related parameters + */ +/* XEN_SYSCTL_pm_op */ +struct xen_userspace { + uint32_t scaling_setspeed; +}; +typedef struct xen_userspace xen_userspace_t; + +struct xen_ondemand { + uint32_t sampling_rate_max; + uint32_t sampling_rate_min; + + uint32_t sampling_rate; + uint32_t up_threshold; +}; +typedef struct xen_ondemand xen_ondemand_t; + +/* + * cpufreq para name of this structure named + * same as sysfs file name of native linux + */ +#define CPUFREQ_NAME_LEN 16 +struct xen_get_cpufreq_para { + /* IN/OUT variable */ + uint32_t cpu_num; + uint32_t freq_num; + uint32_t gov_num; + + /* for all governors */ + /* OUT variable */ + XEN_GUEST_HANDLE_64(uint32) affected_cpus; + XEN_GUEST_HANDLE_64(uint32) scaling_available_frequencies; + XEN_GUEST_HANDLE_64(char) scaling_available_governors; + char scaling_driver[CPUFREQ_NAME_LEN]; + + uint32_t cpuinfo_cur_freq; + uint32_t cpuinfo_max_freq; + uint32_t cpuinfo_min_freq; + uint32_t scaling_cur_freq; + + char scaling_governor[CPUFREQ_NAME_LEN]; + uint32_t scaling_max_freq; + uint32_t scaling_min_freq; + + /* for specific governor */ + union { + struct xen_userspace userspace; + struct xen_ondemand ondemand; + } u; + + int32_t turbo_enabled; +}; + +struct xen_set_cpufreq_gov { + char scaling_governor[CPUFREQ_NAME_LEN]; +}; + +struct xen_set_cpufreq_para { + #define SCALING_MAX_FREQ 1 + #define SCALING_MIN_FREQ 2 + #define SCALING_SETSPEED 3 + #define SAMPLING_RATE 4 + #define UP_THRESHOLD 5 + + uint32_t ctrl_type; + uint32_t ctrl_value; +}; + +struct xen_sysctl_pm_op { + #define PM_PARA_CATEGORY_MASK 0xf0 + #define CPUFREQ_PARA 0x10 + + /* cpufreq command type */ + #define GET_CPUFREQ_PARA (CPUFREQ_PARA | 0x01) + #define SET_CPUFREQ_GOV (CPUFREQ_PARA | 0x02) + #define SET_CPUFREQ_PARA (CPUFREQ_PARA | 0x03) + #define GET_CPUFREQ_AVGFREQ (CPUFREQ_PARA | 0x04) + + /* set/reset scheduler power saving option */ + #define XEN_SYSCTL_pm_op_set_sched_opt_smt 0x21 + + /* cpuidle max_cstate access command */ + 
#define XEN_SYSCTL_pm_op_get_max_cstate 0x22 + #define XEN_SYSCTL_pm_op_set_max_cstate 0x23 + + /* set scheduler migration cost value */ + #define XEN_SYSCTL_pm_op_set_vcpu_migration_delay 0x24 + #define XEN_SYSCTL_pm_op_get_vcpu_migration_delay 0x25 + + /* enable/disable turbo mode when in dbs governor */ + #define XEN_SYSCTL_pm_op_enable_turbo 0x26 + #define XEN_SYSCTL_pm_op_disable_turbo 0x27 + + uint32_t cmd; + uint32_t cpuid; + union { + struct xen_get_cpufreq_para get_para; + struct xen_set_cpufreq_gov set_gov; + struct xen_set_cpufreq_para set_para; + uint64_aligned_t get_avgfreq; + uint32_t set_sched_opt_smt; + uint32_t get_max_cstate; + uint32_t set_max_cstate; + uint32_t get_vcpu_migration_delay; + uint32_t set_vcpu_migration_delay; + } u; +}; + +/* XEN_SYSCTL_page_offline_op */ +struct xen_sysctl_page_offline_op { + /* IN: range of page to be offlined */ +#define sysctl_page_offline 1 +#define sysctl_page_online 2 +#define sysctl_query_page_offline 3 + uint32_t cmd; + uint32_t start; + uint32_t end; + /* OUT: result of page offline request */ + /* + * bit 0~15: result flags + * bit 16~31: owner + */ + XEN_GUEST_HANDLE(uint32) status; +}; + +#define PG_OFFLINE_STATUS_MASK (0xFFUL) + +/* The result is invalid, i.e. 
HV does not handle it */ +#define PG_OFFLINE_INVALID (0x1UL << 0) + +#define PG_OFFLINE_OFFLINED (0x1UL << 1) +#define PG_OFFLINE_PENDING (0x1UL << 2) +#define PG_OFFLINE_FAILED (0x1UL << 3) +#define PG_OFFLINE_AGAIN (0x1UL << 4) + +#define PG_ONLINE_FAILED PG_OFFLINE_FAILED +#define PG_ONLINE_ONLINED PG_OFFLINE_OFFLINED + +#define PG_OFFLINE_STATUS_OFFLINED (0x1UL << 1) +#define PG_OFFLINE_STATUS_ONLINE (0x1UL << 2) +#define PG_OFFLINE_STATUS_OFFLINE_PENDING (0x1UL << 3) +#define PG_OFFLINE_STATUS_BROKEN (0x1UL << 4) + +#define PG_OFFLINE_MISC_MASK (0xFFUL << 4) + +/* valid when PG_OFFLINE_FAILED or PG_OFFLINE_PENDING */ +#define PG_OFFLINE_XENPAGE (0x1UL << 8) +#define PG_OFFLINE_DOM0PAGE (0x1UL << 9) +#define PG_OFFLINE_ANONYMOUS (0x1UL << 10) +#define PG_OFFLINE_NOT_CONV_RAM (0x1UL << 11) +#define PG_OFFLINE_OWNED (0x1UL << 12) + +#define PG_OFFLINE_BROKEN (0x1UL << 13) +#define PG_ONLINE_BROKEN PG_OFFLINE_BROKEN + +#define PG_OFFLINE_OWNER_SHIFT 16 + +/* XEN_SYSCTL_lockprof_op */ +/* Sub-operations: */ +#define XEN_SYSCTL_LOCKPROF_reset 1 /* Reset all profile data to zero. */ +#define XEN_SYSCTL_LOCKPROF_query 2 /* Get lock profile information. */ +/* Record-type: */ +#define LOCKPROF_TYPE_GLOBAL 0 /* global lock, idx meaningless */ +#define LOCKPROF_TYPE_PERDOM 1 /* per-domain lock, idx is domid */ +#define LOCKPROF_TYPE_N 2 /* number of types */ +struct xen_sysctl_lockprof_data { + char name[40]; /* lock name (may include up to 2 %d specifiers) */ + int32_t type; /* LOCKPROF_TYPE_??? */ + int32_t idx; /* index (e.g. domain id) */ + uint64_aligned_t lock_cnt; /* # of locking succeeded */ + uint64_aligned_t block_cnt; /* # of wait for lock */ + uint64_aligned_t lock_time; /* nsecs lock held */ + uint64_aligned_t block_time; /* nsecs waited for lock */ +}; +typedef struct xen_sysctl_lockprof_data xen_sysctl_lockprof_data_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_data_t); +struct xen_sysctl_lockprof_op { + /* IN variables. 
*/ + uint32_t cmd; /* XEN_SYSCTL_LOCKPROF_??? */ + uint32_t max_elem; /* size of output buffer */ + /* OUT variables (query only). */ + uint32_t nr_elem; /* number of elements available */ + uint64_aligned_t time; /* nsecs of profile measurement */ + /* profile information (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_lockprof_data_t) data; +}; +typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t); + +/* XEN_SYSCTL_topologyinfo */ +#define INVALID_TOPOLOGY_ID (~0U) +struct xen_sysctl_topologyinfo { + /* + * IN: maximum addressable entry in the caller-provided arrays. + * OUT: largest cpu identifier in the system. + * If OUT is greater than IN then the arrays are truncated! + * If OUT is leass than IN then the array tails are not written by sysctl. + */ + uint32_t max_cpu_index; + + /* + * If not NULL, these arrays are filled with core/socket/node identifier + * for each cpu. + * If a cpu has no core/socket/node information (e.g., cpu not present) + * then the sentinel value ~0u is written to each array. + * The number of array elements written by the sysctl is: + * min(@max_cpu_index_IN,@max_cpu_index_OUT)+1 + */ + XEN_GUEST_HANDLE_64(uint32) cpu_to_core; + XEN_GUEST_HANDLE_64(uint32) cpu_to_socket; + XEN_GUEST_HANDLE_64(uint32) cpu_to_node; +}; +typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t); + +/* XEN_SYSCTL_numainfo */ +struct xen_sysctl_numainfo { + /* + * IN: maximum addressable entry in the caller-provided arrays. + * OUT: largest node identifier in the system. + * If OUT is greater than IN then the arrays are truncated! + */ + uint32_t max_node_index; + + /* NB. Entries are 0 if node is not present. */ + XEN_GUEST_HANDLE_64(uint64) node_to_memsize; + XEN_GUEST_HANDLE_64(uint64) node_to_memfree; + + /* + * Array, of size (max_node_index+1)^2, listing memory access distances + * between nodes. 
If an entry has no node distance information (e.g., node + * not present) then the value ~0u is written. + * + * Note that the array rows must be indexed by multiplying by the minimum + * of the caller-provided max_node_index and the returned value of + * max_node_index. That is, if the largest node index in the system is + * smaller than the caller can handle, a smaller 2-d array is constructed + * within the space provided by the caller. When this occurs, trailing + * space provided by the caller is not modified. If the largest node index + * in the system is larger than the caller can handle, then a 2-d array of + * the maximum size handleable by the caller is constructed. + */ + XEN_GUEST_HANDLE_64(uint32) node_to_node_distance; +}; +typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t); + +/* XEN_SYSCTL_cpupool_op */ +#define XEN_SYSCTL_CPUPOOL_OP_CREATE 1 /* C */ +#define XEN_SYSCTL_CPUPOOL_OP_DESTROY 2 /* D */ +#define XEN_SYSCTL_CPUPOOL_OP_INFO 3 /* I */ +#define XEN_SYSCTL_CPUPOOL_OP_ADDCPU 4 /* A */ +#define XEN_SYSCTL_CPUPOOL_OP_RMCPU 5 /* R */ +#define XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN 6 /* M */ +#define XEN_SYSCTL_CPUPOOL_OP_FREEINFO 7 /* F */ +#define XEN_SYSCTL_CPUPOOL_PAR_ANY 0xFFFFFFFF +struct xen_sysctl_cpupool_op { + uint32_t op; /* IN */ + uint32_t cpupool_id; /* IN: CDIARM OUT: CI */ + uint32_t sched_id; /* IN: C OUT: I */ + uint32_t domid; /* IN: M */ + uint32_t cpu; /* IN: AR */ + uint32_t n_dom; /* OUT: I */ + struct xenctl_cpumap cpumap; /* OUT: IF */ +}; +typedef struct xen_sysctl_cpupool_op xen_sysctl_cpupool_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpupool_op_t); + +#define ARINC653_MAX_DOMAINS_PER_SCHEDULE 64 +/* + * This structure is used to pass a new ARINC653 schedule from a + * privileged domain (ie dom0) to Xen. + */ +struct xen_sysctl_arinc653_schedule { + /* major_frame holds the time for the new schedule's major frame + * in nanoseconds. 
*/ + uint64_aligned_t major_frame; + /* num_sched_entries holds how many of the entries in the + * sched_entries[] array are valid. */ + uint8_t num_sched_entries; + /* The sched_entries array holds the actual schedule entries. */ + struct { + /* dom_handle must match a domain's UUID */ + xen_domain_handle_t dom_handle; + /* If a domain has multiple VCPUs, vcpu_id specifies which one + * this schedule entry applies to. It should be set to 0 if + * there is only one VCPU for the domain. */ + unsigned int vcpu_id; + /* runtime specifies the amount of time that should be allocated + * to this VCPU per major frame. It is specified in nanoseconds */ + uint64_aligned_t runtime; + } sched_entries[ARINC653_MAX_DOMAINS_PER_SCHEDULE]; +}; +typedef struct xen_sysctl_arinc653_schedule xen_sysctl_arinc653_schedule_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_arinc653_schedule_t); + +struct xen_sysctl_credit_schedule { + /* Length of timeslice in milliseconds */ +#define XEN_SYSCTL_CSCHED_TSLICE_MAX 1000 +#define XEN_SYSCTL_CSCHED_TSLICE_MIN 1 + unsigned tslice_ms; + /* Rate limit (minimum timeslice) in microseconds */ +#define XEN_SYSCTL_SCHED_RATELIMIT_MAX 500000 +#define XEN_SYSCTL_SCHED_RATELIMIT_MIN 100 + unsigned ratelimit_us; +}; +typedef struct xen_sysctl_credit_schedule xen_sysctl_credit_schedule_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_credit_schedule_t); + +/* XEN_SYSCTL_scheduler_op */ +/* Set or get info? */ +#define XEN_SYSCTL_SCHEDOP_putinfo 0 +#define XEN_SYSCTL_SCHEDOP_getinfo 1 +struct xen_sysctl_scheduler_op { + uint32_t cpupool_id; /* Cpupool whose scheduler is to be targetted. 
*/ + uint32_t sched_id; /* XEN_SCHEDULER_* (domctl.h) */ + uint32_t cmd; /* XEN_SYSCTL_SCHEDOP_* */ + union { + struct xen_sysctl_sched_arinc653 { + XEN_GUEST_HANDLE_64(xen_sysctl_arinc653_schedule_t) schedule; + } sched_arinc653; + struct xen_sysctl_credit_schedule sched_credit; + } u; +}; +typedef struct xen_sysctl_scheduler_op xen_sysctl_scheduler_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_scheduler_op_t); struct xen_sysctl { uint32_t cmd; +#define XEN_SYSCTL_readconsole 1 +#define XEN_SYSCTL_tbuf_op 2 +#define XEN_SYSCTL_physinfo 3 +#define XEN_SYSCTL_sched_id 4 +#define XEN_SYSCTL_perfc_op 5 +#define XEN_SYSCTL_getdomaininfolist 6 +#define XEN_SYSCTL_debug_keys 7 +#define XEN_SYSCTL_getcpuinfo 8 +#define XEN_SYSCTL_availheap 9 +#define XEN_SYSCTL_get_pmstat 10 +#define XEN_SYSCTL_cpu_hotplug 11 +#define XEN_SYSCTL_pm_op 12 +#define XEN_SYSCTL_page_offline_op 14 +#define XEN_SYSCTL_lockprof_op 15 +#define XEN_SYSCTL_topologyinfo 16 +#define XEN_SYSCTL_numainfo 17 +#define XEN_SYSCTL_cpupool_op 18 +#define XEN_SYSCTL_scheduler_op 19 uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ union { struct xen_sysctl_readconsole readconsole; struct xen_sysctl_tbuf_op tbuf_op; struct xen_sysctl_physinfo physinfo; + struct xen_sysctl_topologyinfo topologyinfo; + struct xen_sysctl_numainfo numainfo; struct xen_sysctl_sched_id sched_id; struct xen_sysctl_perfc_op perfc_op; struct xen_sysctl_getdomaininfolist getdomaininfolist; @@ -289,6 +630,11 @@ struct xen_sysctl_availheap availheap; struct xen_sysctl_get_pmstat get_pmstat; struct xen_sysctl_cpu_hotplug cpu_hotplug; + struct xen_sysctl_pm_op pm_op; + struct xen_sysctl_page_offline_op page_offline; + struct xen_sysctl_lockprof_op lockprof_op; + struct xen_sysctl_cpupool_op cpupool_op; + struct xen_sysctl_scheduler_op scheduler_op; uint8_t pad[128]; } u; }; ==== - //SpectraBSD/stable/sys/xen/interface/tmem.h#1 ==== ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/trace.h#1 (text) - 
//SpectraBSD/stable/sys/xen/interface/trace.h#1 (text) ==== content @@ -38,6 +38,8 @@ #define TRC_MEM 0x0010f000 /* Xen memory trace */ #define TRC_PV 0x0020f000 /* Xen PV traces */ #define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */ +#define TRC_HW 0x0080f000 /* Xen hardware-related traces */ +#define TRC_GUEST 0x0800f000 /* Guest-generated traces */ #define TRC_ALL 0x0ffff000 #define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff) #define TRC_HD_CYCLE_FLAG (1UL<<31) @@ -52,14 +54,20 @@ #define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ #define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */ +#define TRC_SCHED_CLASS 0x00022000 /* Scheduler-specific */ #define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */ +/* Trace classes for Hardware */ +#define TRC_HW_PM 0x00801000 /* Power management traces */ +#define TRC_HW_IRQ 0x00802000 /* Traces relating to the handling of IRQs */ + /* Trace events per class */ #define TRC_LOST_RECORDS (TRC_GEN + 1) #define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2) #define TRC_TRACE_CPU_CHANGE (TRC_GEN + 3) -#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1) +#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1) +#define TRC_SCHED_CONTINUE_RUNNING (TRC_SCHED_MIN + 2) #define TRC_SCHED_DOM_ADD (TRC_SCHED_VERBOSE + 1) #define TRC_SCHED_DOM_REM (TRC_SCHED_VERBOSE + 2) #define TRC_SCHED_SLEEP (TRC_SCHED_VERBOSE + 3) @@ -75,10 +83,17 @@ #define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED_VERBOSE + 13) #define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14) #define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15) +#define TRC_SCHED_SHUTDOWN_CODE (TRC_SCHED_VERBOSE + 16) #define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1) #define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2) #define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3) +#define TRC_MEM_SET_P2M_ENTRY (TRC_MEM + 4) +#define TRC_MEM_DECREASE_RESERVATION (TRC_MEM + 5) +#define TRC_MEM_POD_POPULATE (TRC_MEM + 16) +#define TRC_MEM_POD_ZERO_RECLAIM (TRC_MEM + 17) +#define 
TRC_MEM_POD_SUPERPAGE_SPLINTER (TRC_MEM + 18) + #define TRC_PV_HYPERCALL (TRC_PV + 1) #define TRC_PV_TRAP (TRC_PV + 3) @@ -111,6 +126,7 @@ #define TRC_SHADOW_RESYNC_ONLY (TRC_SHADOW + 15) /* trace events per subclass */ +#define TRC_HVM_NESTEDFLAG (0x400) #define TRC_HVM_VMENTRY (TRC_HVM_ENTRYEXIT + 0x01) #define TRC_HVM_VMEXIT (TRC_HVM_ENTRYEXIT + 0x02) #define TRC_HVM_VMEXIT64 (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02) @@ -140,12 +156,38 @@ #define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14) #define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14) #define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15) -#define TRC_HVM_IO_ASSIST (TRC_HVM_HANDLER + 0x16) -#define TRC_HVM_MMIO_ASSIST (TRC_HVM_HANDLER + 0x17) +#define TRC_HVM_IOPORT_READ (TRC_HVM_HANDLER + 0x16) +#define TRC_HVM_IOMEM_READ (TRC_HVM_HANDLER + 0x17) #define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18) #define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19) #define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19) +#define TRC_HVM_RDTSC (TRC_HVM_HANDLER + 0x1a) +#define TRC_HVM_INTR_WINDOW (TRC_HVM_HANDLER + 0x20) +#define TRC_HVM_NPF (TRC_HVM_HANDLER + 0x21) +#define TRC_HVM_REALMODE_EMULATE (TRC_HVM_HANDLER + 0x22) +#define TRC_HVM_TRAP (TRC_HVM_HANDLER + 0x23) +#define TRC_HVM_TRAP_DEBUG (TRC_HVM_HANDLER + 0x24) +#define TRC_HVM_VLAPIC (TRC_HVM_HANDLER + 0x25) +#define TRC_HVM_IOPORT_WRITE (TRC_HVM_HANDLER + 0x216) +#define TRC_HVM_IOMEM_WRITE (TRC_HVM_HANDLER + 0x217) + +/* trace events for per class */ +#define TRC_PM_FREQ_CHANGE (TRC_HW_PM + 0x01) +#define TRC_PM_IDLE_ENTRY (TRC_HW_PM + 0x02) +#define TRC_PM_IDLE_EXIT (TRC_HW_PM + 0x03) + +/* Trace events for IRQs */ +#define TRC_HW_IRQ_MOVE_CLEANUP_DELAY (TRC_HW_IRQ + 0x1) +#define TRC_HW_IRQ_MOVE_CLEANUP (TRC_HW_IRQ + 0x2) +#define TRC_HW_IRQ_BIND_VECTOR (TRC_HW_IRQ + 0x3) +#define TRC_HW_IRQ_CLEAR_VECTOR (TRC_HW_IRQ + 0x4) +#define TRC_HW_IRQ_MOVE_FINISH (TRC_HW_IRQ + 0x5) +#define TRC_HW_IRQ_ASSIGN_VECTOR (TRC_HW_IRQ + 0x6) +#define TRC_HW_IRQ_UNMAPPED_VECTOR 
(TRC_HW_IRQ + 0x7) +#define TRC_HW_IRQ_HANDLED (TRC_HW_IRQ + 0x8) + + /* This structure represents a single trace buffer record. */ struct t_rec { uint32_t event:28; @@ -180,6 +222,16 @@ /* Records follow immediately after the meta-data header. */ }; +/* Structure used to pass MFNs to the trace buffers back to trace consumers. + * Offset is an offset into the mapped structure where the mfn list will be held. + * MFNs will be at ((unsigned long *)(t_info))+(t_info->cpu_offset[cpu]). + */ +struct t_info { + uint16_t tbuf_size; /* Size in pages of each trace buffer */ + uint16_t mfn_offset[]; /* Offset within t_info structure of the page list per cpu */ + /* MFN lists immediately after the header */ +}; + #endif /* __XEN_PUBLIC_TRACE_H__ */ /* ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/vcpu.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/vcpu.h#1 (text) ==== content @@ -185,8 +185,7 @@ /* * Get the physical ID information for a pinned vcpu's underlying physical * processor. The physical ID informmation is architecture-specific. - * On x86: id[31:0]=apic_id, id[63:32]=acpi_id, and all values 0xff and - * greater are reserved. + * On x86: id[31:0]=apic_id, id[63:32]=acpi_id. * This command returns -EINVAL if it is not a valid operation for this VCPU. */ #define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ @@ -195,10 +194,36 @@ }; typedef struct vcpu_get_physid vcpu_get_physid_t; DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t); -#define xen_vcpu_physid_to_x86_apicid(physid) \ - ((((uint32_t)(physid)) >= 0xff) ? 0xff : ((uint8_t)(physid))) -#define xen_vcpu_physid_to_x86_acpiid(physid) \ - ((((uint32_t)((physid)>>32)) >= 0xff) ? 0xff : ((uint8_t)((physid)>>32))) +#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid)) +#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32)) + +/* + * Register a memory location to get a secondary copy of the vcpu time + * parameters. 
The master copy still exists as part of the vcpu shared + * memory area, and this secondary copy is updated whenever the master copy + * is updated (and using the same versioning scheme for synchronisation). + * + * The intent is that this copy may be mapped (RO) into userspace so + * that usermode can compute system time using the time info and the + * tsc. Usermode will see an array of vcpu_time_info structures, one + * for each vcpu, and choose the right one by an existing mechanism + * which allows it to get the current vcpu number (such as via a + * segment limit). It can then apply the normal algorithm to compute + * system time from the tsc. + * + * @extra_arg == pointer to vcpu_register_time_info_memory_area structure. + */ +#define VCPUOP_register_vcpu_time_memory_area 13 +DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t); +struct vcpu_register_time_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_time_info_t) h; + struct vcpu_time_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t); #endif /* __XEN_PUBLIC_VCPU_H__ */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/version.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/version.h#1 (text) ==== content @@ -78,6 +78,9 @@ /* arg == xen_domain_handle_t. */ #define XENVER_guest_handle 8 +#define XENVER_commandline 9 +typedef char xen_commandline_t[1024]; + #endif /* __XEN_PUBLIC_VERSION_H__ */ /* ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/xen-compat.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/xen-compat.h#1 (text) ==== content @@ -27,7 +27,7 @@ #ifndef __XEN_PUBLIC_XEN_COMPAT_H__ #define __XEN_PUBLIC_XEN_COMPAT_H__ -#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030209 +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040200 #if defined(__XEN__) || defined(__XEN_TOOLS__) /* Xen is built with matching headers and implements the latest interface. 
*/ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/xen.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/xen.h#1 (text) ==== content @@ -30,9 +30,11 @@ #include "xen-compat.h" #if defined(__i386__) || defined(__x86_64__) -#include "arch-x86/xen.h" +#include #elif defined(__ia64__) -#include "arch-ia64.h" +#include +#elif defined(__arm__) +#include #else #error "Unsupported architecture" #endif @@ -47,6 +49,7 @@ __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); DEFINE_XEN_GUEST_HANDLE(void); +DEFINE_XEN_GUEST_HANDLE(uint64_t); DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #endif @@ -54,6 +57,10 @@ * HYPERCALLS */ +/* `incontents 100 hcalls List of hypercalls + * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*() + */ + #define __HYPERVISOR_set_trap_table 0 #define __HYPERVISOR_mmu_update 1 #define __HYPERVISOR_set_gdt 2 @@ -91,6 +98,8 @@ #define __HYPERVISOR_sysctl 35 #define __HYPERVISOR_domctl 36 #define __HYPERVISOR_kexec_op 37 +#define __HYPERVISOR_tmem_op 38 +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 @@ -102,6 +111,8 @@ #define __HYPERVISOR_arch_6 54 #define __HYPERVISOR_arch_7 55 +/* ` } */ + /* * HYPERCALL COMPATIBILITY. */ @@ -135,6 +146,7 @@ * The latter can be allocated only once per guest: they must initially be * allocated to VCPU0 but can subsequently be re-bound. */ +/* ` enum virq { */ #define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ #define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ #define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ @@ -143,6 +155,10 @@ #define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ #define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ #define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ +#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ +#define VIRQ_MEM_EVENT 10 /* G. 
(DOM0) A memory event has occured */ +#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ +#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ /* Architecture-specific VIRQ definitions. */ #define VIRQ_ARCH_0 16 @@ -153,26 +169,72 @@ #define VIRQ_ARCH_5 21 #define VIRQ_ARCH_6 22 #define VIRQ_ARCH_7 23 +/* ` } */ #define NR_VIRQS 24 /* - * MMU-UPDATE REQUESTS + * ` enum neg_errnoval + * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[], + * ` unsigned count, unsigned *done_out, + * ` unsigned foreigndom) + * ` + * @reqs is an array of mmu_update_t structures ((ptr, val) pairs). + * @count is the length of the above array. + * @pdone is an output parameter indicating number of completed operations + * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this + * hypercall invocation. Can be DOMID_SELF. + * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced + * in this hypercall invocation. The value of this field + * (x) encodes the PFD as follows: + * x == 0 => PFD == DOMID_SELF + * x != 0 => PFD == x - 1 * - * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs. - * A foreigndom (FD) can be specified (or DOMID_SELF for none). - * Where the FD has some effect, it is described below. - * ptr[1:0] specifies the appropriate MMU_* command. - * + * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command. + * ------------- * ptr[1:0] == MMU_NORMAL_PT_UPDATE: - * Updates an entry in a page table. If updating an L1 table, and the new - * table entry is valid/present, the mapped frame must belong to the FD, if - * an FD has been specified. If attempting to map an I/O page then the - * caller assumes the privilege of the FD. + * Updates an entry in a page table belonging to PFD. If updating an L1 table, + * and the new table entry is valid/present, the mapped frame must belong to + * FD. If attempting to map an I/O page then the caller assumes the privilege + * of the FD. 
* FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. * FD == DOMID_XEN: Map restricted areas of Xen's heap space. * ptr[:2] -- Machine address of the page-table entry to modify. * val -- Value to write. + * + * There also certain implicit requirements when using this hypercall. The + * pages that make up a pagetable must be mapped read-only in the guest. + * This prevents uncontrolled guest updates to the pagetable. Xen strictly + * enforces this, and will disallow any pagetable update which will end up + * mapping pagetable page RW, and will disallow using any writable page as a + * pagetable. In practice it means that when constructing a page table for a + * process, thread, etc, we MUST be very dilligient in following these rules: + * 1). Start with top-level page (PGD or in Xen language: L4). Fill out + * the entries. + * 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD + * or L2). + * 3). Start filling out the PTE table (L1) with the PTE entries. Once + * done, make sure to set each of those entries to RO (so writeable bit + * is unset). Once that has been completed, set the PMD (L2) for this + * PTE table as RO. + * 4). When completed with all of the PMD (L2) entries, and all of them have + * been set to RO, make sure to set RO the PUD (L3). Do the same + * operation on PGD (L4) pagetable entries that have a PUD (L3) entry. + * 5). Now before you can use those pages (so setting the cr3), you MUST also + * pin them so that the hypervisor can verify the entries. This is done + * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame + * number of the PGD (L4)). And this point the HYPERVISOR_mmuext_op( + * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be + * issued. + * For 32-bit guests, the L4 is not used (as there is less pagetables), so + * instead use L3. + * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE + * hypercall. 
Also if so desired the OS can also try to write to the PTE + * and be trapped by the hypervisor (as the PTE entry is RO). + * + * To deallocate the pages, the operations are the reverse of the steps + * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the + * pagetable MUST not be in use (meaning that the cr3 is not set to it). * * ptr[1:0] == MMU_MACHPHYS_UPDATE: * Updates an entry in the machine->pseudo-physical mapping table. @@ -183,6 +245,72 @@ * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed * with those in @val. + * + * @val is usually the machine frame number along with some attributes. + * The attributes by default follow the architecture defined bits. Meaning that + * if this is a X86_64 machine and four page table layout is used, the layout + * of val is: + * - 63 if set means No execute (NX) + * - 46-13 the machine frame number + * - 12 available for guest + * - 11 available for guest + * - 10 available for guest + * - 9 available for guest + * - 8 global + * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages) + * - 6 dirty + * - 5 accessed + * - 4 page cached disabled + * - 3 page write through + * - 2 userspace accessible + * - 1 writeable + * - 0 present + * + * The one bits that does not fit with the default layout is the PAGE_PSE + * also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the + * HYPERVISOR_mmuext_op serve as mechanism to set a pagetable to be 4MB + * (or 2MB) instead of using the PAGE_PSE bit. + * + * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen + * using it as the Page Attribute Table (PAT) bit - for details on it please + * refer to Intel SDM 10.12. The PAT allows to set the caching attributes of + * pages instead of using MTRRs. 
+ * + * The PAT MSR is as follow (it is a 64-bit value, each entry is 8 bits): + * PAT4 PAT0 + * +---+----+----+----+-----+----+----+ + * WC | WC | WB | UC | UC- | WC | WB | <= Linux + * +---+----+----+----+-----+----+----+ + * WC | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots) + * +---+----+----+----+-----+----+----+ + * WC | WP | WC | UC | UC- | WT | WB | <= Xen + * +---+----+----+----+-----+----+----+ + * + * The lookup of this index table translates to looking up + * Bit 7, Bit 4, and Bit 3 of val entry: + * + * PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3). + * + * If all bits are off, then we are using PAT0. If bit 3 turned on, + * then we are using PAT1, if bit 3 and bit 4, then PAT2.. + * + * As you can see, the Linux PAT1 translates to PAT4 under Xen. Which means + * that if a guest that follows Linux's PAT setup and would like to set Write + * Combined on pages it MUST use PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is + * set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the + * caching as: + * + * WB = none (so PAT0) + * WC = PWT (bit 3 on) + * UC = PWT | PCD (bit 3 and 4 are on). + * + * To make it work with Xen, it needs to translate the WC bit as so: + * + * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3 + * + * And to translate back it would: + * + * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. */ #define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ #define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ @@ -227,10 +355,24 @@ * * cmd: MMUEXT_FLUSH_CACHE * No additional arguments. Writes back and flushes cache contents. + * + * cmd: MMUEXT_FLUSH_CACHE_GLOBAL + * No additional arguments. Writes back and flushes cache contents + * on all CPUs in the system. * * cmd: MMUEXT_SET_LDT * linear_addr: Linear address of LDT base (NB. must be page-aligned). * nr_ents: Number of entries in LDT. + * + * cmd: MMUEXT_CLEAR_PAGE + * mfn: Machine frame number to be cleared. 
+ * + * cmd: MMUEXT_COPY_PAGE + * mfn: Machine frame number of the destination page. + * src_mfn: Machine frame number of the source page. + * + * cmd: MMUEXT_[UN]MARK_SUPER + * mfn: Machine frame number of head of superpage to be [un]marked. */ #define MMUEXT_PIN_L1_TABLE 0 #define MMUEXT_PIN_L2_TABLE 1 @@ -247,12 +389,18 @@ #define MMUEXT_FLUSH_CACHE 12 #define MMUEXT_SET_LDT 13 #define MMUEXT_NEW_USER_BASEPTR 15 +#define MMUEXT_CLEAR_PAGE 16 +#define MMUEXT_COPY_PAGE 17 +#define MMUEXT_FLUSH_CACHE_GLOBAL 18 +#define MMUEXT_MARK_SUPER 19 +#define MMUEXT_UNMARK_SUPER 20 #ifndef __ASSEMBLY__ struct mmuext_op { unsigned int cmd; union { - /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR + * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */ xen_pfn_t mfn; /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ unsigned long linear_addr; @@ -262,10 +410,12 @@ unsigned int nr_ents; /* TLB_FLUSH_MULTI, INVLPG_MULTI */ #if __XEN_INTERFACE_VERSION__ >= 0x00030205 - XEN_GUEST_HANDLE(void) vcpumask; + XEN_GUEST_HANDLE(const_void) vcpumask; #else - void *vcpumask; + const void *vcpumask; #endif + /* COPY_PAGE */ + xen_pfn_t src_mfn; } arg2; }; typedef struct mmuext_op mmuext_op_t; @@ -343,6 +493,16 @@ #define DOMID_XEN (0x7FF2U) /* + * DOMID_COW is used as the owner of sharable pages */ +#define DOMID_COW (0x7FF3U) + +/* DOMID_INVALID is used to identify pages with unknown owner. */ +#define DOMID_INVALID (0x7FF4U) + +/* Idle domain. */ +#define DOMID_IDLE (0x7FFFU) + +/* * Send an array of these to HYPERVISOR_mmu_update(). * NB. The fields are natural pointer/address size for this architecture. */ @@ -442,7 +602,7 @@ * of this structure remaining constant. */ struct shared_info { - struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; + struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS]; /* * A domain can create "event channels" on which it can send and receive @@ -501,6 +661,7 @@ * a. relocated kernel image * b. initial ram disk [mod_start, mod_len] * c. 
list of allocated page frames [mfn_list, nr_pages] + * (unless relocated due to XEN_ELFNOTE_INIT_P2M) * d. start_info_t structure [register ESI (x86)] * e. bootstrap page tables [pt_base, CR3 (x86)] * f. bootstrap stack [register ESP (x86)] @@ -539,9 +700,14 @@ unsigned long pt_base; /* VIRTUAL address of page directory. */ unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ - unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module */ + /* (PFN of pre-loaded module if */ + /* SIF_MOD_START_PFN set in flags). */ unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ int8_t cmd_line[MAX_GUEST_CMDLINE]; + /* The pfn range here covers both page table and p->m table frames. */ + unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */ + unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */ }; typedef struct start_info start_info_t; @@ -554,12 +720,41 @@ /* These flags are passed in the 'flags' field of start_info_t. */ #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ +#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ +#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ #define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ +/* + * A multiboot module is a package containing modules very similar to a + * multiboot module array. The only differences are: + * - the array of module descriptors is by convention simply at the beginning + * of the multiboot module, + * - addresses in the module descriptors are based on the beginning of the + * multiboot module, + * - the number of modules is determined by a termination descriptor that has + * mod_start == 0. 
+ * + * This permits to both build it statically and reference it in a configuration + * file, and let the PV guest easily rebase the addresses to virtual addresses + * and at the same time count the number of modules. + */ +struct xen_multiboot_mod_list +{ + /* Address of first byte of the module */ + uint32_t mod_start; + /* Address of last byte of the module (inclusive) */ + uint32_t mod_end; + /* Address of zero-terminated command line */ + uint32_t cmdline; + /* Unused, must be zero */ + uint32_t pad; +}; + typedef struct dom0_vga_console_info { uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ #define XEN_VGATYPE_TEXT_MODE_3 0x03 #define XEN_VGATYPE_VESA_LFB 0x23 +#define XEN_VGATYPE_EFI_LFB 0x70 union { struct { @@ -618,14 +813,23 @@ /* Default definitions for macros used by domctl/sysctl. */ #if defined(__XEN__) || defined(__XEN_TOOLS__) + #ifndef uint64_aligned_t #define uint64_aligned_t uint64_t #endif #ifndef XEN_GUEST_HANDLE_64 #define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) #endif + +#ifndef __ASSEMBLY__ +struct xenctl_cpumap { + XEN_GUEST_HANDLE_64(uint8) bitmap; + uint32_t nr_cpus; +}; #endif +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + #endif /* __XEN_PUBLIC_XEN_H__ */ /* ==== //depot/vendor/FreeBSD/stable/9/sys/xen/interface/xenoprof.h#1 (text) - //SpectraBSD/stable/sys/xen/interface/xenoprof.h#1 (text) ==== content @@ -50,7 +50,11 @@ #define XENOPROF_shutdown 13 #define XENOPROF_get_buffer 14 #define XENOPROF_set_backtrace 15 -#define XENOPROF_last_op 15 + +/* AMD IBS support */ +#define XENOPROF_get_ibs_caps 16 +#define XENOPROF_ibs_counter 17 +#define XENOPROF_last_op 17 #define MAX_OPROF_EVENTS 32 #define MAX_OPROF_DOMAINS 25 @@ -64,7 +68,7 @@ }; /* PC value that indicates a special code */ -#define XENOPROF_ESCAPE_CODE ~0UL +#define XENOPROF_ESCAPE_CODE (~0ULL) /* Transient events for the xenoprof->oprofile cpu buf */ #define XENOPROF_TRACE_BEGIN 1 @@ -124,6 +128,16 @@ } xenoprof_passive_t; 
DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t); +struct xenoprof_ibs_counter { + uint64_t op_enabled; + uint64_t fetch_enabled; + uint64_t max_cnt_fetch; + uint64_t max_cnt_op; + uint64_t rand_en; + uint64_t dispatched_ops; +}; +typedef struct xenoprof_ibs_counter xenoprof_ibs_counter_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_ibs_counter_t); #endif /* __XEN_PUBLIC_XENOPROF_H__ */ ==== - //SpectraBSD/stable/sys/xen/interface/xsm/flask_op.h#1 ==== ==== //depot/vendor/FreeBSD/stable/9/sys/xen/xen_intr.h#2 (text) - //SpectraBSD/stable/sys/xen/xen_intr.h#1 (text) ==== content @@ -1,103 +1,217 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- */ +/****************************************************************************** + * xen_intr.h + * + * APIs for managing Xen event channel, virtual IRQ, and physical IRQ + * notifications. + * + * Copyright (c) 2004, K A Fraser + * Copyright (c) 2012, Spectra Logic Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * $FreeBSD: stable/9/sys/xen/xen_intr.h 214077 2010-10-19 20:53:30Z gibbs $ + */ #ifndef _XEN_INTR_H_ #define _XEN_INTR_H_ -/* -* The flat IRQ space is divided into two regions: -* 1. A one-to-one mapping of real physical IRQs. This space is only used -* if we have physical device-access privilege. This region is at the -* start of the IRQ space so that existing device drivers do not need -* to be modified to translate physical IRQ numbers into our IRQ space. -* 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These -* are bound using the provided bind/unbind functions. -* -* -* $FreeBSD: stable/9/sys/xen/xen_intr.h 214077 2010-10-19 20:53:30Z gibbs $ -*/ +#ifndef __XEN_EVTCHN_PORT_DEFINED__ +typedef uint32_t evtchn_port_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); +#define __XEN_EVTCHN_PORT_DEFINED__ 1 +#endif -#define PIRQ_BASE 0 -#define NR_PIRQS 128 +/** Registered Xen interrupt callback handle. */ +typedef void * xen_intr_handle_t; -#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) -#define NR_DYNIRQS 128 +/** + * Associate an already allocated local event channel port an interrupt + * handler. + * + * \param dev The device making this bind request. + * \param local_port The event channel to bind. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. 
+ * + * \returns 0 on success, otherwise an errno. + */ +int xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type irqflags, xen_intr_handle_t *handlep); -#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) +/** + * Allocate a local event channel port, accessible by the specified + * remote/foreign domain and, if successful, associate the port with + * the specified interrupt handler. + * + * \param dev The device making this bind request. + * \param remote_domain Remote domain grant permission to signal the + * newly allocated local port. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. + */ +int xen_intr_alloc_and_bind_local_port(device_t dev, + u_int remote_domain, driver_filter_t filter, driver_intr_t handler, + void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); -#define pirq_to_irq(_x) ((_x) + PIRQ_BASE) -#define irq_to_pirq(_x) ((_x) - PIRQ_BASE) - -#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE) -#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE) - -/* - * Dynamic binding of event channels and VIRQ sources to guest IRQ space. +/** + * Associate the specified interrupt handler with the remote event + * channel port specified by remote_domain and remote_port. + * + * \param dev The device making this bind request. + * \param remote_domain The domain peer for this event channel connection. + * \param remote_port Remote domain's local port number for this event + * channel port. + * \param filter An interrupt filter handler. 
Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ +int xen_intr_bind_remote_port(device_t dev, u_int remote_domain, + evtchn_port_t remote_port, driver_filter_t filter, + driver_intr_t handler, void *arg, enum intr_type irqflags, + xen_intr_handle_t *handlep); -/* - * Bind a caller port event channel to an interrupt handler. If - * successful, the guest IRQ number is returned in *irqp. Return zero - * on success or errno otherwise. +/** + * Associate the specified interrupt handler with the specified Xen + * virtual interrupt source. + * + * \param dev The device making this bind request. + * \param virq The Xen virtual IRQ number for the Xen interrupt + * source being hooked. + * \param cpu The cpu on which interrupt events should be delivered. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. 
*/ -extern int bind_caller_port_to_irqhandler(unsigned int caller_port, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); +int xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, + driver_filter_t filter, driver_intr_t handler, + void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); -/* - * Bind a listening port to an interrupt handler. If successful, the - * guest IRQ number is returned in *irqp. Return zero on success or - * errno otherwise. +/** + * Associate an interprocessor interrupt vector with an interrupt handler. + * + * \param dev The device making this bind request. + * \param ipi The interprocessor interrupt vector number of the + * interrupt source being hooked. + * \param cpu The cpu receiving the IPI. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_listening_port_to_irqhandler(unsigned int remote_domain, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); +int xen_intr_bind_ipi(device_t dev, u_int ipi, u_int cpu, + driver_filter_t filter, enum intr_type irqflags, + xen_intr_handle_t *handlep); -/* - * Bind a VIRQ to an interrupt handler. If successful, the guest IRQ - * number is returned in *irqp. Return zero on success or errno - * otherwise. +/** + * Unbind an interrupt handler from its interrupt source. + * + * \param handlep A pointer to the opaque handle that was initialized + * at the time the interrupt source was bound. 
+ * + * \returns 0 on success, otherwise an errno. + * + * \note The event channel, if any, that was allocated at bind time is + * closed upon successful return of this method. + * + * \note It is always safe to call xen_intr_unbind() on a handle that + * has been initilized to NULL. */ -extern int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - const char *devname, driver_filter_t filter, driver_intr_t handler, - void *arg, unsigned long irqflags, unsigned int *irqp); +void xen_intr_unbind(xen_intr_handle_t *handle); -/* - * Bind an IPI to an interrupt handler. If successful, the guest - * IRQ number is returned in *irqp. Return zero on success or errno - * otherwise. +/** + * Add a description to an interrupt handler. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \param fmt The sprintf compatible format string for the description, + * followed by optional sprintf arguments. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu, - const char *devname, driver_filter_t filter, - unsigned long irqflags, unsigned int *irqp); +int +xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); -/* - * Bind an interdomain event channel to an interrupt handler. If - * successful, the guest IRQ number is returned in *irqp. Return zero - * on success or errno otherwise. +/** + * Signal the remote peer of an interrupt source associated with an + * event channel port. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \note For xen interrupt sources other than event channel ports, + * this method takes no action. 
*/ -extern int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, const char *devname, - driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); +void xen_intr_signal(xen_intr_handle_t handle); -/* - * Unbind an interrupt handler using the guest IRQ number returned - * when it was bound. +/** + * Get the local event channel port number associated with this interrupt + * source. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \returns 0 if the handle is invalid, otherwise positive port number. */ -extern void unbind_from_irqhandler(unsigned int irq); - -static __inline__ int irq_cannonicalize(unsigned int irq) -{ - return (irq == 2) ? 9 : irq; -} - -extern void disable_irq(unsigned int); -extern void disable_irq_nosync(unsigned int); -extern void enable_irq(unsigned int); - -extern void irq_suspend(void); -extern void irq_resume(void); - -extern void idle_block(void); -extern int ap_cpu_initclocks(int cpu); +evtchn_port_t xen_intr_port(xen_intr_handle_t handle); #endif /* _XEN_INTR_H_ */ ==== //depot/vendor/FreeBSD/stable/9/sys/xen/xenbus/xenbus.c#2 (text) - //SpectraBSD/stable/sys/xen/xenbus/xenbus.c#1 (text) ==== content @@ -222,42 +222,6 @@ return (0); } -int -xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port) -{ - struct evtchn_alloc_unbound alloc_unbound; - int err; - - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = xenbus_get_otherend_id(dev); - - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - - if (err) { - xenbus_dev_fatal(dev, -err, "allocating event channel"); - return (-err); - } - *port = alloc_unbound.port; - return (0); -} - -int -xenbus_free_evtchn(device_t dev, evtchn_port_t port) -{ - struct evtchn_close close; - int err; - - close.port = port; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); - if (err) { - xenbus_dev_error(dev, -err, "freeing event 
channel %d", port); - return (-err); - } - return (0); -} - XenbusState xenbus_read_driver_state(const char *path) { ==== //depot/vendor/FreeBSD/stable/9/sys/xen/xenbus/xenbusvar.h#3 (text) - //SpectraBSD/stable/sys/xen/xenbus/xenbusvar.h#1 (text) ==== content @@ -195,39 +195,6 @@ int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, grant_ref_t *refp); /** - * Allocate an event channel for the given XenBus device. - * - * \param dev The device for which to allocate the event channel. - * \param port[out] The port identifier for the allocated event channel. - * - * \return On success, 0. Otherwise an errno value indicating the - * type of failure. - * - * A successfully allocated event channel should be free'd using - * xenbus_free_evtchn(). - * - * \note On error, \a dev will be switched to the XenbusStateClosing - * state and the returned error is saved in the per-device error node - * for \a dev in the XenStore. - */ -int xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port); - -/** - * Free an existing event channel. - * - * \param dev The device which allocated this event channel. - * \param port The port identifier for the event channel to free. - * - * \return On success, 0. Otherwise an errno value indicating the - * type of failure. - * - * \note On error, \a dev will be switched to the XenbusStateClosing - * state and the returned error is saved in the per-device error node - * for \a dev in the XenStore. - */ -int xenbus_free_evtchn(device_t dev, evtchn_port_t port); - -/** * Record the given errno, along with the given, printf-style, formatted * message in dev's device specific error node in the XenStore. * ==== //depot/vendor/FreeBSD/stable/9/sys/xen/xenstore/xenstore.c#3 (text) - //SpectraBSD/stable/sys/xen/xenstore/xenstore.c#1 (text) ==== content @@ -52,12 +52,12 @@ #include #include -#include #include #include #include #include +#include #include #include @@ -243,8 +243,8 @@ */ int evtchn; - /** Interrupt number for our event channel. 
*/ - u_int irq; + /** Handle for XenStore interrupts. */ + xen_intr_handle_t xen_intr_handle; /** * Interrupt driven config hook allowing us to defer @@ -503,11 +503,10 @@ xen_store->req_prod += avail; /* - * notify_remote_via_evtchn implies mb(). The other side - * will see the change to req_prod at the time of the - * interrupt. + * evtchn_signal() implies mb(). The other side will see + * the change to req_prod at the time of the interrupt. */ - notify_remote_via_evtchn(xs.evtchn); + xen_intr_signal(xs.xen_intr_handle); } return (0); @@ -559,7 +558,7 @@ * when msleep returns. */ error = msleep(xen_store, &xs.ring_lock, PCATCH|PDROP, - "xbread", /*timout*/0); + "xbread", /*timeout*/0); if (error && error != EWOULDBLOCK) return (error); continue; @@ -595,11 +594,10 @@ xen_store->rsp_cons += avail; /* - * notify_remote_via_evtchn implies mb(). The producer - * will see the updated consumer index when the event - * is delivered. + * evtchn_signal() implies mb(). The producer will see + * the updated consumer index when the event is delivered. */ - notify_remote_via_evtchn(xs.evtchn); + xen_intr_signal(xs.xen_intr_handle); } return (0); @@ -1066,11 +1064,11 @@ xen_store->rsp_cons = xen_store->rsp_prod; } - if (xs.irq) - unbind_from_irqhandler(xs.irq); + xen_intr_unbind(&xs.xen_intr_handle); - error = bind_caller_port_to_irqhandler(xs.evtchn, "xenstore", - xs_intr, NULL, INTR_TYPE_NET, &xs.irq); + error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn, + /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE, + &xs.xen_intr_handle); if (error) { log(LOG_WARNING, "XENSTORE request irq failed %i\n", error); return (error); @@ -1166,7 +1164,6 @@ sx_init(&xs.suspend_mutex, "xenstore suspend"); mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF); mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF); - xs.irq = 0; /* Initialize the shared memory rings to talk to xenstored */ error = xs_init_comms();