Change 535080 by willa@willa_repo on 2012/04/05 15:29:09

	Upgrade Xen interface headers to Xen 4.2.

	FreeBSD drivers are still using interface version 0x00030204, but
	they could be moved to 0x0003020a once they are adjusted for the
	Xen interface changes. Version 0x0003020a is the newest supported
	by the Xen hypervisor we're using.

	sys/xen/hvm.h:
	- Replace the implementation of hvm_get_parameter(), formerly
	  located in sys/xen/interface/hvm/params.h. Linux has a similar
	  file whose primary purpose is to hold this function.

	sys/xen/xenstore/xenstore.c:
	- Include the new header file to get hvm_get_parameter().

Affected files ...

... //depot/SpectraBSD/stable/9/sys/xen/hvm.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-arm.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-arm/hvm/save.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-ia64.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-ia64/debug_op.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-ia64/hvm/memmap.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-ia64/hvm/save.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-ia64/sioemu.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/cpuid.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/hvm/save.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/xen-mca.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/xen-x86_32.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/xen-x86_64.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/xen.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86_64.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/domctl.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/elfnote.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/event_channel.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/features.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/grant_table.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/hvm/hvm_info_table.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/hvm/hvm_op.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/hvm/ioreq.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/hvm/params.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/hvm/save.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/io/fsif.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/io/libxenvchan.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/io/pciif.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/io/protocols.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/io/ring.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/io/usbif.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/io/vscsiif.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/io/xs_wire.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/kexec.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/mem_event.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/memory.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/physdev.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/platform.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/sched.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/sysctl.h#2 edit
... //depot/SpectraBSD/stable/9/sys/xen/interface/tmem.h#1 add
... //depot/SpectraBSD/stable/9/sys/xen/interface/trace.h#2 edit
//depot/SpectraBSD/stable/9/sys/xen/interface/vcpu.h#2 edit ... //depot/SpectraBSD/stable/9/sys/xen/interface/version.h#2 edit ... //depot/SpectraBSD/stable/9/sys/xen/interface/xen-compat.h#2 edit ... //depot/SpectraBSD/stable/9/sys/xen/interface/xen.h#2 edit ... //depot/SpectraBSD/stable/9/sys/xen/interface/xenoprof.h#2 edit ... //depot/SpectraBSD/stable/9/sys/xen/interface/xsm/flask_op.h#1 add ... //depot/SpectraBSD/stable/9/sys/xen/xenstore/xenstore.c#3 edit Differences ... ==== //depot/SpectraBSD/stable/9/sys/xen/interface/arch-ia64.h#2 (text) ==== @@ -49,10 +49,11 @@ #define XEN_GUEST_HANDLE(name) __guest_handle_ ## name #define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) #define uint64_aligned_t uint64_t -#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) +#define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) #endif +#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) #ifndef __ASSEMBLY__ typedef unsigned long xen_pfn_t; @@ -66,7 +67,7 @@ /* Maximum number of virtual CPUs in multi-processor guests. */ /* WARNING: before changing this, check that shared_info fits on a page */ -#define MAX_VIRT_CPUS 64 +#define XEN_LEGACY_MAX_VCPUS 64 /* IO ports location for PV. */ #define IO_PORTS_PADDR 0x00000ffffc000000UL @@ -198,6 +199,15 @@ unsigned long rrs[8]; // region registers unsigned long krs[8]; // kernel registers unsigned long tmp[16]; // temp registers (e.g. for hyperprivops) + + /* itc paravirtualization + * vAR.ITC = mAR.ITC + itc_offset + * itc_last is one which was lastly passed to + * the guest OS in order to prevent it from + * going backwords. + */ + unsigned long itc_offset; + unsigned long itc_last; }; }; }; @@ -392,6 +402,7 @@ #define VGCF_EXTRA_REGS (1UL << 1) /* Set extra regs. */ #define VGCF_SET_CR_IRR (1UL << 2) /* Set cr_irr[0:3]. */ #define VGCF_online (1UL << 3) /* make this vcpu online */ +#define VGCF_SET_AR_ITC (1UL << 4) /* set pv ar.itc. itc_offset, itc_last */ unsigned long flags; /* VGCF_* flags */ struct vcpu_guest_context_regs regs; @@ -453,6 +464,11 @@ /* unexpose the foreign domain's p2m table into privileged domain */ #define IA64_DOM0VP_unexpose_foreign_p2m 13 +/* get memmap_info and memmap. It is possible to map the page directly + by foreign page mapping, but there is a race between writer. + This hypercall avoids such race. */ +#define IA64_DOM0VP_get_memmap 14 + // flags for page assignement to pseudo physical address space #define _ASSIGN_readonly 0 #define ASSIGN_readonly (1UL << _ASSIGN_readonly) ==== //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/cpuid.h#2 (text) ==== @@ -24,7 +24,7 @@ * Copyright (c) 2007 Citrix Systems, Inc. 
* * Authors: - * Keir Fraser + * Keir Fraser */ #ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/hvm/save.h#2 (text) ==== @@ -38,7 +38,7 @@ uint32_t version; /* File format version */ uint64_t changeset; /* Version of Xen that saved this file */ uint32_t cpuid; /* CPUID[0x01][%eax] on the saving machine */ - uint32_t pad0; + uint32_t gtsc_khz; /* Guest's TSC frequency in kHz */ }; DECLARE_HVM_SAVE_TYPE(HEADER, 1, struct hvm_save_header); @@ -46,6 +46,8 @@ /* * Processor + * + * Compat: Pre-3.4 didn't have msr_tsc_aux */ struct hvm_hw_cpu { @@ -123,9 +125,116 @@ uint32_t tr_arbytes; uint32_t ldtr_arbytes; - uint32_t sysenter_cs; - uint32_t padding0; + uint64_t sysenter_cs; + uint64_t sysenter_esp; + uint64_t sysenter_eip; + + /* msr for em64t */ + uint64_t shadow_gs; + + /* msr content saved/restored. */ + uint64_t msr_flags; + uint64_t msr_lstar; + uint64_t msr_star; + uint64_t msr_cstar; + uint64_t msr_syscall_mask; + uint64_t msr_efer; + uint64_t msr_tsc_aux; + + /* guest's idea of what rdtsc() would return */ + uint64_t tsc; + + /* pending event, if any */ + union { + uint32_t pending_event; + struct { + uint8_t pending_vector:8; + uint8_t pending_type:3; + uint8_t pending_error_valid:1; + uint32_t pending_reserved:19; + uint8_t pending_valid:1; + }; + }; + /* error code for pending event */ + uint32_t error_code; +}; + +struct hvm_hw_cpu_compat { + uint8_t fpu_regs[512]; + + uint64_t rax; + uint64_t rbx; + uint64_t rcx; + uint64_t rdx; + uint64_t rbp; + uint64_t rsi; + uint64_t rdi; + uint64_t rsp; + uint64_t r8; + uint64_t r9; + uint64_t r10; + uint64_t r11; + uint64_t r12; + uint64_t r13; + uint64_t r14; + uint64_t r15; + + uint64_t rip; + uint64_t rflags; + + uint64_t cr0; + uint64_t cr2; + uint64_t cr3; + uint64_t cr4; + + uint64_t dr0; + uint64_t dr1; + uint64_t dr2; + uint64_t dr3; + uint64_t dr6; + uint64_t dr7; + + uint32_t cs_sel; + uint32_t ds_sel; + uint32_t es_sel; + uint32_t fs_sel; + uint32_t gs_sel; + uint32_t ss_sel; + uint32_t tr_sel; + uint32_t ldtr_sel; + + uint32_t cs_limit; + uint32_t ds_limit; + uint32_t es_limit; + uint32_t fs_limit; + uint32_t gs_limit; + uint32_t ss_limit; + uint32_t tr_limit; + uint32_t ldtr_limit; + uint32_t idtr_limit; + uint32_t gdtr_limit; + + uint64_t cs_base; + uint64_t ds_base; + uint64_t es_base; + uint64_t fs_base; + uint64_t gs_base; + uint64_t ss_base; + uint64_t tr_base; + uint64_t ldtr_base; + uint64_t idtr_base; + uint64_t gdtr_base; + + uint32_t cs_arbytes; + uint32_t ds_arbytes; + uint32_t es_arbytes; + uint32_t fs_arbytes; + uint32_t gs_arbytes; + uint32_t ss_arbytes; + uint32_t tr_arbytes; + uint32_t ldtr_arbytes; + uint64_t sysenter_cs; uint64_t sysenter_esp; uint64_t sysenter_eip; @@ -139,6 +248,7 @@ uint64_t msr_cstar; uint64_t msr_syscall_mask; uint64_t msr_efer; + /*uint64_t msr_tsc_aux; COMPAT */ /* guest's idea of what rdtsc() would return */ uint64_t tsc; @@ -158,8 +268,22 @@ uint32_t error_code; }; -DECLARE_HVM_SAVE_TYPE(CPU, 2, struct hvm_hw_cpu); +static inline int _hvm_hw_fix_cpu(void *h) { + struct hvm_hw_cpu *new=h; + struct hvm_hw_cpu_compat *old=h; + + /* If we copy from the end backwards, we should + * be able to do the modification in-place */ + new->error_code=old->error_code; + new->pending_event=old->pending_event; + new->tsc=old->tsc; + new->msr_tsc_aux=0; + + return 0; +} +DECLARE_HVM_SAVE_TYPE_COMPAT(CPU, 2, struct hvm_hw_cpu, \ + struct hvm_hw_cpu_compat, _hvm_hw_fix_cpu); /* * PIC @@ -220,12 +344,7 @@ * IO-APIC */ -#ifdef __ia64__ -#define 
VIOAPIC_IS_IOSAPIC 1 -#define VIOAPIC_NUM_PINS 24 -#else #define VIOAPIC_NUM_PINS 48 /* 16 ISA IRQs, 32 non-legacy PCI IRQS. */ -#endif struct hvm_hw_vioapic { uint64_t base_address; @@ -244,13 +363,8 @@ uint8_t trig_mode:1; uint8_t mask:1; uint8_t reserve:7; -#if !VIOAPIC_IS_IOSAPIC uint8_t reserved[4]; uint8_t dest_id; -#else - uint8_t reserved[3]; - uint16_t dest_id; -#endif } fields; } redirtbl[VIOAPIC_NUM_PINS]; }; @@ -266,6 +380,7 @@ uint64_t apic_base_msr; uint32_t disabled; /* VLAPIC_xx_DISABLED */ uint32_t timer_divisor; + uint64_t tdt_msr; }; DECLARE_HVM_SAVE_TYPE(LAPIC, 5, struct hvm_hw_lapic); @@ -287,7 +402,7 @@ * Indexed by: device*4 + INTx#. */ union { - DECLARE_BITMAP(i, 32*4); + unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */ uint64_t pad[2]; }; }; @@ -300,7 +415,7 @@ * Indexed by ISA IRQ (assumes no ISA-device IRQ sharing). */ union { - DECLARE_BITMAP(i, 16); + unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */ uint64_t pad[1]; }; }; @@ -421,9 +536,54 @@ DECLARE_HVM_SAVE_TYPE(MTRR, 14, struct hvm_hw_mtrr); +/* + * The save area of XSAVE/XRSTOR. + */ + +struct hvm_hw_cpu_xsave { + uint64_t xfeature_mask; + uint64_t xcr0; /* Updated by XSETBV */ + uint64_t xcr0_accum; /* Updated by XSETBV */ + struct { + struct { char x[512]; } fpu_sse; + + struct { + uint64_t xstate_bv; /* Updated by XRSTOR */ + uint64_t reserved[7]; + } xsave_hdr; /* The 64-byte header */ + + struct { char x[0]; } ymm; /* YMM */ + } save_area; +}; + +#define CPU_XSAVE_CODE 16 + +/* + * Viridian hypervisor context. + */ + +struct hvm_viridian_domain_context { + uint64_t hypercall_gpa; + uint64_t guest_os_id; +}; + +DECLARE_HVM_SAVE_TYPE(VIRIDIAN_DOMAIN, 15, struct hvm_viridian_domain_context); + +struct hvm_viridian_vcpu_context { + uint64_t apic_assist; +}; + +DECLARE_HVM_SAVE_TYPE(VIRIDIAN_VCPU, 17, struct hvm_viridian_vcpu_context); + +struct hvm_vmce_vcpu { + uint64_t caps; +}; + +DECLARE_HVM_SAVE_TYPE(VMCE_VCPU, 18, struct hvm_vmce_vcpu); + /* * Largest type-code in use */ -#define HVM_SAVE_CODE_MAX 14 +#define HVM_SAVE_CODE_MAX 18 #endif /* __XEN_PUBLIC_HVM_SAVE_X86_H__ */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/xen-mca.h#2 (text) ==== @@ -56,13 +56,20 @@ /* Hypercall */ #define __HYPERVISOR_mca __HYPERVISOR_arch_0 -#define XEN_MCA_INTERFACE_VERSION 0x03000001 +/* + * The xen-unstable repo has interface version 0x03000001; out interface + * is incompatible with that and any future minor revisions, so we + * choose a different version number range that is numerically less + * than that used in xen-unstable. + */ +#define XEN_MCA_INTERFACE_VERSION 0x01ecc003 -/* IN: Dom0 calls hypercall from MC event handler. */ -#define XEN_MC_CORRECTABLE 0x0 -/* IN: Dom0/DomU calls hypercall from MC trap handler. */ -#define XEN_MC_TRAP 0x1 -/* XEN_MC_CORRECTABLE and XEN_MC_TRAP are mutually exclusive. 
*/ +/* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */ +#define XEN_MC_NONURGENT 0x0001 +/* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */ +#define XEN_MC_URGENT 0x0002 +/* IN: Dom0 acknowledges previosly-fetched telemetry */ +#define XEN_MC_ACK 0x0004 /* OUT: All is ok */ #define XEN_MC_OK 0x0 @@ -97,6 +104,7 @@ #define MC_TYPE_GLOBAL 0 #define MC_TYPE_BANK 1 #define MC_TYPE_EXTENDED 2 +#define MC_TYPE_RECOVERY 3 struct mcinfo_common { uint16_t type; /* structure type */ @@ -106,19 +114,24 @@ #define MC_FLAG_CORRECTABLE (1 << 0) #define MC_FLAG_UNCORRECTABLE (1 << 1) - +#define MC_FLAG_RECOVERABLE (1 << 2) +#define MC_FLAG_POLLED (1 << 3) +#define MC_FLAG_RESET (1 << 4) +#define MC_FLAG_CMCI (1 << 5) +#define MC_FLAG_MCE (1 << 6) /* contains global x86 mc information */ struct mcinfo_global { struct mcinfo_common common; /* running domain at the time in error (most likely the impacted one) */ uint16_t mc_domid; + uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ uint32_t mc_socketid; /* physical socket of the physical core */ uint16_t mc_coreid; /* physical impacted core */ uint16_t mc_core_threadid; /* core thread of physical core */ - uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */ + uint32_t mc_apicid; + uint32_t mc_flags; uint64_t mc_gstatus; /* global status */ - uint32_t mc_flags; }; /* contains bank local x86 mc information */ @@ -132,6 +145,8 @@ uint64_t mc_addr; /* bank address, only valid * if addr bit is set in mc_status */ uint64_t mc_misc; + uint64_t mc_ctrl2; + uint64_t mc_tsc; }; @@ -150,21 +165,121 @@ * multiple times. */ uint32_t mc_msrs; /* Number of msr with valid values. */ - struct mcinfo_msr mc_msr[5]; + /* + * Currently Intel extended MSR (32/64) include all gp registers + * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be + * useful at present. So expand this array to 16/32 to leave room. + */ + struct mcinfo_msr mc_msr[sizeof(void *) * 4]; +}; + +/* Recovery Action flags. Giving recovery result information to DOM0 */ + +/* Xen takes successful recovery action, the error is recovered */ +#define REC_ACTION_RECOVERED (0x1 << 0) +/* No action is performed by XEN */ +#define REC_ACTION_NONE (0x1 << 1) +/* It's possible DOM0 might take action ownership in some case */ +#define REC_ACTION_NEED_RESET (0x1 << 2) + +/* Different Recovery Action types, if the action is performed successfully, + * REC_ACTION_RECOVERED flag will be returned. + */ + +/* Page Offline Action */ +#define MC_ACTION_PAGE_OFFLINE (0x1 << 0) +/* CPU offline Action */ +#define MC_ACTION_CPU_OFFLINE (0x1 << 1) +/* L3 cache disable Action */ +#define MC_ACTION_CACHE_SHRINK (0x1 << 2) + +/* Below interface used between XEN/DOM0 for passing XEN's recovery action + * information to DOM0. + * usage Senario: After offlining broken page, XEN might pass its page offline + * recovery action result to DOM0. DOM0 will save the information in + * non-volatile memory for further proactive actions, such as offlining the + * easy broken page earlier when doing next reboot. 
+*/ +struct page_offline_action +{ + /* Params for passing the offlined page number to DOM0 */ + uint64_t mfn; + uint64_t status; +}; + +struct cpu_offline_action +{ + /* Params for passing the identity of the offlined CPU to DOM0 */ + uint32_t mc_socketid; + uint16_t mc_coreid; + uint16_t mc_core_threadid; +}; + +#define MAX_UNION_SIZE 16 +struct mcinfo_recovery +{ + struct mcinfo_common common; + uint16_t mc_bank; /* bank nr */ + uint8_t action_flags; + uint8_t action_types; + union { + struct page_offline_action page_retire; + struct cpu_offline_action cpu_offline; + uint8_t pad[MAX_UNION_SIZE]; + } action_info; }; + #define MCINFO_HYPERCALLSIZE 1024 #define MCINFO_MAXSIZE 768 +#define MCINFO_FLAGS_UNCOMPLETE 0x1 struct mc_info { /* Number of mcinfo_* entries in mi_data */ uint32_t mi_nentries; - - uint8_t mi_data[MCINFO_MAXSIZE - sizeof(uint32_t)]; + uint32_t flags; + uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8]; }; typedef struct mc_info mc_info_t; +DEFINE_XEN_GUEST_HANDLE(mc_info_t); +#define __MC_MSR_ARRAYSIZE 8 +#define __MC_NMSRS 1 +#define MC_NCAPS 7 /* 7 CPU feature flag words */ +#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */ +#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */ +#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */ +#define MC_CAPS_LINUX 3 /* Linux-defined */ +#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */ +#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */ +#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */ +struct mcinfo_logical_cpu { + uint32_t mc_cpunr; + uint32_t mc_chipid; + uint16_t mc_coreid; + uint16_t mc_threadid; + uint32_t mc_apicid; + uint32_t mc_clusterid; + uint32_t mc_ncores; + uint32_t mc_ncores_active; + uint32_t mc_nthreads; + int32_t mc_cpuid_level; + uint32_t mc_family; + uint32_t mc_vendor; + uint32_t mc_model; + uint32_t mc_step; + char mc_vendorid[16]; + char mc_brandid[64]; + uint32_t mc_cpu_caps[MC_NCAPS]; + uint32_t mc_cache_size; + uint32_t mc_cache_alignment; + int32_t mc_nmsrvals; + struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE]; +}; +typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t; +DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t); + /* * OS's should use these instead of writing their own lookup function @@ -181,12 +296,12 @@ * struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi); */ #define x86_mcinfo_first(_mi) \ - (struct mcinfo_common *)((_mi)->mi_data) + ((struct mcinfo_common *)(_mi)->mi_data) /* Prototype: * struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic); */ #define x86_mcinfo_next(_mic) \ - (struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size) + ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)) /* Prototype: * void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type); @@ -228,14 +343,15 @@ #define XEN_MC_fetch 1 struct xen_mc_fetch { /* IN/OUT variables. */ - uint32_t flags; - -/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */ -/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA, XEN_MC_NOMATCH */ + uint32_t flags; /* IN: XEN_MC_NONURGENT, XEN_MC_URGENT, + XEN_MC_ACK if ack'ing an earlier fetch */ + /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, + XEN_MC_NODATA, XEN_MC_NOMATCH */ + uint32_t _pad0; + uint64_t fetch_id; /* OUT: id for ack, IN: id we are ack'ing */ /* OUT variables. 
*/ - uint32_t fetch_idx; /* only useful for Dom0 for the notify hypercall */ - struct mc_info mc_info; + XEN_GUEST_HANDLE(mc_info_t) data; }; typedef struct xen_mc_fetch xen_mc_fetch_t; DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t); @@ -250,7 +366,6 @@ uint16_t mc_domid; /* The unprivileged domain to notify. */ uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify. * Usually echo'd value from the fetch hypercall. */ - uint32_t fetch_idx; /* echo'd value from the fetch hypercall. */ /* IN/OUT variables. */ uint32_t flags; @@ -261,14 +376,60 @@ typedef struct xen_mc_notifydomain xen_mc_notifydomain_t; DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t); +#define XEN_MC_physcpuinfo 3 +struct xen_mc_physcpuinfo { + /* IN/OUT */ + uint32_t ncpus; + uint32_t _pad0; + /* OUT */ + XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info; +}; + +#define XEN_MC_msrinject 4 +#define MC_MSRINJ_MAXMSRS 8 +struct xen_mc_msrinject { + /* IN */ + uint32_t mcinj_cpunr; /* target processor id */ + uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */ + uint32_t mcinj_count; /* 0 .. count-1 in array are valid */ + uint32_t _pad0; + struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS]; +}; + +/* Flags for mcinj_flags above; bits 16-31 are reserved */ +#define MC_MSRINJ_F_INTERPOSE 0x1 +#define XEN_MC_mceinject 5 +struct xen_mc_mceinject { + unsigned int mceinj_cpunr; /* target processor id */ +}; + +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#define XEN_MC_inject_v2 6 +#define XEN_MC_INJECT_TYPE_MASK 0x7 +#define XEN_MC_INJECT_TYPE_MCE 0x0 +#define XEN_MC_INJECT_TYPE_CMCI 0x1 + +#define XEN_MC_INJECT_CPU_BROADCAST 0x8 + +struct xen_mc_inject_v2 { + uint32_t flags; + struct xenctl_cpumap cpumap; +}; +#endif + struct xen_mc { uint32_t cmd; uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */ union { struct xen_mc_fetch mc_fetch; struct xen_mc_notifydomain mc_notifydomain; - uint8_t pad[MCINFO_HYPERCALLSIZE]; + struct xen_mc_physcpuinfo mc_physcpuinfo; + struct xen_mc_msrinject mc_msrinject; + struct xen_mc_mceinject mc_mceinject; +#if defined(__XEN__) || defined(__XEN_TOOLS__) + struct xen_mc_inject_v2 mc_inject_v2; +#endif } u; }; typedef struct xen_mc xen_mc_t; ==== //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/xen-x86_32.h#2 (text) ==== @@ -39,16 +39,7 @@ * Clobbered: Argument registers (e.g., 2-arg hypercall clobbers %ebx,%ecx) */ -#if __XEN_INTERFACE_VERSION__ < 0x00030203 /* - * Legacy hypercall interface: - * As above, except the entry sequence to the hypervisor is: - * mov $hypercall-number*32,%eax ; int $0x82 - */ -#define TRAP_INSTR "int $0x82" -#endif - -/* * These flat segments are in the Xen-private section of every GDT. Since these * are also present in the initial GDT, many OSes will be able to avoid * installing their own GDT. 
@@ -111,8 +102,8 @@ __guest_handle_ ## name; \ typedef struct { union { type *p; uint64_aligned_t q; }; } \ __guest_handle_64_ ## name -#undef set_xen_guest_handle -#define set_xen_guest_handle(hnd, val) \ +#undef set_xen_guest_handle_raw +#define set_xen_guest_handle_raw(hnd, val) \ do { if ( sizeof(hnd) == 8 ) *(uint64_t *)&(hnd) = 0; \ (hnd).p = val; \ } while ( 0 ) ==== //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/xen-x86_64.h#2 (text) ==== @@ -36,16 +36,6 @@ * Clobbered: argument registers (e.g., 2-arg hypercall clobbers %rdi,%rsi) */ -#if __XEN_INTERFACE_VERSION__ < 0x00030203 -/* - * Legacy hypercall interface: - * As above, except the entry sequence to the hypervisor is: - * mov $hypercall-number*32,%eax ; syscall - * Clobbered: %rcx, %r11, argument registers (as above) - */ -#define TRAP_INSTR "syscall" -#endif - /* * 64-bit segment selectors * These flat segments are in the Xen-private section of every GDT. Since these ==== //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86/xen.h#2 (text) ==== @@ -32,8 +32,7 @@ #define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ typedef struct { type *p; } __guest_handle_ ## name #else -#error "using old handle" -#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ +#define ___DEFINE_XEN_GUEST_HANDLE(name, type) \ typedef type * __guest_handle_ ## name #endif @@ -43,10 +42,11 @@ #define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name) #define __XEN_GUEST_HANDLE(name) __guest_handle_ ## name #define XEN_GUEST_HANDLE(name) __XEN_GUEST_HANDLE(name) -#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0) +#define set_xen_guest_handle_raw(hnd, val) do { (hnd).p = val; } while (0) #ifdef __XEN_TOOLS__ #define get_xen_guest_handle(val, hnd) do { val = (hnd).p; } while (0) #endif +#define set_xen_guest_handle(hnd, val) set_xen_guest_handle_raw(hnd, val) #if defined(__i386__) #include @@ -63,6 +63,11 @@ * SEGMENT DESCRIPTOR TABLES */ /* + * ` enum neg_errnoval + * ` HYPERVISOR_set_gdt(const xen_pfn_t frames[], unsigned int entries); + * ` + */ +/* * A number of GDT entries are reserved by Xen. These are not situated at the * start of the GDT because some stupid OSes export hard-coded selector values * in their ABI. These hard-coded values are always near the start of the GDT, @@ -72,15 +77,28 @@ #define FIRST_RESERVED_GDT_BYTE (FIRST_RESERVED_GDT_PAGE * 4096) #define FIRST_RESERVED_GDT_ENTRY (FIRST_RESERVED_GDT_BYTE / 8) -/* Maximum number of virtual CPUs in multi-processor guests. */ -#define MAX_VIRT_CPUS 32 +/* Maximum number of virtual CPUs in legacy multi-processor guests. */ +#define XEN_LEGACY_MAX_VCPUS 32 #ifndef __ASSEMBLY__ typedef unsigned long xen_ulong_t; /* + * ` enum neg_errnoval + * ` HYPERVISOR_stack_switch(unsigned long ss, unsigned long esp); + * ` + * Sets the stack segment and pointer for the current vcpu. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_trap_table(const struct trap_info traps[]); + * ` + */ +/* * Send an array of these to HYPERVISOR_set_trap_table(). + * Terminate the array with a sentinel entry, with traps[].address==0. * The privilege level specifies which modes may enter a trap via a software * interrupt. 
On x86/64, since rings 1 and 2 are unavailable, we allocate * privilege levels as follows: @@ -147,7 +165,7 @@ unsigned int event_callback_cs; /* compat CS of event cb */ unsigned int failsafe_callback_cs; /* compat CS of failsafe cb */ }; - } u; + }; #else unsigned long syscall_callback_eip; #endif @@ -175,6 +193,24 @@ #endif /* !__ASSEMBLY__ */ /* + * ` enum neg_errnoval + * ` HYPERVISOR_fpu_taskswitch(int set); + * ` + * Sets (if set!=0) or clears (if set==0) CR0.TS. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_debugreg(int regno, unsigned long value); + * + * ` unsigned long + * ` HYPERVISOR_get_debugreg(int regno); + * For 0<=reg<=7, returns the debug register value. + * For other values of reg, returns ((unsigned long)-EINVAL). + * (Unfortunately, this interface is defective.) + */ + +/* * Prefix forces emulation of some non-trapping instructions. * Currently only CPUID. */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/arch-x86_64.h#2 (text) ==== @@ -24,4 +24,20 @@ * Copyright (c) 2004-2006, K A Fraser */ -#include "arch-x86/xen.h" +#include + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_set_callbacks(unsigned long event_selector, + * ` unsigned long event_address, + * ` unsigned long failsafe_selector, + * ` unsigned long failsafe_address); + * ` + * Register for callbacks on events. When an event (from an event + * channel) occurs, event_address is used as the value of eip. + * + * A similar callback occurs if the segment selectors are invalid. + * failsafe_address is used as the value of eip. + * + * On x86_64, event_selector and failsafe_selector are ignored (???). + */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/domctl.h#2 (text) ==== @@ -32,41 +32,38 @@ #error "domctl operations are intended for use by node control tools only" #endif -#include "xen.h" +#include +#include -#define XEN_DOMCTL_INTERFACE_VERSION 0x00000005 +#define XEN_DOMCTL_INTERFACE_VERSION 0x00000008 -struct xenctl_cpumap { - XEN_GUEST_HANDLE_64(uint8_t) bitmap; - uint32_t nr_cpus; -}; - /* * NB. xen_domctl.domain is an IN/OUT parameter for this operation. * If it is specified as zero, an id is auto-allocated and returned. */ -#define XEN_DOMCTL_createdomain 1 +/* XEN_DOMCTL_createdomain */ struct xen_domctl_createdomain { /* IN parameters */ uint32_t ssidref; xen_domain_handle_t handle; /* Is this an HVM guest (as opposed to a PV guest)? */ -#define _XEN_DOMCTL_CDF_hvm_guest 0 -#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest) +#define _XEN_DOMCTL_CDF_hvm_guest 0 +#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest) /* Use hardware-assisted paging if available? */ -#define _XEN_DOMCTL_CDF_hap 1 -#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap) +#define _XEN_DOMCTL_CDF_hap 1 +#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap) + /* Should domain memory integrity be verifed by tboot during Sx? */ +#define _XEN_DOMCTL_CDF_s3_integrity 2 +#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity) + /* Disable out-of-sync shadow page tables? */ +#define _XEN_DOMCTL_CDF_oos_off 3 +#define XEN_DOMCTL_CDF_oos_off (1U<<_XEN_DOMCTL_CDF_oos_off) uint32_t flags; }; typedef struct xen_domctl_createdomain xen_domctl_createdomain_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_createdomain_t); -#define XEN_DOMCTL_destroydomain 2 -#define XEN_DOMCTL_pausedomain 3 -#define XEN_DOMCTL_unpausedomain 4 -#define XEN_DOMCTL_resumedomain 27 - -#define XEN_DOMCTL_getdomaininfo 5 +/* XEN_DOMCTL_getdomaininfo */ struct xen_domctl_getdomaininfo { /* OUT variables. 
*/ domid_t domain; /* Also echoed in domctl.domain */ @@ -91,34 +88,34 @@ /* Being debugged. */ #define _XEN_DOMINF_debugged 6 #define XEN_DOMINF_debugged (1U<<_XEN_DOMINF_debugged) - /* CPU to which this domain is bound. */ -#define XEN_DOMINF_cpumask 255 -#define XEN_DOMINF_cpushift 8 /* XEN_DOMINF_shutdown guest-supplied code. */ #define XEN_DOMINF_shutdownmask 255 #define XEN_DOMINF_shutdownshift 16 uint32_t flags; /* XEN_DOMINF_* */ uint64_aligned_t tot_pages; uint64_aligned_t max_pages; + uint64_aligned_t shr_pages; + uint64_aligned_t paged_pages; uint64_aligned_t shared_info_frame; /* GMFN of shared_info struct */ uint64_aligned_t cpu_time; uint32_t nr_online_vcpus; /* Number of VCPUs currently online. */ uint32_t max_vcpu_id; /* Maximum VCPUID in use by this domain. */ uint32_t ssidref; xen_domain_handle_t handle; + uint32_t cpupool; }; typedef struct xen_domctl_getdomaininfo xen_domctl_getdomaininfo_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getdomaininfo_t); -#define XEN_DOMCTL_getmemlist 6 +/* XEN_DOMCTL_getmemlist */ struct xen_domctl_getmemlist { /* IN variables. */ /* Max entries to write to output buffer. */ uint64_aligned_t max_pfns; /* Start index in guest's page list. */ uint64_aligned_t start_pfn; - XEN_GUEST_HANDLE_64(uint64_t) buffer; + XEN_GUEST_HANDLE_64(uint64) buffer; /* OUT variables. */ uint64_aligned_t num_pfns; }; @@ -126,7 +123,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getmemlist_t); -#define XEN_DOMCTL_getpageframeinfo 7 +/* XEN_DOMCTL_getpageframeinfo */ #define XEN_DOMCTL_PFINFO_LTAB_SHIFT 28 #define XEN_DOMCTL_PFINFO_NOTAB (0x0U<<28) @@ -137,6 +134,8 @@ #define XEN_DOMCTL_PFINFO_LTABTYPE_MASK (0x7U<<28) #define XEN_DOMCTL_PFINFO_LPINTAB (0x1U<<31) #define XEN_DOMCTL_PFINFO_XTAB (0xfU<<28) /* invalid page */ +#define XEN_DOMCTL_PFINFO_XALLOC (0xeU<<28) /* allocate-only page */ +#define XEN_DOMCTL_PFINFO_PAGEDTAB (0x8U<<28) #define XEN_DOMCTL_PFINFO_LTAB_MASK (0xfU<<28) struct xen_domctl_getpageframeinfo { @@ -150,21 +149,29 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo_t); -#define XEN_DOMCTL_getpageframeinfo2 8 +/* XEN_DOMCTL_getpageframeinfo2 */ struct xen_domctl_getpageframeinfo2 { /* IN variables. */ uint64_aligned_t num; /* IN/OUT variables. */ - XEN_GUEST_HANDLE_64(uint32_t) array; + XEN_GUEST_HANDLE_64(uint32) array; }; typedef struct xen_domctl_getpageframeinfo2 xen_domctl_getpageframeinfo2_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_getpageframeinfo2_t); +/* XEN_DOMCTL_getpageframeinfo3 */ +struct xen_domctl_getpageframeinfo3 { + /* IN variables. */ + uint64_aligned_t num; + /* IN/OUT variables. */ + XEN_GUEST_HANDLE_64(xen_pfn_t) array; +}; + /* * Control shadow pagetables operation */ -#define XEN_DOMCTL_shadow_op 10 +/* XEN_DOMCTL_shadow_op */ /* Disable shadow mode. */ #define XEN_DOMCTL_SHADOW_OP_OFF 0 @@ -229,7 +236,7 @@ uint32_t mb; /* Shadow memory allocation in MB */ /* OP_PEEK / OP_CLEAN */ - XEN_GUEST_HANDLE_64(uint8_t) dirty_bitmap; + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; uint64_aligned_t pages; /* Size of buffer. Updated with actual size. */ struct xen_domctl_shadow_op_stats stats; }; @@ -237,7 +244,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_shadow_op_t); -#define XEN_DOMCTL_max_mem 11 +/* XEN_DOMCTL_max_mem */ struct xen_domctl_max_mem { /* IN variables. 
*/ uint64_aligned_t max_memkb; @@ -246,8 +253,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_mem_t); -#define XEN_DOMCTL_setvcpucontext 12 -#define XEN_DOMCTL_getvcpucontext 13 +/* XEN_DOMCTL_setvcpucontext */ +/* XEN_DOMCTL_getvcpucontext */ struct xen_domctl_vcpucontext { uint32_t vcpu; /* IN */ XEN_GUEST_HANDLE_64(vcpu_guest_context_t) ctxt; /* IN/OUT */ @@ -256,7 +263,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpucontext_t); -#define XEN_DOMCTL_getvcpuinfo 14 +/* XEN_DOMCTL_getvcpuinfo */ struct xen_domctl_getvcpuinfo { /* IN variables. */ uint32_t vcpu; @@ -272,8 +279,8 @@ /* Get/set which physical cpus a vcpu can execute on. */ -#define XEN_DOMCTL_setvcpuaffinity 9 -#define XEN_DOMCTL_getvcpuaffinity 25 +/* XEN_DOMCTL_setvcpuaffinity */ +/* XEN_DOMCTL_getvcpuaffinity */ struct xen_domctl_vcpuaffinity { uint32_t vcpu; /* IN */ struct xenctl_cpumap cpumap; /* IN/OUT */ @@ -282,7 +289,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuaffinity_t); -#define XEN_DOMCTL_max_vcpus 15 +/* XEN_DOMCTL_max_vcpus */ struct xen_domctl_max_vcpus { uint32_t max; /* maximum number of vcpus */ }; @@ -290,10 +297,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_max_vcpus_t); -#define XEN_DOMCTL_scheduler_op 16 +/* XEN_DOMCTL_scheduler_op */ /* Scheduler types. */ #define XEN_SCHEDULER_SEDF 4 #define XEN_SCHEDULER_CREDIT 5 +#define XEN_SCHEDULER_CREDIT2 6 +#define XEN_SCHEDULER_ARINC653 7 /* Set or get info? */ #define XEN_DOMCTL_SCHEDOP_putinfo 0 #define XEN_DOMCTL_SCHEDOP_getinfo 1 @@ -312,13 +321,16 @@ uint16_t weight; uint16_t cap; } credit; + struct xen_domctl_sched_credit2 { + uint16_t weight; + } credit2; } u; }; typedef struct xen_domctl_scheduler_op xen_domctl_scheduler_op_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_scheduler_op_t); -#define XEN_DOMCTL_setdomainhandle 17 +/* XEN_DOMCTL_setdomainhandle */ struct xen_domctl_setdomainhandle { xen_domain_handle_t handle; }; @@ -326,7 +338,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdomainhandle_t); -#define XEN_DOMCTL_setdebugging 18 +/* XEN_DOMCTL_setdebugging */ struct xen_domctl_setdebugging { uint8_t enable; }; @@ -334,7 +346,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_setdebugging_t); -#define XEN_DOMCTL_irq_permission 19 +/* XEN_DOMCTL_irq_permission */ struct xen_domctl_irq_permission { uint8_t pirq; uint8_t allow_access; /* flag to specify enable/disable of IRQ access */ @@ -343,7 +355,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_irq_permission_t); -#define XEN_DOMCTL_iomem_permission 20 +/* XEN_DOMCTL_iomem_permission */ struct xen_domctl_iomem_permission { uint64_aligned_t first_mfn;/* first page (physical page number) in range */ uint64_aligned_t nr_mfns; /* number of pages in range (>0) */ @@ -353,7 +365,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_iomem_permission_t); -#define XEN_DOMCTL_ioport_permission 21 +/* XEN_DOMCTL_ioport_permission */ struct xen_domctl_ioport_permission { uint32_t first_port; /* first port int range */ uint32_t nr_ports; /* size of port range */ @@ -363,7 +375,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_ioport_permission_t); -#define XEN_DOMCTL_hypercall_init 22 +/* XEN_DOMCTL_hypercall_init */ struct xen_domctl_hypercall_init { uint64_aligned_t gmfn; /* GMFN to be initialised */ }; @@ -371,7 +383,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_hypercall_init_t); -#define XEN_DOMCTL_arch_setup 23 +/* XEN_DOMCTL_arch_setup */ #define _XEN_DOMAINSETUP_hvm_guest 0 #define XEN_DOMAINSETUP_hvm_guest (1UL<<_XEN_DOMAINSETUP_hvm_guest) #define _XEN_DOMAINSETUP_query 1 /* Get parameters (for save) */ @@ -391,35 +403,33 @@ 
DEFINE_XEN_GUEST_HANDLE(xen_domctl_arch_setup_t); -#define XEN_DOMCTL_settimeoffset 24 +/* XEN_DOMCTL_settimeoffset */ struct xen_domctl_settimeoffset { int32_t time_offset_seconds; /* applied to domain wallclock time */ }; typedef struct xen_domctl_settimeoffset xen_domctl_settimeoffset_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_settimeoffset_t); - -#define XEN_DOMCTL_gethvmcontext 33 -#define XEN_DOMCTL_sethvmcontext 34 +/* XEN_DOMCTL_gethvmcontext */ +/* XEN_DOMCTL_sethvmcontext */ typedef struct xen_domctl_hvmcontext { uint32_t size; /* IN/OUT: size of buffer / bytes filled */ - XEN_GUEST_HANDLE_64(uint8_t) buffer; /* IN/OUT: data, or call - * gethvmcontext with NULL - * buffer to get size - * req'd */ + XEN_GUEST_HANDLE_64(uint8) buffer; /* IN/OUT: data, or call + * gethvmcontext with NULL + * buffer to get size req'd */ } xen_domctl_hvmcontext_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_t); -#define XEN_DOMCTL_set_address_size 35 -#define XEN_DOMCTL_get_address_size 36 +/* XEN_DOMCTL_set_address_size */ +/* XEN_DOMCTL_get_address_size */ typedef struct xen_domctl_address_size { uint32_t size; } xen_domctl_address_size_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_address_size_t); -#define XEN_DOMCTL_real_mode_area 26 +/* XEN_DOMCTL_real_mode_area */ struct xen_domctl_real_mode_area { uint32_t log; /* log2 of Real Mode Area size */ }; @@ -427,10 +437,12 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_real_mode_area_t); -#define XEN_DOMCTL_sendtrigger 28 +/* XEN_DOMCTL_sendtrigger */ #define XEN_DOMCTL_SENDTRIGGER_NMI 0 #define XEN_DOMCTL_SENDTRIGGER_RESET 1 #define XEN_DOMCTL_SENDTRIGGER_INIT 2 +#define XEN_DOMCTL_SENDTRIGGER_POWER 3 +#define XEN_DOMCTL_SENDTRIGGER_SLEEP 4 struct xen_domctl_sendtrigger { uint32_t trigger; /* IN */ uint32_t vcpu; /* IN */ @@ -440,19 +452,19 @@ /* Assign PCI device to HVM guest. Sets up IOMMU structures. */ -#define XEN_DOMCTL_assign_device 37 -#define XEN_DOMCTL_test_assign_device 45 -#define XEN_DOMCTL_deassign_device 47 +/* XEN_DOMCTL_assign_device */ +/* XEN_DOMCTL_test_assign_device */ +/* XEN_DOMCTL_deassign_device */ struct xen_domctl_assign_device { - uint32_t machine_bdf; /* machine PCI ID of assigned device */ + uint32_t machine_sbdf; /* machine PCI ID of assigned device */ }; typedef struct xen_domctl_assign_device xen_domctl_assign_device_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_assign_device_t); -/* Retrieve sibling devices infomation of machine_bdf */ -#define XEN_DOMCTL_get_device_group 50 +/* Retrieve sibling devices infomation of machine_sbdf */ +/* XEN_DOMCTL_get_device_group */ struct xen_domctl_get_device_group { - uint32_t machine_bdf; /* IN */ + uint32_t machine_sbdf; /* IN */ uint32_t max_sdevs; /* IN */ uint32_t num_sdevs; /* OUT */ XEN_GUEST_HANDLE_64(uint32) sdev_array; /* OUT */ @@ -461,12 +473,13 @@ DEFINE_XEN_GUEST_HANDLE(xen_domctl_get_device_group_t); /* Pass-through interrupts: bind real irq -> hvm devfn. */ -#define XEN_DOMCTL_bind_pt_irq 38 -#define XEN_DOMCTL_unbind_pt_irq 48 +/* XEN_DOMCTL_bind_pt_irq */ +/* XEN_DOMCTL_unbind_pt_irq */ typedef enum pt_irq_type_e { PT_IRQ_TYPE_PCI, PT_IRQ_TYPE_ISA, PT_IRQ_TYPE_MSI, + PT_IRQ_TYPE_MSI_TRANSLATE, } pt_irq_type_t; struct xen_domctl_bind_pt_irq { uint32_t machine_irq; @@ -485,6 +498,7 @@ struct { uint8_t gvec; uint32_t gflags; + uint64_aligned_t gtable; } msi; } u; }; @@ -493,7 +507,7 @@ /* Bind machine I/O address range -> HVM address range. 
*/ -#define XEN_DOMCTL_memory_mapping 39 +/* XEN_DOMCTL_memory_mapping */ #define DPCI_ADD_MAPPING 1 #define DPCI_REMOVE_MAPPING 0 struct xen_domctl_memory_mapping { @@ -508,7 +522,7 @@ /* Bind machine I/O port range -> HVM I/O port range. */ -#define XEN_DOMCTL_ioport_mapping 40 +/* XEN_DOMCTL_ioport_mapping */ struct xen_domctl_ioport_mapping { uint32_t first_gport; /* first guest IO port*/ uint32_t first_mport; /* first machine IO port */ @@ -522,7 +536,7 @@ /* * Pin caching type of RAM space for x86 HVM domU. */ -#define XEN_DOMCTL_pin_mem_cacheattr 41 +/* XEN_DOMCTL_pin_mem_cacheattr */ /* Caching types: these happen to be the same as x86 MTRR/PAT type codes. */ #define XEN_DOMCTL_MEM_CACHEATTR_UC 0 #define XEN_DOMCTL_MEM_CACHEATTR_WC 1 @@ -532,20 +546,20 @@ #define XEN_DOMCTL_MEM_CACHEATTR_UCM 7 struct xen_domctl_pin_mem_cacheattr { uint64_aligned_t start, end; - unsigned int type; /* XEN_DOMCTL_MEM_CACHEATTR_* */ + uint32_t type; /* XEN_DOMCTL_MEM_CACHEATTR_* */ }; typedef struct xen_domctl_pin_mem_cacheattr xen_domctl_pin_mem_cacheattr_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_pin_mem_cacheattr_t); -#define XEN_DOMCTL_set_ext_vcpucontext 42 -#define XEN_DOMCTL_get_ext_vcpucontext 43 +/* XEN_DOMCTL_set_ext_vcpucontext */ +/* XEN_DOMCTL_get_ext_vcpucontext */ struct xen_domctl_ext_vcpucontext { /* IN: VCPU that this call applies to. */ uint32_t vcpu; /* * SET: Size of struct (IN) - * GET: Size of struct (OUT) + * GET: Size of struct (OUT, up to 128 bytes) */ uint32_t size; #if defined(__i386__) || defined(__x86_64__) @@ -557,6 +571,7 @@ uint16_t sysenter_callback_cs; uint8_t syscall32_disables_events; uint8_t sysenter_disables_events; + uint64_aligned_t mcg_cap; #endif }; typedef struct xen_domctl_ext_vcpucontext xen_domctl_ext_vcpucontext_t; @@ -565,7 +580,7 @@ /* * Set optimizaton features for a domain */ -#define XEN_DOMCTL_set_opt_feature 44 +/* XEN_DOMCTL_set_opt_feature */ struct xen_domctl_set_opt_feature { #if defined(__ia64__) struct xen_ia64_opt_feature optf; @@ -580,7 +595,7 @@ /* * Set the target domain for a domain */ -#define XEN_DOMCTL_set_target 46 +/* XEN_DOMCTL_set_target */ struct xen_domctl_set_target { domid_t target; }; @@ -589,19 +604,19 @@ #if defined(__i386__) || defined(__x86_64__) # define XEN_CPUID_INPUT_UNUSED 0xFFFFFFFF -# define XEN_DOMCTL_set_cpuid 49 +/* XEN_DOMCTL_set_cpuid */ struct xen_domctl_cpuid { - unsigned int input[2]; - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; + uint32_t input[2]; + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; }; typedef struct xen_domctl_cpuid xen_domctl_cpuid_t; DEFINE_XEN_GUEST_HANDLE(xen_domctl_cpuid_t); #endif -#define XEN_DOMCTL_subscribe 29 +/* XEN_DOMCTL_subscribe */ struct xen_domctl_subscribe { uint32_t port; /* IN */ }; @@ -612,12 +627,285 @@ * Define the maximum machine address size which should be allocated * to a guest. */ -#define XEN_DOMCTL_set_machine_address_size 51 -#define XEN_DOMCTL_get_machine_address_size 52 +/* XEN_DOMCTL_set_machine_address_size */ +/* XEN_DOMCTL_get_machine_address_size */ + +/* + * Do not inject spurious page faults into this domain. 
+ */ +/* XEN_DOMCTL_suppress_spurious_page_faults */ + +/* XEN_DOMCTL_debug_op */ +#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF 0 +#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON 1 +struct xen_domctl_debug_op { + uint32_t op; /* IN */ + uint32_t vcpu; /* IN */ +}; +typedef struct xen_domctl_debug_op xen_domctl_debug_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t); + +/* + * Request a particular record from the HVM context + */ +/* XEN_DOMCTL_gethvmcontext_partial */ +typedef struct xen_domctl_hvmcontext_partial { + uint32_t type; /* IN: Type of record required */ + uint32_t instance; /* IN: Instance of that type */ + XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */ +} xen_domctl_hvmcontext_partial_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t); + +/* XEN_DOMCTL_disable_migrate */ +typedef struct xen_domctl_disable_migrate { + uint32_t disable; /* IN: 1: disable migration and restore */ +} xen_domctl_disable_migrate_t; + + +/* XEN_DOMCTL_gettscinfo */ +/* XEN_DOMCTL_settscinfo */ +struct xen_guest_tsc_info { + uint32_t tsc_mode; + uint32_t gtsc_khz; + uint32_t incarnation; + uint32_t pad; + uint64_aligned_t elapsed_nsec; +}; +typedef struct xen_guest_tsc_info xen_guest_tsc_info_t; +DEFINE_XEN_GUEST_HANDLE(xen_guest_tsc_info_t); +typedef struct xen_domctl_tsc_info { + XEN_GUEST_HANDLE_64(xen_guest_tsc_info_t) out_info; /* OUT */ + xen_guest_tsc_info_t info; /* IN */ +} xen_domctl_tsc_info_t; + +/* XEN_DOMCTL_gdbsx_guestmemio guest mem io */ +struct xen_domctl_gdbsx_memio { + /* IN */ + uint64_aligned_t pgd3val;/* optional: init_mm.pgd[3] value */ + uint64_aligned_t gva; /* guest virtual address */ + uint64_aligned_t uva; /* user buffer virtual address */ + uint32_t len; /* number of bytes to read/write */ + uint8_t gwr; /* 0 = read from guest. 1 = write to guest */ + /* OUT */ + uint32_t remain; /* bytes remaining to be copied */ +}; + +/* XEN_DOMCTL_gdbsx_pausevcpu */ +/* XEN_DOMCTL_gdbsx_unpausevcpu */ +struct xen_domctl_gdbsx_pauseunp_vcpu { /* pause/unpause a vcpu */ + uint32_t vcpu; /* which vcpu */ +}; + +/* XEN_DOMCTL_gdbsx_domstatus */ +struct xen_domctl_gdbsx_domstatus { + /* OUT */ + uint8_t paused; /* is the domain paused */ + uint32_t vcpu_id; /* any vcpu in an event? */ + uint32_t vcpu_ev; /* if yes, what event? */ +}; + +/* + * Memory event operations + */ + +/* XEN_DOMCTL_mem_event_op */ + +/* + * Domain memory paging + * Page memory in and out. + * Domctl interface to set up and tear down the + * pager<->hypervisor interface. Use XENMEM_paging_op* + * to perform per-page operations. + */ +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING 1 + +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_PAGING_DISABLE 1 + +/* + * Access permissions. + * + * As with paging, use the domctl for teardown/setup of the + * helper<->hypervisor interface. + * + * There are HVM hypercalls to set the per-page access permissions of every + * page in a domain. When one of these permissions--independent, read, + * write, and execute--is violated, the VCPU is paused and a memory event + * is sent with what happened. (See public/mem_event.h) . + * + * The memory event handler can then resume the VCPU and redo the access + * with a XENMEM_access_op_resume hypercall. + */ +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS 2 + +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_ACCESS_DISABLE 1 + +/* + * Sharing ENOMEM helper. + * + * As with paging, use the domctl for teardown/setup of the + * helper<->hypervisor interface. 
+ * + * If setup, this ring is used to communicate failed allocations + * in the unshare path. XENMEM_sharing_op_resume is used to wake up + * vcpus that could not unshare. + * + * Note that shring can be turned on (as per the domctl below) + * *without* this ring being setup. + */ +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING 3 + +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_ENABLE 0 +#define XEN_DOMCTL_MEM_EVENT_OP_SHARING_DISABLE 1 + +/* Use for teardown/setup of helper<->hypervisor interface for paging, + * access and sharing.*/ +struct xen_domctl_mem_event_op { + uint32_t op; /* XEN_DOMCTL_MEM_EVENT_OP_*_* */ + uint32_t mode; /* XEN_DOMCTL_MEM_EVENT_OP_* */ + + uint32_t port; /* OUT: event channel for ring */ +}; +typedef struct xen_domctl_mem_event_op xen_domctl_mem_event_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_event_op_t); + +/* + * Memory sharing operations + */ +/* XEN_DOMCTL_mem_sharing_op. + * The CONTROL sub-domctl is used for bringup/teardown. */ +#define XEN_DOMCTL_MEM_SHARING_CONTROL 0 + +struct xen_domctl_mem_sharing_op { + uint8_t op; /* XEN_DOMCTL_MEM_SHARING_* */ + + union { + uint8_t enable; /* CONTROL */ + } u; +}; +typedef struct xen_domctl_mem_sharing_op xen_domctl_mem_sharing_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_mem_sharing_op_t); + +struct xen_domctl_audit_p2m { + /* OUT error counts */ + uint64_t orphans; + uint64_t m2p_bad; + uint64_t p2m_bad; +}; +typedef struct xen_domctl_audit_p2m xen_domctl_audit_p2m_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_audit_p2m_t); + +struct xen_domctl_set_virq_handler { + uint32_t virq; /* IN */ +}; +typedef struct xen_domctl_set_virq_handler xen_domctl_set_virq_handler_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_virq_handler_t); + +#if defined(__i386__) || defined(__x86_64__) +/* XEN_DOMCTL_setvcpuextstate */ +/* XEN_DOMCTL_getvcpuextstate */ +struct xen_domctl_vcpuextstate { + /* IN: VCPU that this call applies to. */ + uint32_t vcpu; + /* + * SET: xfeature support mask of struct (IN) + * GET: xfeature support mask of struct (IN/OUT) + * xfeature mask is served as identifications of the saving format + * so that compatible CPUs can have a check on format to decide + * whether it can restore. 
+ */ + uint64_aligned_t xfeature_mask; + /* + * SET: Size of struct (IN) + * GET: Size of struct (IN/OUT) + */ + uint64_aligned_t size; + XEN_GUEST_HANDLE_64(uint64) buffer; +}; +typedef struct xen_domctl_vcpuextstate xen_domctl_vcpuextstate_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_vcpuextstate_t); +#endif +/* XEN_DOMCTL_set_access_required: sets whether a memory event listener + * must be present to handle page access events: if false, the page + * access will revert to full permissions if no one is listening; + * */ +struct xen_domctl_set_access_required { + uint8_t access_required; +}; +typedef struct xen_domctl_set_access_required xen_domctl_set_access_required_t; +DEFINE_XEN_GUEST_HANDLE(xen_domctl_set_access_required_t); struct xen_domctl { uint32_t cmd; +#define XEN_DOMCTL_createdomain 1 +#define XEN_DOMCTL_destroydomain 2 +#define XEN_DOMCTL_pausedomain 3 +#define XEN_DOMCTL_unpausedomain 4 +#define XEN_DOMCTL_getdomaininfo 5 +#define XEN_DOMCTL_getmemlist 6 +#define XEN_DOMCTL_getpageframeinfo 7 +#define XEN_DOMCTL_getpageframeinfo2 8 +#define XEN_DOMCTL_setvcpuaffinity 9 +#define XEN_DOMCTL_shadow_op 10 +#define XEN_DOMCTL_max_mem 11 +#define XEN_DOMCTL_setvcpucontext 12 +#define XEN_DOMCTL_getvcpucontext 13 +#define XEN_DOMCTL_getvcpuinfo 14 +#define XEN_DOMCTL_max_vcpus 15 +#define XEN_DOMCTL_scheduler_op 16 +#define XEN_DOMCTL_setdomainhandle 17 +#define XEN_DOMCTL_setdebugging 18 +#define XEN_DOMCTL_irq_permission 19 +#define XEN_DOMCTL_iomem_permission 20 +#define XEN_DOMCTL_ioport_permission 21 +#define XEN_DOMCTL_hypercall_init 22 +#define XEN_DOMCTL_arch_setup 23 +#define XEN_DOMCTL_settimeoffset 24 +#define XEN_DOMCTL_getvcpuaffinity 25 +#define XEN_DOMCTL_real_mode_area 26 +#define XEN_DOMCTL_resumedomain 27 +#define XEN_DOMCTL_sendtrigger 28 +#define XEN_DOMCTL_subscribe 29 +#define XEN_DOMCTL_gethvmcontext 33 +#define XEN_DOMCTL_sethvmcontext 34 +#define XEN_DOMCTL_set_address_size 35 +#define XEN_DOMCTL_get_address_size 36 +#define XEN_DOMCTL_assign_device 37 +#define XEN_DOMCTL_bind_pt_irq 38 +#define XEN_DOMCTL_memory_mapping 39 +#define XEN_DOMCTL_ioport_mapping 40 +#define XEN_DOMCTL_pin_mem_cacheattr 41 +#define XEN_DOMCTL_set_ext_vcpucontext 42 +#define XEN_DOMCTL_get_ext_vcpucontext 43 +#define XEN_DOMCTL_set_opt_feature 44 +#define XEN_DOMCTL_test_assign_device 45 +#define XEN_DOMCTL_set_target 46 +#define XEN_DOMCTL_deassign_device 47 +#define XEN_DOMCTL_unbind_pt_irq 48 +#define XEN_DOMCTL_set_cpuid 49 +#define XEN_DOMCTL_get_device_group 50 +#define XEN_DOMCTL_set_machine_address_size 51 +#define XEN_DOMCTL_get_machine_address_size 52 +#define XEN_DOMCTL_suppress_spurious_page_faults 53 +#define XEN_DOMCTL_debug_op 54 +#define XEN_DOMCTL_gethvmcontext_partial 55 +#define XEN_DOMCTL_mem_event_op 56 +#define XEN_DOMCTL_mem_sharing_op 57 +#define XEN_DOMCTL_disable_migrate 58 +#define XEN_DOMCTL_gettscinfo 59 +#define XEN_DOMCTL_settscinfo 60 +#define XEN_DOMCTL_getpageframeinfo3 61 +#define XEN_DOMCTL_setvcpuextstate 62 +#define XEN_DOMCTL_getvcpuextstate 63 +#define XEN_DOMCTL_set_access_required 64 +#define XEN_DOMCTL_audit_p2m 65 +#define XEN_DOMCTL_set_virq_handler 66 +#define XEN_DOMCTL_gdbsx_guestmemio 1000 +#define XEN_DOMCTL_gdbsx_pausevcpu 1001 +#define XEN_DOMCTL_gdbsx_unpausevcpu 1002 +#define XEN_DOMCTL_gdbsx_domstatus 1003 uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */ domid_t domain; union { @@ -626,6 +914,7 @@ struct xen_domctl_getmemlist getmemlist; struct xen_domctl_getpageframeinfo getpageframeinfo; struct 
xen_domctl_getpageframeinfo2 getpageframeinfo2; + struct xen_domctl_getpageframeinfo3 getpageframeinfo3; struct xen_domctl_vcpuaffinity vcpuaffinity; struct xen_domctl_shadow_op shadow_op; struct xen_domctl_max_mem max_mem; @@ -641,8 +930,11 @@ struct xen_domctl_hypercall_init hypercall_init; struct xen_domctl_arch_setup arch_setup; struct xen_domctl_settimeoffset settimeoffset; + struct xen_domctl_disable_migrate disable_migrate; + struct xen_domctl_tsc_info tsc_info; struct xen_domctl_real_mode_area real_mode_area; struct xen_domctl_hvmcontext hvmcontext; + struct xen_domctl_hvmcontext_partial hvmcontext_partial; struct xen_domctl_address_size address_size; struct xen_domctl_sendtrigger sendtrigger; struct xen_domctl_get_device_group get_device_group; @@ -655,9 +947,19 @@ struct xen_domctl_set_opt_feature set_opt_feature; struct xen_domctl_set_target set_target; struct xen_domctl_subscribe subscribe; + struct xen_domctl_debug_op debug_op; + struct xen_domctl_mem_event_op mem_event_op; + struct xen_domctl_mem_sharing_op mem_sharing_op; #if defined(__i386__) || defined(__x86_64__) struct xen_domctl_cpuid cpuid; + struct xen_domctl_vcpuextstate vcpuextstate; #endif + struct xen_domctl_set_access_required access_required; + struct xen_domctl_audit_p2m audit_p2m; + struct xen_domctl_set_virq_handler set_virq_handler; + struct xen_domctl_gdbsx_memio gdbsx_guest_memio; + struct xen_domctl_gdbsx_pauseunp_vcpu gdbsx_pauseunp_vcpu; + struct xen_domctl_gdbsx_domstatus gdbsx_domstatus; uint8_t pad[128]; } u; }; ==== //depot/SpectraBSD/stable/9/sys/xen/interface/elfnote.h#2 (text) ==== @@ -162,9 +162,39 @@ #define XEN_ELFNOTE_SUSPEND_CANCEL 14 /* + * The (non-default) location the initial phys-to-machine map should be + * placed at by the hypervisor (Dom0) or the tools (DomU). + * The kernel must be prepared for this mapping to be established using + * large pages, despite such otherwise not being available to guests. + * The kernel must also be able to handle the page table pages used for + * this mapping not being accessible through the initial mapping. + * (Only x86-64 supports this at present.) + */ +#define XEN_ELFNOTE_INIT_P2M 15 + +/* + * Whether or not the guest can deal with being passed an initrd not + * mapped through its initial page tables. + */ +#define XEN_ELFNOTE_MOD_START_PFN 16 + +/* + * The features supported by this kernel (numeric). + * + * Other than XEN_ELFNOTE_FEATURES on pre-4.2 Xen, this note allows a + * kernel to specify support for features that older hypervisors don't + * know about. The set of features 4.2 and newer hypervisors will + * consider supported by the kernel is the combination of the sets + * specified through this and the string note. + * + * LEGACY: FEATURES + */ +#define XEN_ELFNOTE_SUPPORTED_FEATURES 17 + +/* * The number of the highest elfnote defined. */ -#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUSPEND_CANCEL +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUPPORTED_FEATURES /* * System information exported through crash notes. ==== //depot/SpectraBSD/stable/9/sys/xen/interface/event_channel.h#2 (text) ==== @@ -1,8 +1,8 @@ /****************************************************************************** * event_channel.h - * + * * Event channels between domains. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the @@ -28,12 +28,49 @@ #define __XEN_PUBLIC_EVENT_CHANNEL_H__ /* - * Prototype for this hypercall is: - * int event_channel_op(int cmd, void *args) - * @cmd == EVTCHNOP_??? (event-channel operation). - * @args == Operation-specific extra arguments (NULL if none). + * `incontents 150 evtchn Event Channels + * + * Event channels are the basic primitive provided by Xen for event + * notifications. An event is the Xen equivalent of a hardware + * interrupt. They essentially store one bit of information, the event + * of interest is signalled by transitioning this bit from 0 to 1. + * + * Notifications are received by a guest via an upcall from Xen, + * indicating when an event arrives (setting the bit). Further + * notifications are masked until the bit is cleared again (therefore, + * guests must check the value of the bit after re-enabling event + * delivery to ensure no missed notifications). + * + * Event notifications can be masked by setting a flag; this is + * equivalent to disabling interrupts and can be used to ensure + * atomicity of certain operations in the guest kernel. + * + * Event channels are represented by the evtchn_* fields in + * struct shared_info and struct vcpu_info. + */ + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op(enum event_channel_op cmd, void *args) + * ` + * @cmd == EVTCHNOP_* (event-channel operation). + * @args == struct evtchn_* Operation-specific extra arguments (NULL if none). */ +/* ` enum event_channel_op { // EVTCHNOP_* => struct evtchn_* */ +#define EVTCHNOP_bind_interdomain 0 +#define EVTCHNOP_bind_virq 1 +#define EVTCHNOP_bind_pirq 2 +#define EVTCHNOP_close 3 +#define EVTCHNOP_send 4 +#define EVTCHNOP_status 5 +#define EVTCHNOP_alloc_unbound 6 +#define EVTCHNOP_bind_ipi 7 +#define EVTCHNOP_bind_vcpu 8 +#define EVTCHNOP_unmask 9 +#define EVTCHNOP_reset 10 +/* ` } */ + typedef uint32_t evtchn_port_t; DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); @@ -45,7 +82,6 @@ * 1. If the caller is unprivileged then must be DOMID_SELF. * 2. may be DOMID_SELF, allowing loopback connections. */ -#define EVTCHNOP_alloc_unbound 6 struct evtchn_alloc_unbound { /* IN parameters */ domid_t dom, remote_dom; @@ -61,9 +97,8 @@ * domain. A fresh port is allocated in the calling domain and returned as * . * NOTES: - * 2. may be DOMID_SELF, allowing loopback connections. + * 1. may be DOMID_SELF, allowing loopback connections. */ -#define EVTCHNOP_bind_interdomain 0 struct evtchn_bind_interdomain { /* IN parameters. */ domid_t remote_dom; @@ -85,10 +120,9 @@ * The allocated event channel is bound to the specified vcpu and the * binding cannot be changed. */ -#define EVTCHNOP_bind_virq 1 struct evtchn_bind_virq { /* IN parameters. */ - uint32_t virq; + uint32_t virq; /* enum virq */ uint32_t vcpu; /* OUT parameters. */ evtchn_port_t port; @@ -96,12 +130,11 @@ typedef struct evtchn_bind_virq evtchn_bind_virq_t; /* - * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ . + * EVTCHNOP_bind_pirq: Bind a local event channel to a real IRQ (PIRQ ). * NOTES: * 1. A physical IRQ may be bound to at most one event channel per domain. * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. */ -#define EVTCHNOP_bind_pirq 2 struct evtchn_bind_pirq { /* IN parameters. */ uint32_t pirq; @@ -118,7 +151,6 @@ * 1. 
The allocated event channel is bound to the specified vcpu. The binding * may not be changed. */ -#define EVTCHNOP_bind_ipi 7 struct evtchn_bind_ipi { uint32_t vcpu; /* OUT parameters. */ @@ -131,7 +163,6 @@ * interdomain then the remote end is placed in the unbound state * (EVTCHNSTAT_unbound), awaiting a new connection. */ -#define EVTCHNOP_close 3 struct evtchn_close { /* IN parameters. */ evtchn_port_t port; @@ -142,7 +173,6 @@ * EVTCHNOP_send: Send an event to the remote end of the channel whose local * endpoint is . */ -#define EVTCHNOP_send 4 struct evtchn_send { /* IN parameters. */ evtchn_port_t port; @@ -157,7 +187,6 @@ * 2. Only a sufficiently-privileged domain may obtain the status of an event * channel for which is not DOMID_SELF. */ -#define EVTCHNOP_status 5 struct evtchn_status { /* IN parameters */ domid_t dom; @@ -174,13 +203,13 @@ union { struct { domid_t dom; - } unbound; /* EVTCHNSTAT_unbound */ + } unbound; /* EVTCHNSTAT_unbound */ struct { domid_t dom; evtchn_port_t port; - } interdomain; /* EVTCHNSTAT_interdomain */ - uint32_t pirq; /* EVTCHNSTAT_pirq */ - uint32_t virq; /* EVTCHNSTAT_virq */ + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ } u; }; typedef struct evtchn_status evtchn_status_t; @@ -197,7 +226,6 @@ * the channel is allocated (a port that is freed and subsequently reused * has its binding reset to vcpu0). */ -#define EVTCHNOP_bind_vcpu 8 struct evtchn_bind_vcpu { /* IN parameters. */ evtchn_port_t port; @@ -209,7 +237,6 @@ * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver * a notification to the appropriate VCPU if an event is pending. */ -#define EVTCHNOP_unmask 9 struct evtchn_unmask { /* IN parameters. */ evtchn_port_t port; @@ -222,7 +249,6 @@ * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. */ -#define EVTCHNOP_reset 10 struct evtchn_reset { /* IN parameters. */ domid_t dom; @@ -230,11 +256,13 @@ typedef struct evtchn_reset evtchn_reset_t; /* - * Argument to event_channel_op_compat() hypercall. Superceded by new - * event_channel_op() hypercall since 0x00030202. + * ` enum neg_errnoval + * ` HYPERVISOR_event_channel_op_compat(struct evtchn_op *op) + * ` + * Superceded by new event_channel_op() hypercall since 0x00030202. */ struct evtchn_op { - uint32_t cmd; /* EVTCHNOP_* */ + uint32_t cmd; /* enum event_channel_op */ union { struct evtchn_alloc_unbound alloc_unbound; struct evtchn_bind_interdomain bind_interdomain; ==== //depot/SpectraBSD/stable/9/sys/xen/interface/features.h#2 (text) ==== @@ -59,6 +59,27 @@ /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ #define XENFEAT_mmu_pt_update_preserve_ad 5 +/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */ +#define XENFEAT_highmem_assist 6 + +/* + * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel + * available pte bits. + */ +#define XENFEAT_gnttab_map_avail_bits 7 + +/* x86: Does this Xen host support the HVM callback vector type? 
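From the guest side, these feature bits are read out of the submap returned by the version hypercall. A minimal sketch, assuming the usual two-argument HYPERVISOR_xen_version() wrapper and struct xen_feature_info from version.h:

static int
xen_has_feature(unsigned int feat)
{
    struct xen_feature_info fi;

    fi.submap_idx = feat / 32;
    if (HYPERVISOR_xen_version(XENVER_get_features, &fi) != 0)
        return (0);
    return ((fi.submap >> (feat % 32)) & 1);
}

/* e.g.: if (xen_has_feature(XENFEAT_hvm_callback_vector)) ... */
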
*/ +#define XENFEAT_hvm_callback_vector 8 + +/* x86: pvclock algorithm is safe to use on HVM */ +#define XENFEAT_hvm_safe_pvclock 9 + +/* x86: pirq can be used by HVM guests */ +#define XENFEAT_hvm_pirqs 10 + +/* operation as Dom0 is supported */ +#define XENFEAT_dom0 11 + #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/grant_table.h#2 (text) ==== @@ -1,9 +1,9 @@ /****************************************************************************** * grant_table.h - * + * * Interface for granting foreign access to page frames, and receiving * page-ownership transfers. - * + * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the @@ -28,6 +28,28 @@ #ifndef __XEN_PUBLIC_GRANT_TABLE_H__ #define __XEN_PUBLIC_GRANT_TABLE_H__ +/* + * `incontents 150 gnttab Grant Tables + * + * Xen's grant tables provide a generic mechanism to memory sharing + * between domains. This shared memory interface underpins the split + * device drivers for block and network IO. + * + * Each domain has its own grant table. This is a data structure that + * is shared with Xen; it allows the domain to tell Xen what kind of + * permissions other domains have on its pages. Entries in the grant + * table are identified by grant references. A grant reference is an + * integer, which indexes into the grant table. It acts as a + * capability which the grantee can use to perform operations on the + * granter’s memory. + * + * This capability-based system allows shared-memory communications + * between unprivileged domains. A grant reference also encapsulates + * the details of a shared page, removing the need for a domain to + * know the real machine address of a page it is sharing. This makes + * it possible to share memory correctly with domains running in + * fully virtualised memory. + */ /*********************************** * GRANT TABLE REPRESENTATION @@ -35,11 +57,12 @@ /* Some rough guidelines on accessing and updating grant-table entries * in a concurrency-safe manner. For more information, Linux contains a - * reference implementation for guest OSes (arch/xen/kernel/grant_table.c). - * + * reference implementation for guest OSes (drivers/xen/grant_table.c, see + * http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=blob;f=drivers/xen/grant-table.c;hb=HEAD + * * NB. WMB is a no-op on current-generation x86 processors. However, a * compiler barrier will still be required. - * + * * Introducing a valid entry into the grant table: * 1. Write ent->domid. * 2. Write ent->frame: @@ -48,7 +71,7 @@ * frame, or zero if none. * 3. Write memory barrier (WMB). * 4. Write ent->flags, inc. valid type. - * + * * Invalidating an unused GTF_permit_access entry: * 1. flags = ent->flags. * 2. Observe that !(flags & (GTF_reading|GTF_writing)). @@ -60,7 +83,7 @@ * This cannot be done directly. Request assistance from the domain controller * which can set a timeout on the use of a grant entry and take necessary * action. (NB. This is not yet implemented!). - * + * * Invalidating an unused GTF_accept_transfer entry: * 1. flags = ent->flags. * 2. Observe that !(flags & GTF_transfer_committed). [*] @@ -78,18 +101,34 @@ * * Changing a GTF_permit_access from writable to read-only: * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. 
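To make the ordering rules above concrete, here is a minimal sketch of steps 1-4 for publishing a v1 grant entry, assuming a wmb()-style write barrier is available:

static void
gnttab_grant_v1(grant_entry_v1_t *ent, domid_t dom, uint32_t mfn,
    int readonly)
{
    /* Steps 1-2: fill in domid and frame while the entry is invalid. */
    ent->domid = dom;
    ent->frame = mfn;
    /* Step 3: order the above before the flags become visible. */
    wmb();
    /* Step 4: write flags, including the valid type. */
    ent->flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
}
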
- * + * * Changing a GTF_permit_access from read-only to writable: * Use SMP-safe bit-setting instruction. */ /* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +#define GRANT_REF_INVALID 0xffffffff + +/* * A grant table comprises a packed array of grant entries in one or more * page frames shared between Xen and a guest. * [XEN]: This field is written by Xen and read by the sharing guest. * [GST]: This field is written by the guest and read by Xen. */ -struct grant_entry { + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2. + */ +#if __XEN_INTERFACE_VERSION__ < 0x0003020a +#define grant_entry_v1 grant_entry +#define grant_entry_v1_t grant_entry_t +#endif +struct grant_entry_v1 { /* GTF_xxx: various type and flag information. [XEN,GST] */ uint16_t flags; /* The domain being granted foreign privileges. [GST] */ @@ -100,7 +139,14 @@ */ uint32_t frame; }; -typedef struct grant_entry grant_entry_t; +typedef struct grant_entry_v1 grant_entry_v1_t; + +/* The first few grant table entries will be preserved across grant table + * version changes and may be pre-populated at domain creation by tools. + */ +#define GNTTAB_NR_RESERVED_ENTRIES 8 +#define GNTTAB_RESERVED_CONSOLE 0 +#define GNTTAB_RESERVED_XENSTORE 1 /* * Type of grant entry. @@ -108,10 +154,13 @@ * GTF_permit_access: Allow @domid to map/access @frame. * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. */ #define GTF_invalid (0U<<0) #define GTF_permit_access (1U<<0) #define GTF_accept_transfer (2U<<0) +#define GTF_transitive (3U<<0) #define GTF_type_mask (3U<<0) /* @@ -120,6 +169,9 @@ * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] */ #define _GTF_readonly (2) #define GTF_readonly (1U<<_GTF_readonly) @@ -133,6 +185,8 @@ #define GTF_PCD (1U<<_GTF_PCD) #define _GTF_PAT (7) #define GTF_PAT (1U<<_GTF_PAT) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U<<_GTF_sub_page) /* * Subflags for GTF_accept_transfer: @@ -149,17 +203,114 @@ #define _GTF_transfer_completed (3) #define GTF_transfer_completed (1U<<_GTF_transfer_completed) +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. + * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. + */ +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * Version 1 and version 2 grant entries share a common prefix. The + * fields of the prefix are documented as part of struct + * grant_entry_v1. + */ +struct grant_entry_header { + uint16_t flags; + domid_t domid; +}; +typedef struct grant_entry_header grant_entry_header_t; + +/* + * Version 2 of the grant entry structure. 
+ */ +union grant_entry_v2 { + grant_entry_header_t hdr; + + /* + * This member is used for V1-style full page grants, where either: + * + * -- hdr.type is GTF_accept_transfer, or + * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. + * + * In that case, the frame field has the same semantics as the + * field of the same name in the V1 entry structure. + */ + struct { + grant_entry_header_t hdr; + uint32_t pad0; + uint64_t frame; + } full_page; + /* + * If the grant type is GTF_grant_access and GTF_sub_page is set, + * @domid is allowed to access bytes [@page_off,@page_off+@length) + * in frame @frame. + */ + struct { + grant_entry_header_t hdr; + uint16_t page_off; + uint16_t length; + uint64_t frame; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use the + * grant @gref in domain @trans_domid, as if it was the local + * domain. Obviously, the transitive access must be compatible + * with the original grant. + * + * The current version of Xen does not allow transitive grants + * to be mapped. + */ + struct { + grant_entry_header_t hdr; + domid_t trans_domid; + uint16_t pad0; + grant_ref_t gref; + } transitive; + + uint32_t __spacer[4]; /* Pad to a power of two */ +}; +typedef union grant_entry_v2 grant_entry_v2_t; + +typedef uint16_t grant_status_t; + +#endif /* __XEN_INTERFACE_VERSION__ */ + /*********************************** * GRANT TABLE QUERIES AND USES */ -/* - * Reference to a grant entry in a specified domain's grant table. +/* ` enum neg_errnoval + * ` HYPERVISOR_grant_table_op(enum grant_table_op cmd, + * ` void *args, + * ` unsigned int count) + * ` + * + * @args points to an array of a per-command data structure. The array + * has @count members */ -typedef uint32_t grant_ref_t; -#define GRANT_REF_INVALID 0xffffffff +/* ` enum grant_table_op { // GNTTABOP_* => struct gnttab_* */ +#define GNTTABOP_map_grant_ref 0 +#define GNTTABOP_unmap_grant_ref 1 +#define GNTTABOP_setup_table 2 +#define GNTTABOP_dump_table 3 +#define GNTTABOP_transfer 4 +#define GNTTABOP_copy 5 +#define GNTTABOP_query_size 6 +#define GNTTABOP_unmap_and_replace 7 +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +#define GNTTABOP_set_version 8 +#define GNTTABOP_get_status_frames 9 +#define GNTTABOP_get_version 10 +#define GNTTABOP_swap_grant_ref 11 +#endif /* __XEN_INTERFACE_VERSION__ */ +/* ` } */ /* * Handle to track a mapping created via a grant reference. @@ -177,13 +328,12 @@ * 2. If GNTMAP_host_map is specified then a mapping will be added at * either a host virtual address in the current address space, or at * a PTE at the specified machine address. The type of mapping to - * perform is selected through the GNTMAP_contains_pte flag, and the + * perform is selected through the GNTMAP_contains_pte flag, and the * address is specified in . * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a * host mapping is destroyed by other means then it is *NOT* guaranteed * to be accounted to the correct grant reference! */ -#define GNTTABOP_map_grant_ref 0 struct gnttab_map_grant_ref { /* IN parameters. */ uint64_t host_addr; @@ -191,7 +341,7 @@ grant_ref_t ref; domid_t dom; /* OUT parameters. */ - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ grant_handle_t handle; uint64_t dev_bus_addr; }; @@ -209,14 +359,13 @@ * 3. After executing a batch of unmaps, it is guaranteed that no stale * mappings will remain in the device or host TLBs. */ -#define GNTTABOP_unmap_grant_ref 1 struct gnttab_unmap_grant_ref { /* IN parameters. 
*/ uint64_t host_addr; uint64_t dev_bus_addr; grant_handle_t handle; /* OUT parameters. */ - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ }; typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); @@ -230,13 +379,12 @@ * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. * 3. Xen may not support more than a single grant-table page per domain. */ -#define GNTTABOP_setup_table 2 struct gnttab_setup_table { /* IN parameters. */ domid_t dom; uint32_t nr_frames; /* OUT parameters. */ - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ XEN_GUEST_HANDLE(ulong) frame_list; }; typedef struct gnttab_setup_table gnttab_setup_table_t; @@ -246,12 +394,11 @@ * GNTTABOP_dump_table: Dump the contents of the grant table to the * xen console. Debugging use only. */ -#define GNTTABOP_dump_table 3 struct gnttab_dump_table { /* IN parameters. */ domid_t dom; /* OUT parameters. */ - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ }; typedef struct gnttab_dump_table gnttab_dump_table_t; DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); @@ -260,11 +407,10 @@ * GNTTABOP_transfer_grant_ref: Transfer to a foreign domain. The * foreign domain has previously registered its interest in the transfer via * . - * + * * Note that, even if the transfer fails, the specified page no longer belongs * to the calling domain *unless* the error is GNTST_bad_page. */ -#define GNTTABOP_transfer 4 struct gnttab_transfer { /* IN parameters. */ xen_pfn_t mfn; @@ -299,9 +445,10 @@ #define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) #define _GNTCOPY_dest_gref (1) #define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) +#define _GNTCOPY_can_fail (2) +#define GNTCOPY_can_fail (1<<_GNTCOPY_can_fail) -#define GNTTABOP_copy 5 -typedef struct gnttab_copy { +struct gnttab_copy { /* IN parameters. */ struct { union { @@ -315,7 +462,8 @@ uint16_t flags; /* GNTCOPY_* */ /* OUT parameters. */ int16_t status; -} gnttab_copy_t; +}; +typedef struct gnttab_copy gnttab_copy_t; DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); /* @@ -325,14 +473,13 @@ * 1. may be specified as DOMID_SELF. * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. */ -#define GNTTABOP_query_size 6 struct gnttab_query_size { /* IN parameters. */ domid_t dom; /* OUT parameters. */ uint32_t nr_frames; uint32_t max_nr_frames; - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ }; typedef struct gnttab_query_size gnttab_query_size_t; DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); @@ -348,21 +495,87 @@ * 2. After executing a batch of unmaps, it is guaranteed that no stale * mappings will remain in the device or host TLBs. */ -#define GNTTABOP_unmap_and_replace 7 struct gnttab_unmap_and_replace { /* IN parameters. */ uint64_t host_addr; uint64_t new_addr; grant_handle_t handle; /* OUT parameters. */ - int16_t status; /* GNTST_* */ + int16_t status; /* => enum grant_status */ }; typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure. This operation can only be performed + * once in any given domain. It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. 
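On the mapping side, a backend typically maps a frontend's grant with one batched hypercall. A sketch, with vaddr, gref, and otherend_id standing in for caller state and error handling elided:

struct gnttab_map_grant_ref map;

map.host_addr = (uint64_t)vaddr;   /* where to map (GNTMAP_host_map) */
map.flags     = GNTMAP_host_map;
map.ref       = gref;              /* grant ref from the other domain */
map.dom       = otherend_id;
if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &map, 1) != 0 ||
    map.status != GNTST_okay)
        /* handle failure */;
/* Keep map.handle for the later GNTTABOP_unmap_grant_ref. */
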
+ */ +struct gnttab_set_version { + /* IN/OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_set_version gnttab_set_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t); + /* - * Bitfield values for update_pin_status.flags. + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for . In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpexch operations. + * specify the size of vector . + * The frame addresses are returned in the . + * Only addresses are returned, even if the table is larger. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. + */ +struct gnttab_get_status_frames { + /* IN parameters. */ + uint32_t nr_frames; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* => enum grant_status */ + XEN_GUEST_HANDLE(uint64_t) frame_list; +}; +typedef struct gnttab_get_status_frames gnttab_get_status_frames_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t); + +/* + * GNTTABOP_get_version: Get the grant table version which is in + * effect for domain . + */ +struct gnttab_get_version { + /* IN parameters */ + domid_t dom; + uint16_t pad; + /* OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_get_version gnttab_get_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t); + +/* + * GNTTABOP_swap_grant_ref: Swap the contents of two grant entries. + */ +struct gnttab_swap_grant_ref { + /* IN parameters */ + grant_ref_t ref_a; + grant_ref_t ref_b; + /* OUT parameters */ + int16_t status; /* => enum grant_status */ +}; +typedef struct gnttab_swap_grant_ref gnttab_swap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_swap_grant_ref_t); + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/* + * Bitfield values for gnttab_map_grant_ref.flags. */ /* Map the grant entry for access by I/O devices. */ #define _GNTMAP_device_map (0) @@ -389,9 +602,20 @@ #define _GNTMAP_contains_pte (4) #define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) +#define _GNTMAP_can_fail (5) +#define GNTMAP_can_fail (1<<_GNTMAP_can_fail) + +/* + * Bits to be placed in guest kernel available PTE bits (architecture + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). + */ +#define _GNTMAP_guest_avail0 (16) +#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) + /* * Values for error status returns. All errors are -ve. */ +/* ` enum grant_status { */ #define GNTST_okay (0) /* Normal return. */ #define GNTST_general_error (-1) /* General undefined error. */ #define GNTST_bad_domain (-2) /* Unrecognsed domain id. */ @@ -404,6 +628,8 @@ #define GNTST_bad_page (-9) /* Specified page was invalid for op. */ #define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ #define GNTST_address_too_big (-11) /* transfer page address too large. */ +#define GNTST_eagain (-12) /* Operation not done; try again. 
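Switching a domain to the v2 table format is then a single IN/OUT call which, per the note above, must precede any grant activation. A sketch:

struct gnttab_set_version sv = { .version = 2 };
int rc;

rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &sv, 1);
/* On success sv.version holds the version now in effect; an error
 * from a pre-4.2 hypervisor means the domain stays on v1. */
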
*/ +/* ` } */ #define GNTTABOP_error_msgs { \ "okay", \ @@ -417,7 +643,8 @@ "permission denied", \ "bad page", \ "copy arguments cross page boundary", \ - "page address size too large" \ + "page address size too large", \ + "operation not done; try again" \ } #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/hvm/hvm_info_table.h#2 (text) ==== @@ -29,13 +29,44 @@ #define HVM_INFO_OFFSET 0x800 #define HVM_INFO_PADDR ((HVM_INFO_PFN << 12) + HVM_INFO_OFFSET) +/* Maximum we can support with current vLAPIC ID mapping. */ +#define HVM_MAX_VCPUS 128 + struct hvm_info_table { char signature[8]; /* "HVM INFO" */ uint32_t length; uint8_t checksum; - uint8_t acpi_enabled; + + /* Should firmware build APIC descriptors (APIC MADT / MP BIOS)? */ uint8_t apic_mode; + + /* How many CPUs does this domain have? */ uint32_t nr_vcpus; + + /* + * MEMORY MAP provided by HVM domain builder. + * Notes: + * 1. page_to_phys(x) = x << 12 + * 2. If a field is zero, the corresponding range does not exist. + */ + /* + * 0x0 to page_to_phys(low_mem_pgend)-1: + * RAM below 4GB (except for VGA hole 0xA0000-0xBFFFF) + */ + uint32_t low_mem_pgend; + /* + * page_to_phys(reserved_mem_pgstart) to 0xFFFFFFFF: + * Reserved for special memory mappings + */ + uint32_t reserved_mem_pgstart; + /* + * 0x100000000 to page_to_phys(high_mem_pgend)-1: + * RAM above 4GB + */ + uint32_t high_mem_pgend; + + /* Bitmap of which CPUs are online at boot time. */ + uint8_t vcpu_online[(HVM_MAX_VCPUS + 7)/8]; }; #endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/hvm/hvm_op.h#2 (text) ==== @@ -21,6 +21,8 @@ #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ #define __XEN_PUBLIC_HVM_HVM_OP_H__ +#include + /* Get/set subcommands: extra argument == pointer to xen_hvm_param struct. */ #define HVMOP_set_param 0 #define HVMOP_get_param 1 @@ -73,6 +75,12 @@ /* Flushes all VCPU TLBs: @arg must be NULL. */ #define HVMOP_flush_tlbs 5 +typedef enum { + HVMMEM_ram_rw, /* Normal read/write guest RAM */ + HVMMEM_ram_ro, /* Read-only; writes are discarded */ + HVMMEM_mmio_dm, /* Reads and write go to the device model */ +} hvmmem_type_t; + /* Following tools-only interfaces may change in future. */ #if defined(__XEN__) || defined(__XEN_TOOLS__) @@ -106,25 +114,149 @@ DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); #define HVMOP_set_mem_type 8 -typedef enum { - HVMMEM_ram_rw, /* Normal read/write guest RAM */ - HVMMEM_ram_ro, /* Read-only; writes are discarded */ - HVMMEM_mmio_dm, /* Reads and write go to the device model */ -} hvmmem_type_t; /* Notify that a region of memory is to be treated in a specific way. */ struct xen_hvm_set_mem_type { /* Domain to be updated. */ domid_t domid; /* Memory type */ - hvmmem_type_t hvmmem_type; + uint16_t hvmmem_type; + /* Number of pages. */ + uint32_t nr; /* First pfn. */ uint64_aligned_t first_pfn; - /* Number of pages. */ - uint64_aligned_t nr; }; typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t); +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +/* Hint from PV drivers for pagetable destruction. */ +#define HVMOP_pagetable_dying 9 +struct xen_hvm_pagetable_dying { + /* Domain with a pagetable about to be destroyed. 
*/ + domid_t domid; + uint16_t pad[3]; /* align next field on 8-byte boundary */ + /* guest physical address of the toplevel pagetable dying */ + uint64_t gpa; +}; +typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t); + +/* Get the current Xen time, in nanoseconds since system boot. */ +#define HVMOP_get_time 10 +struct xen_hvm_get_time { + uint64_t now; /* OUT */ +}; +typedef struct xen_hvm_get_time xen_hvm_get_time_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_time_t); + +#define HVMOP_xentrace 11 +struct xen_hvm_xentrace { + uint16_t event, extra_bytes; + uint8_t extra[TRACE_EXTRA_MAX * sizeof(uint32_t)]; +}; +typedef struct xen_hvm_xentrace xen_hvm_xentrace_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t); + +/* Following tools-only interfaces may change in future. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#define HVMOP_set_mem_access 12 +typedef enum { + HVMMEM_access_n, + HVMMEM_access_r, + HVMMEM_access_w, + HVMMEM_access_rw, + HVMMEM_access_x, + HVMMEM_access_rx, + HVMMEM_access_wx, + HVMMEM_access_rwx, + HVMMEM_access_rx2rw, /* Page starts off as r-x, but automatically + * change to r-w on a write */ + HVMMEM_access_n2rwx, /* Log access: starts off as n, automatically + * goes to rwx, generating an event without + * pausing the vcpu */ + HVMMEM_access_default /* Take the domain default */ +} hvmmem_access_t; +/* Notify that a region of memory is to have specific access types */ +struct xen_hvm_set_mem_access { + /* Domain to be updated. */ + domid_t domid; + /* Memory type */ + uint16_t hvmmem_access; /* hvm_access_t */ + /* Number of pages, ignored on setting default access */ + uint32_t nr; + /* First pfn, or ~0ull to set the default access for new pages */ + uint64_aligned_t first_pfn; +}; +typedef struct xen_hvm_set_mem_access xen_hvm_set_mem_access_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_access_t); + +#define HVMOP_get_mem_access 13 +/* Get the specific access type for that region of memory */ +struct xen_hvm_get_mem_access { + /* Domain to be queried. */ + domid_t domid; + /* Memory type: OUT */ + uint16_t hvmmem_access; /* hvm_access_t */ + /* pfn, or ~0ull for default access for new pages. IN */ + uint64_aligned_t pfn; +}; +typedef struct xen_hvm_get_mem_access xen_hvm_get_mem_access_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_access_t); + +#define HVMOP_inject_trap 14 +/* Inject a trap into a VCPU, which will get taken up on the next + * scheduling of it. Note that the caller should know enough of the + * state of the CPU before injecting, to know what the effect of + * injecting the trap will be. + */ +struct xen_hvm_inject_trap { + /* Domain to be queried. */ + domid_t domid; + /* VCPU */ + uint32_t vcpuid; + /* Trap number */ + uint32_t trap; + /* Error code, or -1 to skip */ + uint32_t error_code; + /* CR2 for page faults */ + uint64_aligned_t cr2; +}; +typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + +#define HVMOP_get_mem_type 15 +/* Return hvmmem_type_t for the specified pfn. */ +struct xen_hvm_get_mem_type { + /* Domain to be queried. */ + domid_t domid; + /* OUT variable. */ + uint16_t mem_type; + uint16_t pad[2]; /* align next field on 8-byte boundary */ + /* IN variable. */ + uint64_t pfn; +}; +typedef struct xen_hvm_get_mem_type xen_hvm_get_mem_type_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_type_t); + +/* Following tools-only interfaces may change in future. 
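Of the new non-tools operations, HVMOP_get_time is the simplest to exercise; a sketch using the same HYPERVISOR_hvm_op() wrapper seen elsewhere in this change:

struct xen_hvm_get_time gt;

if (HYPERVISOR_hvm_op(HVMOP_get_time, &gt) == 0)
        printf("Xen time: %ju ns since boot\n", (uintmax_t)gt.now);
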
*/ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +/* MSI injection for emulated devices */ +#define HVMOP_inject_msi 16 +struct xen_hvm_inject_msi { + /* Domain to be injected */ + domid_t domid; + /* Data -- lower 32 bits */ + uint32_t data; + /* Address (0xfeexxxxx) */ + uint64_t addr; +}; +typedef struct xen_hvm_inject_msi xen_hvm_inject_msi_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_msi_t); #endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/hvm/ioreq.h#2 (text) ==== @@ -43,32 +43,24 @@ * virq */ struct ioreq { - uint64_t addr; /* physical address */ - uint64_t size; /* size in bytes */ - uint64_t count; /* for rep prefixes */ - uint64_t data; /* data (or paddr of data) */ + uint64_t addr; /* physical address */ + uint64_t data; /* data (or paddr of data) */ + uint32_t count; /* for rep prefixes */ + uint32_t size; /* size in bytes */ + uint32_t vp_eport; /* evtchn for notifications to/from device model */ + uint16_t _pad0; uint8_t state:4; - uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr - * of the real data to use. */ - uint8_t dir:1; /* 1=read, 0=write */ + uint8_t data_is_ptr:1; /* if 1, data above is the guest paddr + * of the real data to use. */ + uint8_t dir:1; /* 1=read, 0=write */ uint8_t df:1; - uint8_t pad:1; - uint8_t type; /* I/O type */ - uint8_t _pad0[6]; - uint64_t io_count; /* How many IO done on a vcpu */ + uint8_t _pad1:1; + uint8_t type; /* I/O type */ }; typedef struct ioreq ioreq_t; -struct vcpu_iodata { - struct ioreq vp_ioreq; - /* Event channel port, used for notifications to/from the device model. */ - uint32_t vp_eport; - uint32_t _pad0; -}; -typedef struct vcpu_iodata vcpu_iodata_t; - struct shared_iopage { - struct vcpu_iodata vcpu_iodata[1]; + struct ioreq vcpu_ioreq[1]; }; typedef struct shared_iopage shared_iopage_t; @@ -108,11 +100,32 @@ }; #endif /* defined(__ia64__) */ -#define ACPI_PM1A_EVT_BLK_ADDRESS 0x0000000000001f40 -#define ACPI_PM1A_CNT_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x04) -#define ACPI_PM_TMR_BLK_ADDRESS (ACPI_PM1A_EVT_BLK_ADDRESS + 0x08) -#define ACPI_GPE0_BLK_ADDRESS (ACPI_PM_TMR_BLK_ADDRESS + 0x20) -#define ACPI_GPE0_BLK_LEN 0x08 +/* + * ACPI Control/Event register locations. Location is controlled by a + * version number in HVM_PARAM_ACPI_IOPORTS_LOCATION. + */ + +/* Version 0 (default): Traditional Xen locations. */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V0 0x1f40 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V0 (ACPI_PM1A_EVT_BLK_ADDRESS_V0 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V0 (ACPI_PM_TMR_BLK_ADDRESS_V0 + 0x20) +#define ACPI_GPE0_BLK_LEN_V0 0x08 + +/* Version 1: Locations preferred by modern Qemu. */ +#define ACPI_PM1A_EVT_BLK_ADDRESS_V1 0xb000 +#define ACPI_PM1A_CNT_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x04) +#define ACPI_PM_TMR_BLK_ADDRESS_V1 (ACPI_PM1A_EVT_BLK_ADDRESS_V1 + 0x08) +#define ACPI_GPE0_BLK_ADDRESS_V1 0xafe0 +#define ACPI_GPE0_BLK_LEN_V1 0x04 + +/* Compatibility definitions for the default location (version 0). 
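The net effect of the ioreq rework for a device model: struct vcpu_iodata is gone, the notification event channel now lives in the ioreq itself, and the shared page is indexed directly. An accessor sketch showing old versus new (names illustrative):

/* Old layout:
 *   ioreq_t *p = &iopage->vcpu_iodata[vcpu].vp_ioreq;
 *   evtchn_port_t port = iopage->vcpu_iodata[vcpu].vp_eport;
 */
static ioreq_t *
get_ioreq(shared_iopage_t *iopage, unsigned int vcpu)
{
    return (&iopage->vcpu_ioreq[vcpu]);   /* port is p->vp_eport */
}
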
*/ +#define ACPI_PM1A_EVT_BLK_ADDRESS ACPI_PM1A_EVT_BLK_ADDRESS_V0 +#define ACPI_PM1A_CNT_BLK_ADDRESS ACPI_PM1A_CNT_BLK_ADDRESS_V0 +#define ACPI_PM_TMR_BLK_ADDRESS ACPI_PM_TMR_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_ADDRESS ACPI_GPE0_BLK_ADDRESS_V0 +#define ACPI_GPE0_BLK_LEN ACPI_GPE0_BLK_LEN_V0 + #endif /* _IOREQ_H_ */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/hvm/params.h#2 (text) ==== @@ -33,6 +33,9 @@ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: * Domain = val[47:32], Bus = val[31:16], * DevFn = val[15: 8], IntX = val[ 1: 0] + * val[63:56] == 2: val[7:0] is a vector number, check for + * XENFEAT_hvm_callback_vector to know if this delivery + * method is available. * If val == 0 then CPU0 event-channel notifications are not delivered. */ #define HVM_PARAM_CALLBACK_IRQ 0 @@ -49,11 +52,19 @@ #define HVM_PARAM_IOREQ_PFN 5 #define HVM_PARAM_BUFIOREQ_PFN 6 +#define HVM_PARAM_BUFIOREQ_EVTCHN 26 #ifdef __ia64__ + #define HVM_PARAM_NVRAM_FD 7 #define HVM_PARAM_VHPT_SIZE 8 #define HVM_PARAM_BUFPIOREQ_PFN 9 + +#elif defined(__i386__) || defined(__x86_64__) + +/* Expose Viridian interfaces to this HVM guest? */ +#define HVM_PARAM_VIRIDIAN 9 + #endif /* @@ -93,32 +104,49 @@ /* ACPI S state: currently support S0 and S3 on x86. */ #define HVM_PARAM_ACPI_S_STATE 14 -#define HVM_NR_PARAMS 15 +/* TSS used on Intel when CR0.PE=0. */ +#define HVM_PARAM_VM86_TSS 15 + +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ +#define HVM_PARAM_VPT_ALIGN 16 + +/* Console debug shared memory ring and event channel */ +#define HVM_PARAM_CONSOLE_PFN 17 +#define HVM_PARAM_CONSOLE_EVTCHN 18 -#ifdef XENHVM -/** - * Retrieve an HVM setting from the hypervisor. - * - * \param index The index of the HVM parameter to retrieve. - * - * \return On error, 0. Otherwise the value of the requested parameter. +/* + * Select location of ACPI PM1a and TMR control blocks. 
Currently two locations + * are supported, specified by version 0 or 1 in this parameter: + * - 0: default, use the old addresses + * PM1A_EVT == 0x1f40; PM1A_CNT == 0x1f44; PM_TMR == 0x1f48 + * - 1: use the new default qemu addresses + * PM1A_EVT == 0xb000; PM1A_CNT == 0xb004; PM_TMR == 0xb008 + * You can find these address definitions in */ -static inline unsigned long -hvm_get_parameter(int index) -{ - struct xen_hvm_param xhv; - int error; +#define HVM_PARAM_ACPI_IOPORTS_LOCATION 19 + +/* Enable blocking memory events, async or sync (pause vcpu until response) + * onchangeonly indicates messages only on a change of value */ +#define HVM_PARAM_MEMORY_EVENT_CR0 20 +#define HVM_PARAM_MEMORY_EVENT_CR3 21 +#define HVM_PARAM_MEMORY_EVENT_CR4 22 +#define HVM_PARAM_MEMORY_EVENT_INT3 23 +#define HVM_PARAM_MEMORY_EVENT_SINGLE_STEP 25 + +#define HVMPME_MODE_MASK (3 << 0) +#define HVMPME_mode_disabled 0 +#define HVMPME_mode_async 1 +#define HVMPME_mode_sync 2 +#define HVMPME_onchangeonly (1 << 2) + +/* Boolean: Enable nestedhvm (hvm only) */ +#define HVM_PARAM_NESTEDHVM 24 + +/* Params for the mem event rings */ +#define HVM_PARAM_PAGING_RING_PFN 27 +#define HVM_PARAM_ACCESS_RING_PFN 28 +#define HVM_PARAM_SHARING_RING_PFN 29 - xhv.domid = DOMID_SELF; - xhv.index = index; - error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); - if (error) { - printf("hvm_get_parameter: failed to get %d, error %d\n", - index, error); - return (0); - } - return (xhv.value); -} -#endif +#define HVM_NR_PARAMS 30 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/hvm/save.h#2 (text) ==== @@ -61,13 +61,36 @@ * ugliness. */ -#define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ - struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; } +#ifdef __XEN__ +# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \ + static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { return _fix(h); } \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];}; \ + struct __HVM_SAVE_TYPE_COMPAT_##_x { _ctype t; } + +# include /* BUG() */ +# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ + static inline int __HVM_SAVE_FIX_COMPAT_##_x(void *h) { BUG(); return -1; } \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];}; \ + struct __HVM_SAVE_TYPE_COMPAT_##_x { _type t; } +#else +# define DECLARE_HVM_SAVE_TYPE_COMPAT(_x, _code, _type, _ctype, _fix) \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[2];} + +# define DECLARE_HVM_SAVE_TYPE(_x, _code, _type) \ + struct __HVM_SAVE_TYPE_##_x { _type t; char c[_code]; char cpt[1];} +#endif #define HVM_SAVE_TYPE(_x) typeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->t) #define HVM_SAVE_LENGTH(_x) (sizeof (HVM_SAVE_TYPE(_x))) #define HVM_SAVE_CODE(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->c)) +#ifdef __XEN__ +# define HVM_SAVE_TYPE_COMPAT(_x) typeof (((struct __HVM_SAVE_TYPE_COMPAT_##_x *)(0))->t) +# define HVM_SAVE_LENGTH_COMPAT(_x) (sizeof (HVM_SAVE_TYPE_COMPAT(_x))) + +# define HVM_SAVE_HAS_COMPAT(_x) (sizeof (((struct __HVM_SAVE_TYPE_##_x *)(0))->cpt)-1) +# define HVM_SAVE_FIX_COMPAT(_x, _dst) __HVM_SAVE_FIX_COMPAT_##_x(_dst) +#endif /* * The series of save records is teminated by a zero-type, zero-length @@ -78,9 +101,11 @@ DECLARE_HVM_SAVE_TYPE(END, 0, struct hvm_save_end); #if defined(__i386__) || defined(__x86_64__) -#include "../arch-x86/hvm/save.h" +#include ring[0]))) - + sizeof(((struct _s##_sring *)0)->ring[0]))) /* * The same for passing in an actual pointer instead of a name tag. 
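The helper removed from params.h above is re-homed in sys/xen/hvm.h essentially as-is; for reference, a version consistent with the deleted code:

/* sys/xen/hvm.h (sketch of the relocated helper) */
static inline unsigned long
hvm_get_parameter(int index)
{
    struct xen_hvm_param xhv;
    int error;

    xhv.domid = DOMID_SELF;
    xhv.index = index;
    error = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
    if (error) {
        printf("%s: failed to get %d, error %d\n",
            __func__, index, error);
        return (0);
    }
    return (xhv.value);
}
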
*/ @@ -120,7 +119,16 @@ struct __name##_sring { \ RING_IDX req_prod, req_event; \ RING_IDX rsp_prod, rsp_event; \ - uint8_t pad[48]; \ + union { \ + struct { \ + uint8_t smartpoll_active; \ + } netif; \ + struct { \ + uint8_t msg; \ + } tapif_user; \ + uint8_t pvt_pad[4]; \ + } private; \ + uint8_t __pad[44]; \ union __name##_sring_entry ring[1]; /* variable-length */ \ }; \ \ @@ -164,7 +172,8 @@ #define SHARED_RING_INIT(_s) do { \ (_s)->req_prod = (_s)->rsp_prod = 0; \ (_s)->req_event = (_s)->rsp_event = 1; \ - (void)memset((_s)->pad, 0, sizeof((_s)->pad)); \ + (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ } while(0) #define FRONT_RING_INIT(_r, _s, __size) do { \ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/io/xs_wire.h#2 (text) ==== @@ -47,7 +47,9 @@ XS_ERROR, XS_IS_DOMAIN_INTRODUCED, XS_RESUME, - XS_SET_TARGET + XS_SET_TARGET, + XS_RESTRICT, + XS_RESET_WATCHES }; #define XS_WRITE_NONE "NONE" @@ -60,6 +62,7 @@ int errnum; const char *errstring; }; +#ifdef EINVAL #define XSD_ERROR(x) { x, #x } /* LINTED: static unused */ static struct xsd_errors xsd_errors[] @@ -82,6 +85,7 @@ XSD_ERROR(EAGAIN), XSD_ERROR(EISCONN) }; +#endif struct xsd_sockmsg { ==== //depot/SpectraBSD/stable/9/sys/xen/interface/kexec.h#2 (text) ==== @@ -155,27 +155,6 @@ unsigned long start; } xen_kexec_range_t; -/* vmcoreinfo stuff */ -#define VMCOREINFO_BYTES (4096) -#define VMCOREINFO_NOTE_NAME "VMCOREINFO_XEN" -void arch_crash_save_vmcoreinfo(void); -void vmcoreinfo_append_str(const char *fmt, ...) - __attribute__ ((format (printf, 1, 2))); -#define VMCOREINFO_PAGESIZE(value) \ - vmcoreinfo_append_str("PAGESIZE=%ld\n", value) -#define VMCOREINFO_SYMBOL(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) -#define VMCOREINFO_SYMBOL_ALIAS(alias, name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #alias, (unsigned long)&name) -#define VMCOREINFO_STRUCT_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%zu\n", #name, sizeof(struct name)) -#define VMCOREINFO_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(struct name, field)) -#define VMCOREINFO_OFFSET_ALIAS(name, field, alias) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #alias, \ - (unsigned long)offsetof(struct name, field)) - #endif /* _XEN_PUBLIC_KEXEC_H */ /* ==== //depot/SpectraBSD/stable/9/sys/xen/interface/memory.h#2 (text) ==== @@ -27,6 +27,8 @@ #ifndef __XEN_PUBLIC_MEMORY_H__ #define __XEN_PUBLIC_MEMORY_H__ +#include + /* * Increase or decrease the specified domain's memory reservation. Returns the * number of extents successfully allocated or freed. @@ -48,6 +50,11 @@ /* NUMA node to allocate from. */ #define XENMEMF_node(x) (((x) + 1) << 8) #define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu) +/* Flag to populate physmap with populate-on-demand entries */ +#define XENMEMF_populate_on_demand (1<<16) +/* Flag to request allocation only from the node specified */ +#define XENMEMF_exact_node_request (1<<17) +#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request) #endif struct xen_memory_reservation { @@ -201,12 +208,18 @@ /* Which domain to change the mapping for. */ domid_t domid; + /* Number of pages to go through for gmfn_range */ + uint16_t size; + /* Source mapping space. 
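The new XENMAPSPACE_gmfn_range space (defined just below) turns what used to be N single-page XENMEM_add_to_physmap calls into one batched call; note the new 16-bit size field above. A hedged sketch (the gpfn destination field is part of the full struct, outside this hunk):

struct xen_add_to_physmap xatp;

xatp.domid = DOMID_SELF;
xatp.space = XENMAPSPACE_gmfn_range;
xatp.size  = nr_pages;          /* batch covers idx .. idx+size-1 */
xatp.idx   = first_gpfn;        /* source GMFN */
xatp.gpfn  = first_gpfn;        /* destination GPFN */
if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp) != 0)
        /* fall back to per-page XENMAPSPACE_gmfn calls */;
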
*/ #define XENMAPSPACE_shared_info 0 /* shared info page */ #define XENMAPSPACE_grant_table 1 /* grant table page */ -#define XENMAPSPACE_mfn 2 /* usual MFN */ +#define XENMAPSPACE_gmfn 2 /* GMFN */ +#define XENMAPSPACE_gmfn_range 3 /* GMFN range */ unsigned int space; +#define XENMAPIDX_grant_table_status 0x80000000 + /* Index into source mapping space. */ xen_ulong_t idx; @@ -232,29 +245,8 @@ typedef struct xen_remove_from_physmap xen_remove_from_physmap_t; DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t); -/* - * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error - * code on failure. This call only works for auto-translated guests. - */ -#define XENMEM_translate_gpfn_list 8 -struct xen_translate_gpfn_list { - /* Which domain to translate for? */ - domid_t domid; - - /* Length of list. */ - xen_ulong_t nr_gpfns; - - /* List of GPFNs to translate. */ - XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list; - - /* - * Output list to contain MFN translations. May be the same as the input - * list (in which case each input GPFN is overwritten with the output MFN). - */ - XEN_GUEST_HANDLE(xen_pfn_t) mfn_list; -}; -typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t; -DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t); +/*** REMOVED ***/ +/*#define XENMEM_translate_gpfn_list 8*/ /* * Returns the pseudo-physical memory map as it was when the domain @@ -299,6 +291,114 @@ typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); +#define XENMEM_set_pod_target 16 +#define XENMEM_get_pod_target 17 +struct xen_pod_target { + /* IN */ + uint64_t target_pages; + /* OUT */ + uint64_t tot_pages; + uint64_t pod_cache_pages; + uint64_t pod_entries; + /* IN */ + domid_t domid; +}; +typedef struct xen_pod_target xen_pod_target_t; + +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif + +/* + * Get the number of MFNs saved through memory sharing. + * The call never fails. + */ +#define XENMEM_get_sharing_freed_pages 18 +#define XENMEM_get_sharing_shared_pages 19 + +#define XENMEM_paging_op 20 +#define XENMEM_paging_op_nominate 0 +#define XENMEM_paging_op_evict 1 +#define XENMEM_paging_op_prep 2 + +#define XENMEM_access_op 21 +#define XENMEM_access_op_resume 0 + +struct xen_mem_event_op { + uint8_t op; /* XENMEM_*_op_* */ + domid_t domain; + + + /* PAGING_PREP IN: buffer to immediately fill page in */ + uint64_aligned_t buffer; + /* Other OPs */ + uint64_aligned_t gfn; /* IN: gfn of page being operated on */ +}; +typedef struct xen_mem_event_op xen_mem_event_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_event_op_t); + +#define XENMEM_sharing_op 22 +#define XENMEM_sharing_op_nominate_gfn 0 +#define XENMEM_sharing_op_nominate_gref 1 +#define XENMEM_sharing_op_share 2 +#define XENMEM_sharing_op_resume 3 +#define XENMEM_sharing_op_debug_gfn 4 +#define XENMEM_sharing_op_debug_mfn 5 +#define XENMEM_sharing_op_debug_gref 6 +#define XENMEM_sharing_op_add_physmap 7 +#define XENMEM_sharing_op_audit 8 + +#define XENMEM_SHARING_OP_S_HANDLE_INVALID (-10) +#define XENMEM_SHARING_OP_C_HANDLE_INVALID (-9) + +/* The following allows sharing of grant refs. This is useful + * for sharing utilities sitting as "filters" in IO backends + * (e.g. memshr + blktap(2)). 
The IO backend is only exposed + * to grant references, and this allows sharing of the grefs */ +#define XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG (1ULL << 62) + +#define XENMEM_SHARING_OP_FIELD_MAKE_GREF(field, val) \ + (field) = (XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG | val) +#define XENMEM_SHARING_OP_FIELD_IS_GREF(field) \ + ((field) & XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG) +#define XENMEM_SHARING_OP_FIELD_GET_GREF(field) \ + ((field) & (~XENMEM_SHARING_OP_FIELD_IS_GREF_FLAG)) + +struct xen_mem_sharing_op { + uint8_t op; /* XENMEM_sharing_op_* */ + domid_t domain; + + union { + struct mem_sharing_op_nominate { /* OP_NOMINATE_xxx */ + union { + uint64_aligned_t gfn; /* IN: gfn to nominate */ + uint32_t grant_ref; /* IN: grant ref to nominate */ + } u; + uint64_aligned_t handle; /* OUT: the handle */ + } nominate; + struct mem_sharing_op_share { /* OP_SHARE/ADD_PHYSMAP */ + uint64_aligned_t source_gfn; /* IN: the gfn of the source page */ + uint64_aligned_t source_handle; /* IN: handle to the source page */ + uint64_aligned_t client_gfn; /* IN: the client gfn */ + uint64_aligned_t client_handle; /* IN: handle to the client page */ + domid_t client_domain; /* IN: the client domain id */ + } share; + struct mem_sharing_op_debug { /* OP_DEBUG_xxx */ + union { + uint64_aligned_t gfn; /* IN: gfn to debug */ + uint64_aligned_t mfn; /* IN: mfn to debug */ + uint32_t gref; /* IN: gref to debug */ + } u; + } debug; + } u; +}; +typedef struct xen_mem_sharing_op xen_mem_sharing_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_mem_sharing_op_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + #endif /* __XEN_PUBLIC_MEMORY_H__ */ /* ==== //depot/SpectraBSD/stable/9/sys/xen/interface/physdev.h#2 (text) ==== @@ -41,6 +41,29 @@ DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); /* + * Register a shared page for the hypervisor to indicate whether the guest + * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly + * once the guest used this function in that the associated event channel + * will automatically get unmasked. The page registered is used as a bit + * array indexed by Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v1 17 +/* + * Register a shared page for the hypervisor to indicate whether the + * guest must issue PHYSDEVOP_eoi. This hypercall is very similar to + * PHYSDEVOP_pirq_eoi_gmfn_v1 but it doesn't change the semantics of + * PHYSDEVOP_eoi. The page registered is used as a bit array indexed by + * Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn_v2 28 +struct physdev_pirq_eoi_gmfn { + /* IN */ + xen_pfn_t gmfn; +}; +typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t); + +/* * Query the status of an IRQ line. * @arg == pointer to physdev_irq_status_query structure. 
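Registering the EOI bitmap page described above is a one-shot call at boot. A sketch, assuming a page-aligned bitmap and a vtophys()-style virtual-to-physical translation:

static unsigned long pirq_eoi_bits[PAGE_SIZE / sizeof(unsigned long)]
    __attribute__((aligned(PAGE_SIZE)));

struct physdev_pirq_eoi_gmfn eoi_gmfn;

eoi_gmfn.gmfn = vtophys(pirq_eoi_bits) >> PAGE_SHIFT;
if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn) != 0)
        /* hypervisor too old: keep issuing PHYSDEVOP_eoi unconditionally */;
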
*/ @@ -125,6 +148,7 @@ #define MAP_PIRQ_TYPE_MSI 0x0 #define MAP_PIRQ_TYPE_GSI 0x1 #define MAP_PIRQ_TYPE_UNKNOWN 0x2 +#define MAP_PIRQ_TYPE_MSI_SEG 0x3 #define PHYSDEVOP_map_pirq 13 struct physdev_map_pirq { @@ -135,7 +159,7 @@ int index; /* IN or OUT */ int pirq; - /* IN */ + /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */ int bus; /* IN */ int devfn; @@ -168,6 +192,31 @@ typedef struct physdev_manage_pci physdev_manage_pci_t; DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); +#define PHYSDEVOP_restore_msi 19 +struct physdev_restore_msi { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_restore_msi physdev_restore_msi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t); + +#define PHYSDEVOP_manage_pci_add_ext 20 +struct physdev_manage_pci_ext { + /* IN */ + uint8_t bus; + uint8_t devfn; + unsigned is_extfn; + unsigned is_virtfn; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +}; + +typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t); + /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. @@ -185,6 +234,82 @@ typedef struct physdev_op physdev_op_t; DEFINE_XEN_GUEST_HANDLE(physdev_op_t); +#define PHYSDEVOP_setup_gsi 21 +struct physdev_setup_gsi { + int gsi; + /* IN */ + uint8_t triggering; + /* IN */ + uint8_t polarity; + /* IN */ +}; + +typedef struct physdev_setup_gsi physdev_setup_gsi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t); + +/* leave PHYSDEVOP 22 free */ + +/* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI + * the hypercall returns a free pirq */ +#define PHYSDEVOP_get_free_pirq 23 +struct physdev_get_free_pirq { + /* IN */ + int type; + /* OUT */ + uint32_t pirq; +}; + +typedef struct physdev_get_free_pirq physdev_get_free_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t); + +#define XEN_PCI_MMCFG_RESERVED 0x1 + +#define PHYSDEVOP_pci_mmcfg_reserved 24 +struct physdev_pci_mmcfg_reserved { + uint64_t address; + uint16_t segment; + uint8_t start_bus; + uint8_t end_bus; + uint32_t flags; +}; +typedef struct physdev_pci_mmcfg_reserved physdev_pci_mmcfg_reserved_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_mmcfg_reserved_t); + +#define XEN_PCI_DEV_EXTFN 0x1 +#define XEN_PCI_DEV_VIRTFN 0x2 +#define XEN_PCI_DEV_PXM 0x4 + +#define PHYSDEVOP_pci_device_add 25 +struct physdev_pci_device_add { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; + uint32_t flags; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + uint32_t optarr[]; +#elif defined(__GNUC__) + uint32_t optarr[0]; +#endif +}; +typedef struct physdev_pci_device_add physdev_pci_device_add_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_add_t); + +#define PHYSDEVOP_pci_device_remove 26 +#define PHYSDEVOP_restore_msi_ext 27 +struct physdev_pci_device { + /* IN */ + uint16_t seg; + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_pci_device physdev_pci_device_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pci_device_t); + /* * Notify that some PIRQ-bound event channels have been unmasked. 
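To use the new MAP_PIRQ_TYPE_MSI_SEG type, the PCI segment rides in the upper half of the bus field as noted above. A sketch; the "-1 means let Xen allocate" convention for index/pirq follows common guest usage and is an assumption here:

struct physdev_map_pirq map;

memset(&map, 0, sizeof(map));
map.domid    = DOMID_SELF;
map.type     = MAP_PIRQ_TYPE_MSI_SEG;
map.index    = -1;                    /* let Xen pick the MSI index */
map.pirq     = -1;                    /* let Xen pick the PIRQ */
map.bus      = (seg << 16) | bus;     /* segment in the high 16 bits */
map.devfn    = devfn;
map.entry_nr = 1;
if (HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map) == 0)
        /* map.pirq now holds the allocated PIRQ */;
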
* ** This command is obsolete since interface version 0x00030202 and is ** @@ -206,6 +331,12 @@ #define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi #define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared +#if __XEN_INTERFACE_VERSION__ < 0x00040200 +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v1 +#else +#define PHYSDEVOP_pirq_eoi_gmfn PHYSDEVOP_pirq_eoi_gmfn_v2 +#endif + #endif /* __XEN_PUBLIC_PHYSDEV_H__ */ /* ==== //depot/SpectraBSD/stable/9/sys/xen/interface/platform.h#2 (text) ==== @@ -114,10 +114,110 @@ typedef struct xenpf_platform_quirk xenpf_platform_quirk_t; DEFINE_XEN_GUEST_HANDLE(xenpf_platform_quirk_t); +#define XENPF_efi_runtime_call 49 +#define XEN_EFI_get_time 1 +#define XEN_EFI_set_time 2 +#define XEN_EFI_get_wakeup_time 3 +#define XEN_EFI_set_wakeup_time 4 +#define XEN_EFI_get_next_high_monotonic_count 5 +#define XEN_EFI_get_variable 6 +#define XEN_EFI_set_variable 7 +#define XEN_EFI_get_next_variable_name 8 +#define XEN_EFI_query_variable_info 9 +#define XEN_EFI_query_capsule_capabilities 10 +#define XEN_EFI_update_capsule 11 +struct xenpf_efi_runtime_call { + uint32_t function; + /* + * This field is generally used for per sub-function flags (defined + * below), except for the XEN_EFI_get_next_high_monotonic_count case, + * where it holds the single returned value. + */ + uint32_t misc; + unsigned long status; + union { +#define XEN_EFI_GET_TIME_SET_CLEARS_NS 0x00000001 + struct { + struct xenpf_efi_time { + uint16_t year; + uint8_t month; + uint8_t day; + uint8_t hour; + uint8_t min; + uint8_t sec; + uint32_t ns; + int16_t tz; + uint8_t daylight; + } time; + uint32_t resolution; + uint32_t accuracy; + } get_time; + + struct xenpf_efi_time set_time; + +#define XEN_EFI_GET_WAKEUP_TIME_ENABLED 0x00000001 +#define XEN_EFI_GET_WAKEUP_TIME_PENDING 0x00000002 + struct xenpf_efi_time get_wakeup_time; + +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE 0x00000001 +#define XEN_EFI_SET_WAKEUP_TIME_ENABLE_ONLY 0x00000002 + struct xenpf_efi_time set_wakeup_time; + +#define XEN_EFI_VARIABLE_NON_VOLATILE 0x00000001 +#define XEN_EFI_VARIABLE_BOOTSERVICE_ACCESS 0x00000002 +#define XEN_EFI_VARIABLE_RUNTIME_ACCESS 0x00000004 + struct { + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + unsigned long size; + XEN_GUEST_HANDLE(void) data; + struct xenpf_efi_guid { + uint32_t data1; + uint16_t data2; + uint16_t data3; + uint8_t data4[8]; + } vendor_guid; + } get_variable, set_variable; + + struct { + unsigned long size; + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + struct xenpf_efi_guid vendor_guid; + } get_next_variable_name; + + struct { + uint32_t attr; + uint64_t max_store_size; + uint64_t remain_store_size; + uint64_t max_size; + } query_variable_info; + + struct { + XEN_GUEST_HANDLE(void) capsule_header_array; + unsigned long capsule_count; + uint64_t max_capsule_size; + unsigned int reset_type; + } query_capsule_capabilities; + + struct { + XEN_GUEST_HANDLE(void) capsule_header_array; + unsigned long capsule_count; + uint64_t sg_list; /* machine address */ + } update_capsule; + } u; +}; +typedef struct xenpf_efi_runtime_call xenpf_efi_runtime_call_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_efi_runtime_call_t); + #define XENPF_firmware_info 50 #define XEN_FW_DISK_INFO 1 /* from int 13 AH=08/41/48 */ #define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */ #define XEN_FW_VBEDDC_INFO 3 /* from int 10 AX=4f15 */ +#define XEN_FW_EFI_INFO 4 /* from EFI */ +#define XEN_FW_EFI_VERSION 0 +#define XEN_FW_EFI_CONFIG_TABLE 1 +#define XEN_FW_EFI_VENDOR 2 +#define 
XEN_FW_EFI_MEM_INFO 3 +#define XEN_FW_EFI_RT_VERSION 4 struct xenpf_firmware_info { /* IN variables. */ uint32_t type; @@ -148,6 +248,24 @@ /* must refer to 128-byte buffer */ XEN_GUEST_HANDLE(uint8) edid; } vbeddc_info; /* XEN_FW_VBEDDC_INFO */ + union xenpf_efi_info { + uint32_t version; + struct { + uint64_t addr; /* EFI_CONFIGURATION_TABLE */ + uint32_t nent; + } cfg; + struct { + uint32_t revision; + uint32_t bufsz; /* input, in bytes */ + XEN_GUEST_HANDLE(void) name; /* UCS-2/UTF-16 string */ + } vendor; + struct { + uint64_t addr; + uint64_t size; + uint64_t attr; + uint32_t type; + } mem; + } efi_info; /* XEN_FW_EFI_INFO */ } u; }; typedef struct xenpf_firmware_info xenpf_firmware_info_t; @@ -210,6 +328,7 @@ #define XEN_PM_CX 0 #define XEN_PM_PX 1 #define XEN_PM_TX 2 +#define XEN_PM_PDC 3 /* Px sub info type */ #define XEN_PX_PCT 1 @@ -307,11 +426,88 @@ union { struct xen_processor_power power;/* Cx: _CST/_CSD */ struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */ + XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */ } u; }; typedef struct xenpf_set_processor_pminfo xenpf_set_processor_pminfo_t; DEFINE_XEN_GUEST_HANDLE(xenpf_set_processor_pminfo_t); +#define XENPF_get_cpuinfo 55 +struct xenpf_pcpuinfo { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maximum cpu_id that is present */ + uint32_t max_present; +#define XEN_PCPU_FLAGS_ONLINE 1 + /* Corresponding xen_cpuid is not present */ +#define XEN_PCPU_FLAGS_INVALID 2 + uint32_t flags; + uint32_t apic_id; + uint32_t acpi_id; +}; +typedef struct xenpf_pcpuinfo xenpf_pcpuinfo_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_pcpuinfo_t); + +#define XENPF_get_cpu_version 48 +struct xenpf_pcpu_version { + /* IN */ + uint32_t xen_cpuid; + /* OUT */ + /* The maximum cpu_id that is present */ + uint32_t max_present; + char vendor_id[12]; + uint32_t family; + uint32_t model; + uint32_t stepping; +}; +typedef struct xenpf_pcpu_version xenpf_pcpu_version_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_pcpu_version_t); + +#define XENPF_cpu_online 56 +#define XENPF_cpu_offline 57 +struct xenpf_cpu_ol +{ + uint32_t cpuid; +}; +typedef struct xenpf_cpu_ol xenpf_cpu_ol_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_cpu_ol_t); + +#define XENPF_cpu_hotadd 58 +struct xenpf_cpu_hotadd +{ + uint32_t apic_id; + uint32_t acpi_id; + uint32_t pxm; +}; + +#define XENPF_mem_hotadd 59 +struct xenpf_mem_hotadd +{ + uint64_t spfn; + uint64_t epfn; + uint32_t pxm; + uint32_t flags; +}; + +#define XENPF_core_parking 60 + +#define XEN_CORE_PARKING_SET 1 +#define XEN_CORE_PARKING_GET 2 +struct xenpf_core_parking { + /* IN variables */ + uint32_t type; + /* IN variables: set cpu nums expected to be idled */ + /* OUT variables: get cpu nums actually idled */ + uint32_t idle_nums; +}; +typedef struct xenpf_core_parking xenpf_core_parking_t; +DEFINE_XEN_GUEST_HANDLE(xenpf_core_parking_t); + +/* + * ` enum neg_errnoval + * ` HYPERVISOR_platform_op(const struct xen_platform_op*); + */ struct xen_platform_op { uint32_t cmd; uint32_t interface_version; /* XENPF_INTERFACE_VERSION */ @@ -322,11 +518,18 @@ struct xenpf_read_memtype read_memtype; struct xenpf_microcode_update microcode; struct xenpf_platform_quirk platform_quirk; + struct xenpf_efi_runtime_call efi_runtime_call; struct xenpf_firmware_info firmware_info; struct xenpf_enter_acpi_sleep enter_acpi_sleep; struct xenpf_change_freq change_freq; struct xenpf_getidletime getidletime; struct xenpf_set_processor_pminfo set_pminfo; + struct xenpf_pcpuinfo pcpu_info; + struct xenpf_pcpu_version pcpu_version; + struct xenpf_cpu_ol cpu_ol; + struct 
xenpf_cpu_hotadd cpu_add; + struct xenpf_mem_hotadd mem_add; + struct xenpf_core_parking core_parking; uint8_t pad[128]; } u; }; ==== //depot/SpectraBSD/stable/9/sys/xen/interface/sched.h#2 (text) ==== @@ -99,6 +99,29 @@ DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); /* + * Latch a shutdown code, so that when the domain later shuts down it + * reports this code to the control tools. + * @arg == as for SCHEDOP_shutdown. + */ +#define SCHEDOP_shutdown_code 5 + +/* + * Setup, poke and destroy a domain watchdog timer. + * @arg == pointer to sched_watchdog structure. + * With id == 0, setup a domain watchdog timer to cause domain shutdown + * after timeout, returns watchdog id. + * With id != 0 and timeout == 0, destroy domain watchdog timer. + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. + */ +#define SCHEDOP_watchdog 6 +struct sched_watchdog { + uint32_t id; /* watchdog ID */ + uint32_t timeout; /* timeout */ +}; +typedef struct sched_watchdog sched_watchdog_t; +DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t); + +/* * Reason codes for SCHEDOP_shutdown. These may be interpreted by control * software to determine the appropriate action. For the most part, Xen does * not care about the shutdown code. @@ -107,6 +130,7 @@ #define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ +#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ #endif /* __XEN_PUBLIC_SCHED_H__ */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/sysctl.h#2 (text) ==== @@ -34,12 +34,12 @@ #include "xen.h" #include "domctl.h" -#define XEN_SYSCTL_INTERFACE_VERSION 0x00000006 +#define XEN_SYSCTL_INTERFACE_VERSION 0x00000009 /* * Read console content from Xen buffer ring. */ -#define XEN_SYSCTL_readconsole 1 +/* XEN_SYSCTL_readconsole */ struct xen_sysctl_readconsole { /* IN: Non-zero -> clear after reading. */ uint8_t clear; @@ -60,7 +60,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_readconsole_t); /* Get trace buffers machine base address */ -#define XEN_SYSCTL_tbuf_op 2 +/* XEN_SYSCTL_tbuf_op */ struct xen_sysctl_tbuf_op { /* IN variables */ #define XEN_SYSCTL_TBUFOP_get_info 0 @@ -75,7 +75,7 @@ uint32_t evt_mask; /* OUT variables */ uint64_aligned_t buffer_mfn; - uint32_t size; + uint32_t size; /* Also an IN variable! */ }; typedef struct xen_sysctl_tbuf_op xen_sysctl_tbuf_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_tbuf_op_t); @@ -83,7 +83,7 @@ /* * Get physical information about the host machine */ -#define XEN_SYSCTL_physinfo 3 +/* XEN_SYSCTL_physinfo */ /* (x86) The platform supports HVM guests. */ #define _XEN_SYSCTL_PHYSCAP_hvm 0 #define XEN_SYSCTL_PHYSCAP_hvm (1u<<_XEN_SYSCTL_PHYSCAP_hvm) @@ -93,30 +93,16 @@ struct xen_sysctl_physinfo { uint32_t threads_per_core; uint32_t cores_per_socket; - uint32_t nr_cpus; - uint32_t nr_nodes; + uint32_t nr_cpus; /* # CPUs currently online */ + uint32_t max_cpu_id; /* Largest possible CPU ID on this host */ + uint32_t nr_nodes; /* # nodes currently online */ + uint32_t max_node_id; /* Largest possible node ID on this host */ uint32_t cpu_khz; uint64_aligned_t total_pages; uint64_aligned_t free_pages; uint64_aligned_t scrub_pages; uint32_t hw_cap[8]; - /* - * IN: maximum addressable entry in the caller-provided cpu_to_node array. - * OUT: largest cpu identifier in the system. - * If OUT is greater than IN then the cpu_to_node array is truncated! 
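The watchdog op above multiplexes setup, poke, and destroy on the (id, timeout) pair. A sketch of the full lifecycle:

struct sched_watchdog wd;
uint32_t id;

wd.id = 0;  wd.timeout = 60;            /* setup: returns the new id */
id = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);

wd.id = id; wd.timeout = 60;            /* poke: rearm for 60 seconds */
HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);

wd.id = id; wd.timeout = 0;             /* destroy the timer */
HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);
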
- */ - uint32_t max_cpu_id; - /* - * If not NULL, this array is filled with node identifier for each cpu. - * If a cpu has no node information (e.g., cpu not present) then the - * sentinel value ~0u is written. - * The size of this array is specified by the caller in @max_cpu_id. - * If the actual @max_cpu_id is smaller than the array then the trailing - * elements of the array will not be written by the sysctl. - */ - XEN_GUEST_HANDLE_64(uint32) cpu_to_node; - /* XEN_SYSCTL_PHYSCAP_??? */ uint32_t capabilities; }; @@ -126,7 +112,7 @@ /* * Get the ID of the current scheduler. */ -#define XEN_SYSCTL_sched_id 4 +/* XEN_SYSCTL_sched_id */ struct xen_sysctl_sched_id { /* OUT variable */ uint32_t sched_id; @@ -135,7 +121,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_sched_id_t); /* Interface for controlling Xen software performance counters. */ -#define XEN_SYSCTL_perfc_op 5 +/* XEN_SYSCTL_perfc_op */ /* Sub-operations: */ #define XEN_SYSCTL_PERFCOP_reset 1 /* Reset all counters to zero. */ #define XEN_SYSCTL_PERFCOP_query 2 /* Get perfctr information. */ @@ -162,7 +148,7 @@ typedef struct xen_sysctl_perfc_op xen_sysctl_perfc_op_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_perfc_op_t); -#define XEN_SYSCTL_getdomaininfolist 6 +/* XEN_SYSCTL_getdomaininfolist */ struct xen_sysctl_getdomaininfolist { /* IN variables. */ domid_t first_domain; @@ -175,7 +161,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getdomaininfolist_t); /* Inject debug keys into Xen. */ -#define XEN_SYSCTL_debug_keys 7 +/* XEN_SYSCTL_debug_keys */ struct xen_sysctl_debug_keys { /* IN variables. */ XEN_GUEST_HANDLE_64(char) keys; @@ -185,7 +171,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_sysctl_debug_keys_t); /* Get physical CPU information. */ -#define XEN_SYSCTL_getcpuinfo 8 +/* XEN_SYSCTL_getcpuinfo */ struct xen_sysctl_cpuinfo { uint64_aligned_t idletime; }; @@ -201,7 +187,7 @@ typedef struct xen_sysctl_getcpuinfo xen_sysctl_getcpuinfo_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_getcpuinfo_t); -#define XEN_SYSCTL_availheap 9 +/* XEN_SYSCTL_availheap */ struct xen_sysctl_availheap { /* IN variables. */ uint32_t min_bitwidth; /* Smallest address width (zero if don't care). */ @@ -213,7 +199,7 @@ typedef struct xen_sysctl_availheap xen_sysctl_availheap_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_availheap_t); -#define XEN_SYSCTL_get_pmstat 10 +/* XEN_SYSCTL_get_pmstat */ struct pm_px_val { uint64_aligned_t freq; /* Px core frequency */ uint64_aligned_t residency; /* Px residency time */ @@ -239,6 +225,13 @@ uint64_aligned_t idle_time; /* idle time from boot */ XEN_GUEST_HANDLE_64(uint64) triggers; /* Cx trigger counts */ XEN_GUEST_HANDLE_64(uint64) residencies; /* Cx residencies */ + uint64_aligned_t pc2; + uint64_aligned_t pc3; + uint64_aligned_t pc6; + uint64_aligned_t pc7; + uint64_aligned_t cc3; + uint64_aligned_t cc6; + uint64_aligned_t cc7; }; struct xen_sysctl_get_pmstat { @@ -262,7 +255,7 @@ typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t); -#define XEN_SYSCTL_cpu_hotplug 11 +/* XEN_SYSCTL_cpu_hotplug */ struct xen_sysctl_cpu_hotplug { /* IN variables */ uint32_t cpu; /* Physical cpu. */ @@ -273,14 +266,362 @@ typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t; DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t); +/* + * Get/set xen power management, include + * 1. 
cpufreq governors and related parameters + */ +/* XEN_SYSCTL_pm_op */ +struct xen_userspace { + uint32_t scaling_setspeed; +}; +typedef struct xen_userspace xen_userspace_t; + +struct xen_ondemand { + uint32_t sampling_rate_max; + uint32_t sampling_rate_min; + + uint32_t sampling_rate; + uint32_t up_threshold; +}; +typedef struct xen_ondemand xen_ondemand_t; + +/* + * cpufreq para name of this structure named + * same as sysfs file name of native linux + */ +#define CPUFREQ_NAME_LEN 16 +struct xen_get_cpufreq_para { + /* IN/OUT variable */ + uint32_t cpu_num; + uint32_t freq_num; + uint32_t gov_num; + + /* for all governors */ + /* OUT variable */ + XEN_GUEST_HANDLE_64(uint32) affected_cpus; + XEN_GUEST_HANDLE_64(uint32) scaling_available_frequencies; + XEN_GUEST_HANDLE_64(char) scaling_available_governors; + char scaling_driver[CPUFREQ_NAME_LEN]; + + uint32_t cpuinfo_cur_freq; + uint32_t cpuinfo_max_freq; + uint32_t cpuinfo_min_freq; + uint32_t scaling_cur_freq; + + char scaling_governor[CPUFREQ_NAME_LEN]; + uint32_t scaling_max_freq; + uint32_t scaling_min_freq; + + /* for specific governor */ + union { + struct xen_userspace userspace; + struct xen_ondemand ondemand; + } u; + + int32_t turbo_enabled; +}; + +struct xen_set_cpufreq_gov { + char scaling_governor[CPUFREQ_NAME_LEN]; +}; + +struct xen_set_cpufreq_para { + #define SCALING_MAX_FREQ 1 + #define SCALING_MIN_FREQ 2 + #define SCALING_SETSPEED 3 + #define SAMPLING_RATE 4 + #define UP_THRESHOLD 5 + + uint32_t ctrl_type; + uint32_t ctrl_value; +}; + +struct xen_sysctl_pm_op { + #define PM_PARA_CATEGORY_MASK 0xf0 + #define CPUFREQ_PARA 0x10 + + /* cpufreq command type */ + #define GET_CPUFREQ_PARA (CPUFREQ_PARA | 0x01) + #define SET_CPUFREQ_GOV (CPUFREQ_PARA | 0x02) + #define SET_CPUFREQ_PARA (CPUFREQ_PARA | 0x03) + #define GET_CPUFREQ_AVGFREQ (CPUFREQ_PARA | 0x04) + + /* set/reset scheduler power saving option */ + #define XEN_SYSCTL_pm_op_set_sched_opt_smt 0x21 + + /* cpuidle max_cstate access command */ + #define XEN_SYSCTL_pm_op_get_max_cstate 0x22 + #define XEN_SYSCTL_pm_op_set_max_cstate 0x23 + + /* set scheduler migration cost value */ + #define XEN_SYSCTL_pm_op_set_vcpu_migration_delay 0x24 + #define XEN_SYSCTL_pm_op_get_vcpu_migration_delay 0x25 + + /* enable/disable turbo mode when in dbs governor */ + #define XEN_SYSCTL_pm_op_enable_turbo 0x26 + #define XEN_SYSCTL_pm_op_disable_turbo 0x27 + + uint32_t cmd; + uint32_t cpuid; + union { + struct xen_get_cpufreq_para get_para; + struct xen_set_cpufreq_gov set_gov; + struct xen_set_cpufreq_para set_para; + uint64_aligned_t get_avgfreq; + uint32_t set_sched_opt_smt; + uint32_t get_max_cstate; + uint32_t set_max_cstate; + uint32_t get_vcpu_migration_delay; + uint32_t set_vcpu_migration_delay; + } u; +}; + +/* XEN_SYSCTL_page_offline_op */ +struct xen_sysctl_page_offline_op { + /* IN: range of page to be offlined */ +#define sysctl_page_offline 1 +#define sysctl_page_online 2 +#define sysctl_query_page_offline 3 + uint32_t cmd; + uint32_t start; + uint32_t end; + /* OUT: result of page offline request */ + /* + * bit 0~15: result flags + * bit 16~31: owner + */ + XEN_GUEST_HANDLE(uint32) status; +}; + +#define PG_OFFLINE_STATUS_MASK (0xFFUL) + +/* The result is invalid, i.e. 
HV does not handle it */ +#define PG_OFFLINE_INVALID (0x1UL << 0) + +#define PG_OFFLINE_OFFLINED (0x1UL << 1) +#define PG_OFFLINE_PENDING (0x1UL << 2) +#define PG_OFFLINE_FAILED (0x1UL << 3) +#define PG_OFFLINE_AGAIN (0x1UL << 4) + +#define PG_ONLINE_FAILED PG_OFFLINE_FAILED +#define PG_ONLINE_ONLINED PG_OFFLINE_OFFLINED + +#define PG_OFFLINE_STATUS_OFFLINED (0x1UL << 1) +#define PG_OFFLINE_STATUS_ONLINE (0x1UL << 2) +#define PG_OFFLINE_STATUS_OFFLINE_PENDING (0x1UL << 3) +#define PG_OFFLINE_STATUS_BROKEN (0x1UL << 4) + +#define PG_OFFLINE_MISC_MASK (0xFFUL << 4) + +/* valid when PG_OFFLINE_FAILED or PG_OFFLINE_PENDING */ +#define PG_OFFLINE_XENPAGE (0x1UL << 8) +#define PG_OFFLINE_DOM0PAGE (0x1UL << 9) +#define PG_OFFLINE_ANONYMOUS (0x1UL << 10) +#define PG_OFFLINE_NOT_CONV_RAM (0x1UL << 11) +#define PG_OFFLINE_OWNED (0x1UL << 12) + +#define PG_OFFLINE_BROKEN (0x1UL << 13) +#define PG_ONLINE_BROKEN PG_OFFLINE_BROKEN + +#define PG_OFFLINE_OWNER_SHIFT 16 + +/* XEN_SYSCTL_lockprof_op */ +/* Sub-operations: */ +#define XEN_SYSCTL_LOCKPROF_reset 1 /* Reset all profile data to zero. */ +#define XEN_SYSCTL_LOCKPROF_query 2 /* Get lock profile information. */ +/* Record-type: */ +#define LOCKPROF_TYPE_GLOBAL 0 /* global lock, idx meaningless */ +#define LOCKPROF_TYPE_PERDOM 1 /* per-domain lock, idx is domid */ +#define LOCKPROF_TYPE_N 2 /* number of types */ struct xen_sysctl_lockprof_data { + char name[40]; /* lock name (may include up to 2 %d specifiers) */ + int32_t type; /* LOCKPROF_TYPE_??? */ + int32_t idx; /* index (e.g. domain id) */ + uint64_aligned_t lock_cnt; /* # of locking succeeded */ + uint64_aligned_t block_cnt; /* # of wait for lock */ + uint64_aligned_t lock_time; /* nsecs lock held */ + uint64_aligned_t block_time; /* nsecs waited for lock */ +}; +typedef struct xen_sysctl_lockprof_data xen_sysctl_lockprof_data_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_data_t); struct xen_sysctl_lockprof_op { + /* IN variables. */ + uint32_t cmd; /* XEN_SYSCTL_LOCKPROF_??? */ + uint32_t max_elem; /* size of output buffer */ + /* OUT variables (query only). */ + uint32_t nr_elem; /* number of elements available */ + uint64_aligned_t time; /* nsecs of profile measurement */ + /* profile information (or NULL) */ + XEN_GUEST_HANDLE_64(xen_sysctl_lockprof_data_t) data; +}; +typedef struct xen_sysctl_lockprof_op xen_sysctl_lockprof_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_lockprof_op_t); + +/* XEN_SYSCTL_topologyinfo */ +#define INVALID_TOPOLOGY_ID (~0U) struct xen_sysctl_topologyinfo { + /* + * IN: maximum addressable entry in the caller-provided arrays. + * OUT: largest cpu identifier in the system. + * If OUT is greater than IN then the arrays are truncated! + * If OUT is less than IN then the array tails are not written by sysctl. + */ + uint32_t max_cpu_index; + + /* + * If not NULL, these arrays are filled with core/socket/node identifier + * for each cpu. + * If a cpu has no core/socket/node information (e.g., cpu not present) + * then the sentinel value ~0u is written to each array. + * The number of array elements written by the sysctl is: + * min(@max_cpu_index_IN,@max_cpu_index_OUT)+1 + */ + XEN_GUEST_HANDLE_64(uint32) cpu_to_core; + XEN_GUEST_HANDLE_64(uint32) cpu_to_socket; + XEN_GUEST_HANDLE_64(uint32) cpu_to_node; +}; +typedef struct xen_sysctl_topologyinfo xen_sysctl_topologyinfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_topologyinfo_t); + +/* XEN_SYSCTL_numainfo */ struct xen_sysctl_numainfo { + /* + * IN: maximum addressable entry in the caller-provided arrays. 
+ * OUT: largest node identifier in the system. + * If OUT is greater than IN then the arrays are truncated! + */ + uint32_t max_node_index; + + /* NB. Entries are 0 if node is not present. */ + XEN_GUEST_HANDLE_64(uint64) node_to_memsize; + XEN_GUEST_HANDLE_64(uint64) node_to_memfree; + + /* + * Array, of size (max_node_index+1)^2, listing memory access distances + * between nodes. If an entry has no node distance information (e.g., node + * not present) then the value ~0u is written. + * + * Note that the array rows must be indexed by multiplying by the minimum + * of the caller-provided max_node_index and the returned value of + * max_node_index. That is, if the largest node index in the system is + * smaller than the caller can handle, a smaller 2-d array is constructed + * within the space provided by the caller. When this occurs, trailing + * space provided by the caller is not modified. If the largest node index + * in the system is larger than the caller can handle, then a 2-d array of + * the maximum size handleable by the caller is constructed. + */ + XEN_GUEST_HANDLE_64(uint32) node_to_node_distance; +}; +typedef struct xen_sysctl_numainfo xen_sysctl_numainfo_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_numainfo_t); + +/* XEN_SYSCTL_cpupool_op */ +#define XEN_SYSCTL_CPUPOOL_OP_CREATE 1 /* C */ +#define XEN_SYSCTL_CPUPOOL_OP_DESTROY 2 /* D */ +#define XEN_SYSCTL_CPUPOOL_OP_INFO 3 /* I */ +#define XEN_SYSCTL_CPUPOOL_OP_ADDCPU 4 /* A */ +#define XEN_SYSCTL_CPUPOOL_OP_RMCPU 5 /* R */ +#define XEN_SYSCTL_CPUPOOL_OP_MOVEDOMAIN 6 /* M */ +#define XEN_SYSCTL_CPUPOOL_OP_FREEINFO 7 /* F */ +#define XEN_SYSCTL_CPUPOOL_PAR_ANY 0xFFFFFFFF +struct xen_sysctl_cpupool_op { + uint32_t op; /* IN */ + uint32_t cpupool_id; /* IN: CDIARM OUT: CI */ + uint32_t sched_id; /* IN: C OUT: I */ + uint32_t domid; /* IN: M */ + uint32_t cpu; /* IN: AR */ + uint32_t n_dom; /* OUT: I */ + struct xenctl_cpumap cpumap; /* OUT: IF */ +}; +typedef struct xen_sysctl_cpupool_op xen_sysctl_cpupool_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpupool_op_t); + +#define ARINC653_MAX_DOMAINS_PER_SCHEDULE 64 +/* + * This structure is used to pass a new ARINC653 schedule from a + * privileged domain (ie dom0) to Xen. + */ +struct xen_sysctl_arinc653_schedule { + /* major_frame holds the time for the new schedule's major frame + * in nanoseconds. */ + uint64_aligned_t major_frame; + /* num_sched_entries holds how many of the entries in the + * sched_entries[] array are valid. */ + uint8_t num_sched_entries; + /* The sched_entries array holds the actual schedule entries. */ + struct { + /* dom_handle must match a domain's UUID */ + xen_domain_handle_t dom_handle; + /* If a domain has multiple VCPUs, vcpu_id specifies which one + * this schedule entry applies to. It should be set to 0 if + * there is only one VCPU for the domain. */ + unsigned int vcpu_id; + /* runtime specifies the amount of time that should be allocated + * to this VCPU per major frame. 
It is specified in nanoseconds + uint64_aligned_t runtime; + } sched_entries[ARINC653_MAX_DOMAINS_PER_SCHEDULE]; +}; +typedef struct xen_sysctl_arinc653_schedule xen_sysctl_arinc653_schedule_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_arinc653_schedule_t); + +struct xen_sysctl_credit_schedule { + /* Length of timeslice in milliseconds */ +#define XEN_SYSCTL_CSCHED_TSLICE_MAX 1000 +#define XEN_SYSCTL_CSCHED_TSLICE_MIN 1 + unsigned tslice_ms; + /* Rate limit (minimum timeslice) in microseconds */ +#define XEN_SYSCTL_SCHED_RATELIMIT_MAX 500000 +#define XEN_SYSCTL_SCHED_RATELIMIT_MIN 100 + unsigned ratelimit_us; +}; +typedef struct xen_sysctl_credit_schedule xen_sysctl_credit_schedule_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_credit_schedule_t); + +/* XEN_SYSCTL_scheduler_op */ +/* Set or get info? */ +#define XEN_SYSCTL_SCHEDOP_putinfo 0 +#define XEN_SYSCTL_SCHEDOP_getinfo 1 +struct xen_sysctl_scheduler_op { + uint32_t cpupool_id; /* Cpupool whose scheduler is to be targeted. */ + uint32_t sched_id; /* XEN_SCHEDULER_* (domctl.h) */ + uint32_t cmd; /* XEN_SYSCTL_SCHEDOP_* */ + union { + struct xen_sysctl_sched_arinc653 { + XEN_GUEST_HANDLE_64(xen_sysctl_arinc653_schedule_t) schedule; + } sched_arinc653; + struct xen_sysctl_credit_schedule sched_credit; + } u; +}; +typedef struct xen_sysctl_scheduler_op xen_sysctl_scheduler_op_t; +DEFINE_XEN_GUEST_HANDLE(xen_sysctl_scheduler_op_t); struct xen_sysctl { uint32_t cmd; +#define XEN_SYSCTL_readconsole 1 +#define XEN_SYSCTL_tbuf_op 2 +#define XEN_SYSCTL_physinfo 3 +#define XEN_SYSCTL_sched_id 4 +#define XEN_SYSCTL_perfc_op 5 +#define XEN_SYSCTL_getdomaininfolist 6 +#define XEN_SYSCTL_debug_keys 7 +#define XEN_SYSCTL_getcpuinfo 8 +#define XEN_SYSCTL_availheap 9 +#define XEN_SYSCTL_get_pmstat 10 +#define XEN_SYSCTL_cpu_hotplug 11 +#define XEN_SYSCTL_pm_op 12 +#define XEN_SYSCTL_page_offline_op 14 +#define XEN_SYSCTL_lockprof_op 15 +#define XEN_SYSCTL_topologyinfo 16 +#define XEN_SYSCTL_numainfo 17 +#define XEN_SYSCTL_cpupool_op 18 +#define XEN_SYSCTL_scheduler_op 19 uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */ union { struct xen_sysctl_readconsole readconsole; struct xen_sysctl_tbuf_op tbuf_op; struct xen_sysctl_physinfo physinfo; + struct xen_sysctl_topologyinfo topologyinfo; + struct xen_sysctl_numainfo numainfo; struct xen_sysctl_sched_id sched_id; struct xen_sysctl_perfc_op perfc_op; struct xen_sysctl_getdomaininfolist getdomaininfolist; @@ -289,6 +630,11 @@ struct xen_sysctl_availheap availheap; struct xen_sysctl_get_pmstat get_pmstat; struct xen_sysctl_cpu_hotplug cpu_hotplug; + struct xen_sysctl_pm_op pm_op; + struct xen_sysctl_page_offline_op page_offline; + struct xen_sysctl_lockprof_op lockprof_op; + struct xen_sysctl_cpupool_op cpupool_op; + struct xen_sysctl_scheduler_op scheduler_op; uint8_t pad[128]; } u; };
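[Editor's sketch] With this revision the XEN_SYSCTL_* command numbers live next to struct xen_sysctl and the interface version is bumped to 0x00000009, so every sub-operation is issued by filling in the one top-level structure. A minimal physinfo query might look as follows; HYPERVISOR_sysctl() is an assumed privileged-domain hypercall stub, not something this change introduces.

static void
xen_report_physinfo(void)
{
	struct xen_sysctl op;

	memset(&op, 0, sizeof(op));
	op.cmd = XEN_SYSCTL_physinfo;
	op.interface_version = XEN_SYSCTL_INTERFACE_VERSION;

	if (HYPERVISOR_sysctl(&op) == 0)
		printf("%u CPUs online, largest possible CPU ID %u\n",
		    op.u.physinfo.nr_cpus, op.u.physinfo.max_cpu_id);
}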
==== //depot/SpectraBSD/stable/9/sys/xen/interface/trace.h#2 (text) ==== @@ -38,6 +38,8 @@ #define TRC_MEM 0x0010f000 /* Xen memory trace */ #define TRC_PV 0x0020f000 /* Xen PV traces */ #define TRC_SHADOW 0x0040f000 /* Xen shadow tracing */ +#define TRC_HW 0x0080f000 /* Xen hardware-related traces */ +#define TRC_GUEST 0x0800f000 /* Guest-generated traces */ #define TRC_ALL 0x0ffff000 #define TRC_HD_TO_EVENT(x) ((x)&0x0fffffff) #define TRC_HD_CYCLE_FLAG (1UL<<31) @@ -52,14 +54,20 @@ #define TRC_HVM_HANDLER 0x00082000 /* various HVM handlers */ #define TRC_SCHED_MIN 0x00021000 /* Just runstate changes */ +#define TRC_SCHED_CLASS 0x00022000 /* Scheduler-specific */ #define TRC_SCHED_VERBOSE 0x00028000 /* More inclusive scheduling */ +/* Trace classes for Hardware */ +#define TRC_HW_PM 0x00801000 /* Power management traces */ +#define TRC_HW_IRQ 0x00802000 /* Traces relating to the handling of IRQs */ + /* Trace events per class */ #define TRC_LOST_RECORDS (TRC_GEN + 1) #define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2) #define TRC_TRACE_CPU_CHANGE (TRC_GEN + 3) -#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1) +#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1) +#define TRC_SCHED_CONTINUE_RUNNING (TRC_SCHED_MIN + 2) #define TRC_SCHED_DOM_ADD (TRC_SCHED_VERBOSE + 1) #define TRC_SCHED_DOM_REM (TRC_SCHED_VERBOSE + 2) #define TRC_SCHED_SLEEP (TRC_SCHED_VERBOSE + 3) @@ -75,10 +83,17 @@ #define TRC_SCHED_DOM_TIMER_FN (TRC_SCHED_VERBOSE + 13) #define TRC_SCHED_SWITCH_INFPREV (TRC_SCHED_VERBOSE + 14) #define TRC_SCHED_SWITCH_INFNEXT (TRC_SCHED_VERBOSE + 15) +#define TRC_SCHED_SHUTDOWN_CODE (TRC_SCHED_VERBOSE + 16) #define TRC_MEM_PAGE_GRANT_MAP (TRC_MEM + 1) #define TRC_MEM_PAGE_GRANT_UNMAP (TRC_MEM + 2) #define TRC_MEM_PAGE_GRANT_TRANSFER (TRC_MEM + 3) +#define TRC_MEM_SET_P2M_ENTRY (TRC_MEM + 4) +#define TRC_MEM_DECREASE_RESERVATION (TRC_MEM + 5) +#define TRC_MEM_POD_POPULATE (TRC_MEM + 16) +#define TRC_MEM_POD_ZERO_RECLAIM (TRC_MEM + 17) +#define TRC_MEM_POD_SUPERPAGE_SPLINTER (TRC_MEM + 18) + #define TRC_PV_HYPERCALL (TRC_PV + 1) #define TRC_PV_TRAP (TRC_PV + 3) @@ -111,6 +126,7 @@ #define TRC_SHADOW_RESYNC_ONLY (TRC_SHADOW + 15) /* trace events per subclass */ +#define TRC_HVM_NESTEDFLAG (0x400) #define TRC_HVM_VMENTRY (TRC_HVM_ENTRYEXIT + 0x01) #define TRC_HVM_VMEXIT (TRC_HVM_ENTRYEXIT + 0x02) #define TRC_HVM_VMEXIT64 (TRC_HVM_ENTRYEXIT + TRC_64_FLAG + 0x02) @@ -140,12 +156,38 @@ #define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14) #define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14) #define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15) -#define TRC_HVM_IO_ASSIST (TRC_HVM_HANDLER + 0x16) -#define TRC_HVM_MMIO_ASSIST (TRC_HVM_HANDLER + 0x17) +#define TRC_HVM_IOPORT_READ (TRC_HVM_HANDLER + 0x16) +#define TRC_HVM_IOMEM_READ (TRC_HVM_HANDLER + 0x17) #define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18) #define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19) #define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19) +#define TRC_HVM_RDTSC (TRC_HVM_HANDLER + 0x1a) +#define TRC_HVM_INTR_WINDOW (TRC_HVM_HANDLER + 0x20) +#define TRC_HVM_NPF (TRC_HVM_HANDLER + 0x21) +#define TRC_HVM_REALMODE_EMULATE (TRC_HVM_HANDLER + 0x22) +#define TRC_HVM_TRAP (TRC_HVM_HANDLER + 0x23) +#define TRC_HVM_TRAP_DEBUG (TRC_HVM_HANDLER + 0x24) +#define TRC_HVM_VLAPIC (TRC_HVM_HANDLER + 0x25) +#define TRC_HVM_IOPORT_WRITE (TRC_HVM_HANDLER + 0x216) +#define TRC_HVM_IOMEM_WRITE (TRC_HVM_HANDLER + 0x217) + +/* Trace events per class */ +#define TRC_PM_FREQ_CHANGE (TRC_HW_PM + 0x01) +#define TRC_PM_IDLE_ENTRY (TRC_HW_PM + 0x02) +#define TRC_PM_IDLE_EXIT (TRC_HW_PM + 0x03) + +/* Trace events for IRQs */ +#define TRC_HW_IRQ_MOVE_CLEANUP_DELAY (TRC_HW_IRQ + 0x1) +#define TRC_HW_IRQ_MOVE_CLEANUP (TRC_HW_IRQ + 0x2) +#define TRC_HW_IRQ_BIND_VECTOR (TRC_HW_IRQ + 0x3) +#define TRC_HW_IRQ_CLEAR_VECTOR (TRC_HW_IRQ + 0x4) +#define TRC_HW_IRQ_MOVE_FINISH (TRC_HW_IRQ + 0x5) +#define TRC_HW_IRQ_ASSIGN_VECTOR (TRC_HW_IRQ + 0x6) +#define TRC_HW_IRQ_UNMAPPED_VECTOR (TRC_HW_IRQ + 0x7) +#define TRC_HW_IRQ_HANDLED (TRC_HW_IRQ + 0x8) + + /* This structure represents a single trace buffer record. */ struct t_rec { uint32_t event:28; @@ -180,6 +222,16 @@ /* Records follow immediately after the meta-data header. 
*/ }; +/* Structure used to pass MFNs to the trace buffers back to trace consumers. + * Offset is an offset into the mapped structure where the mfn list will be held. + * MFNs will be at ((unsigned long *)(t_info))+(t_info->cpu_offset[cpu]). + */ +struct t_info { + uint16_t tbuf_size; /* Size in pages of each trace buffer */ + uint16_t mfn_offset[]; /* Offset within t_info structure of the page list per cpu */ + /* MFN lists immediately after the header */ +}; + #endif /* __XEN_PUBLIC_TRACE_H__ */ /* ==== //depot/SpectraBSD/stable/9/sys/xen/interface/vcpu.h#2 (text) ==== @@ -185,8 +185,7 @@ /* * Get the physical ID information for a pinned vcpu's underlying physical * processor. The physical ID information is architecture-specific. - * On x86: id[31:0]=apic_id, id[63:32]=acpi_id, and all values 0xff and - * greater are reserved. + * On x86: id[31:0]=apic_id, id[63:32]=acpi_id. * This command returns -EINVAL if it is not a valid operation for this VCPU. */ #define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ @@ -195,10 +194,36 @@ }; typedef struct vcpu_get_physid vcpu_get_physid_t; DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t); -#define xen_vcpu_physid_to_x86_apicid(physid) \ - ((((uint32_t)(physid)) >= 0xff) ? 0xff : ((uint8_t)(physid))) -#define xen_vcpu_physid_to_x86_acpiid(physid) \ - ((((uint32_t)((physid)>>32)) >= 0xff) ? 0xff : ((uint8_t)((physid)>>32))) +#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid)) +#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32)) + +/* + * Register a memory location to get a secondary copy of the vcpu time + * parameters. The master copy still exists as part of the vcpu shared + * memory area, and this secondary copy is updated whenever the master copy + * is updated (and using the same versioning scheme for synchronisation). + * + * The intent is that this copy may be mapped (RO) into userspace so + * that usermode can compute system time using the time info and the + * tsc. Usermode will see an array of vcpu_time_info structures, one + * for each vcpu, and choose the right one by an existing mechanism + * which allows it to get the current vcpu number (such as via a + * segment limit). It can then apply the normal algorithm to compute + * system time from the tsc. + * + * @extra_arg == pointer to vcpu_register_time_info_memory_area structure. + */ +#define VCPUOP_register_vcpu_time_memory_area 13 +DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t); +struct vcpu_register_time_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_time_info_t) h; + struct vcpu_time_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t); #endif /* __XEN_PUBLIC_VCPU_H__ */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/version.h#2 (text) ==== @@ -78,6 +78,9 @@ /* arg == xen_domain_handle_t. */ #define XENVER_guest_handle 8 +#define XENVER_commandline 9 +typedef char xen_commandline_t[1024]; + #endif /* __XEN_PUBLIC_VERSION_H__ */ /* ==== //depot/SpectraBSD/stable/9/sys/xen/interface/xen-compat.h#2 (text) ==== @@ -27,7 +27,7 @@ #ifndef __XEN_PUBLIC_XEN_COMPAT_H__ #define __XEN_PUBLIC_XEN_COMPAT_H__ -#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030209 +#define __XEN_LATEST_INTERFACE_VERSION__ 0x00040200 #if defined(__XEN__) || defined(__XEN_TOOLS__) /* Xen is built with matching headers and implements the latest interface. */
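[Editor's sketch] VCPUOP_register_vcpu_time_memory_area, added in the vcpu.h hunk above, asks Xen to maintain a secondary copy of a vcpu's time parameters at a guest-chosen address. A minimal registration for VCPU 0 could look like the following; it assumes the standard HYPERVISOR_vcpu_op() stub and set_xen_guest_handle() macro, and the static buffer is purely illustrative.

/* Illustrative: a secondary time-info slot for VCPU 0, e.g. in a page
 * that will later be mapped read-only into userspace. */
static struct vcpu_time_info vcpu0_time_info;

static int
register_secondary_time_info(void)
{
	struct vcpu_register_time_memory_area area;

	set_xen_guest_handle(area.addr.h, &vcpu0_time_info);

	/* Xen keeps this copy in step with the master copy in shared_info,
	 * using the same version-counter scheme for synchronisation. */
	return (HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
	    0, &area));
}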
==== //depot/SpectraBSD/stable/9/sys/xen/interface/xen.h#2 (text) ==== @@ -30,9 +30,11 @@ #include "xen-compat.h" #if defined(__i386__) || defined(__x86_64__) -#include "arch-x86/xen.h" +#include #elif defined(__ia64__) -#include "arch-ia64.h" +#include +#elif defined(__arm__) +#include #else #error "Unsupported architecture" #endif @@ -47,6 +49,7 @@ __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); DEFINE_XEN_GUEST_HANDLE(void); +DEFINE_XEN_GUEST_HANDLE(uint64_t); DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); #endif @@ -54,6 +57,10 @@ * HYPERCALLS */ +/* `incontents 100 hcalls List of hypercalls + * ` enum hypercall_num { // __HYPERVISOR_* => HYPERVISOR_*() + */ + #define __HYPERVISOR_set_trap_table 0 #define __HYPERVISOR_mmu_update 1 #define __HYPERVISOR_set_gdt 2 @@ -91,6 +98,8 @@ #define __HYPERVISOR_sysctl 35 #define __HYPERVISOR_domctl 36 #define __HYPERVISOR_kexec_op 37 +#define __HYPERVISOR_tmem_op 38 +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 @@ -102,6 +111,8 @@ #define __HYPERVISOR_arch_6 54 #define __HYPERVISOR_arch_7 55 +/* ` } */ + /* * HYPERCALL COMPATIBILITY. */ @@ -135,6 +146,7 @@ * The latter can be allocated only once per guest: they must initially be * allocated to VCPU0 but can subsequently be re-bound. */ +/* ` enum virq { */ #define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ #define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ #define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ @@ -143,6 +155,10 @@ #define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ #define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ #define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ +#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ +#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occurred */ +#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */ +#define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */ /* Architecture-specific VIRQ definitions. */ #define VIRQ_ARCH_0 16 @@ -153,26 +169,72 @@ #define VIRQ_ARCH_5 21 #define VIRQ_ARCH_6 22 #define VIRQ_ARCH_7 23 +/* ` } */ #define NR_VIRQS 24 /* - * MMU-UPDATE REQUESTS + * ` enum neg_errnoval + * ` HYPERVISOR_mmu_update(const struct mmu_update reqs[], + * ` unsigned count, unsigned *done_out, + * ` unsigned foreigndom) + * ` + * @reqs is an array of mmu_update_t structures ((ptr, val) pairs). + * @count is the length of the above array. + * @pdone is an output parameter indicating number of completed operations + * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this + * hypercall invocation. Can be DOMID_SELF. + * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced + * in this hypercall invocation. The value of this field + * (x) encodes the PFD as follows: + * x == 0 => PFD == DOMID_SELF + * x != 0 => PFD == x - 1 * - * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs. - * A foreigndom (FD) can be specified (or DOMID_SELF for none). - * Where the FD has some effect, it is described below. - * ptr[1:0] specifies the appropriate MMU_* command. - * + * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command. + * ------------- * ptr[1:0] == MMU_NORMAL_PT_UPDATE: - * Updates an entry in a page table. If updating an L1 table, and the new - * table entry is valid/present, the mapped frame must belong to the FD, if - * an FD has been specified. 
If attempting to map an I/O page then the - caller assumes the privilege of the FD. + Updates an entry in a page table belonging to PFD. If updating an L1 table, + and the new table entry is valid/present, the mapped frame must belong to + FD. If attempting to map an I/O page then the caller assumes the privilege + of the FD. * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. * FD == DOMID_XEN: Map restricted areas of Xen's heap space. * ptr[:2] -- Machine address of the page-table entry to modify. * val -- Value to write. + * + * There are also certain implicit requirements when using this hypercall. The + * pages that make up a pagetable must be mapped read-only in the guest. + * This prevents uncontrolled guest updates to the pagetable. Xen strictly + * enforces this, and will disallow any pagetable update which will end up + * mapping a pagetable page RW, and will disallow using any writable page as a + * pagetable. In practice it means that when constructing a page table for a + * process, thread, etc, we MUST be very diligent in following these rules: + * 1). Start with top-level page (PGD or in Xen language: L4). Fill out + * the entries. + * 2). Keep on going, filling out the upper (PUD or L3), and middle (PMD + * or L2). + * 3). Start filling out the PTE table (L1) with the PTE entries. Once + * done, make sure to set each of those entries to RO (so writeable bit + * is unset). Once that has been completed, set the PMD (L2) for this + * PTE table as RO. + * 4). When completed with all of the PMD (L2) entries, and all of them have + * been set to RO, make sure to set RO the PUD (L3). Do the same + * operation on PGD (L4) pagetable entries that have a PUD (L3) entry. + * 5). Now before you can use those pages (so setting the cr3), you MUST also + * pin them so that the hypervisor can verify the entries. This is done + * via the HYPERVISOR_mmuext_op(MMUEXT_PIN_L4_TABLE, guest physical frame + * number of the PGD (L4)). At that point the HYPERVISOR_mmuext_op( + * MMUEXT_NEW_BASEPTR, guest physical frame number of the PGD (L4)) can be + * issued. + * For 32-bit guests, the L4 is not used (as there are fewer pagetables), so + * instead use L3. + * At this point the pagetables can be modified using the MMU_NORMAL_PT_UPDATE + * hypercall. Also, if so desired, the OS can try to write to the PTE + * and be trapped by the hypervisor (as the PTE entry is RO). + * + * To deallocate the pages, the operations are the reverse of the steps + * mentioned above. The argument is MMUEXT_UNPIN_TABLE for all levels and the + * pagetable MUST not be in use (meaning that the cr3 is not set to it). * * ptr[1:0] == MMU_MACHPHYS_UPDATE: * Updates an entry in the machine->pseudo-physical mapping table. @@ -183,6 +245,72 @@ * ptr[1:0] == MMU_PT_UPDATE_PRESERVE_AD: * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed * with those in @val. + * + * @val is usually the machine frame number along with some attributes. + * The attributes by default follow the architecture defined bits. 
Meaning that + * if this is an X86_64 machine and the four-level page table layout is used, + * the layout of val is: + * - 63 if set means No execute (NX) + * - 46-13 the machine frame number + * - 12 available for guest + * - 11 available for guest + * - 10 available for guest + * - 9 available for guest + * - 8 global + * - 7 PAT (PSE is disabled, must use hypercall to make 4MB or 2MB pages) + * - 6 dirty + * - 5 accessed + * - 4 page cached disabled + * - 3 page write through + * - 2 userspace accessible + * - 1 writeable + * - 0 present + * + * The one bit that does not fit with the default layout is the PAGE_PSE + * (also called PAGE_PAT). The MMUEXT_[UN]MARK_SUPER arguments to the + * HYPERVISOR_mmuext_op serve as a mechanism to set a pagetable to be 4MB + * (or 2MB) instead of using the PAGE_PSE bit. + * + * The reason that the PAGE_PSE (bit 7) is not being utilized is due to Xen + * using it as the Page Attribute Table (PAT) bit - for details on it please + * refer to Intel SDM 10.12. The PAT allows setting the caching attributes of + * pages instead of using MTRRs. + * + * The PAT MSR is as follows (it is a 64-bit value, each entry is 8 bits): + * PAT4 PAT0 + * +---+----+----+----+-----+----+----+ + * WC | WC | WB | UC | UC- | WC | WB | <= Linux + * +---+----+----+----+-----+----+----+ + * WC | WT | WB | UC | UC- | WT | WB | <= BIOS (default when machine boots) + * +---+----+----+----+-----+----+----+ + * WC | WP | WC | UC | UC- | WT | WB | <= Xen + * +---+----+----+----+-----+----+----+ + * + * The lookup of this index table translates to looking up + * Bit 7, Bit 4, and Bit 3 of val entry: + * + * PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3). + * + * If all bits are off, then we are using PAT0. If bit 3 is turned on, + * then we are using PAT1; if bit 3 and bit 4, then PAT2, and so on. + * + * As you can see, the Linux PAT1 translates to PAT4 under Xen, which means + * that a guest that follows Linux's PAT setup and would like to set Write + * Combined on pages MUST use the PAT4 entry. Meaning that Bit 7 (PAGE_PAT) is + * set. For example, under Linux it only uses PAT0, PAT1, and PAT2 for the + * caching as: + * + * WB = none (so PAT0) + * WC = PWT (bit 3 on) + * UC = PWT | PCD (bit 3 and 4 are on). + * + * To make it work with Xen, it needs to translate the WC bit as so: + * + * PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3 + * + * And to translate back it would: + * + * PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. */
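[Editor's sketch] For a 64-bit PV guest, the construction rules spelled out in the comment above reduce to two MMUEXT operations once the new tables are mapped read-only. This sketch is not part of the change; l4_mfn is a caller-supplied machine frame number for the top-level table, and the HYPERVISOR_mmuext_op() stub signature is assumed from its uses elsewhere in these headers.

/* Pin a fully constructed, read-only L4 table, then switch to it. */
static int
activate_new_pagetable(xen_pfn_t l4_mfn)
{
	struct mmuext_op op;
	int error;

	op.cmd = MMUEXT_PIN_L4_TABLE;	/* Xen validates every entry here. */
	op.arg1.mfn = l4_mfn;
	error = HYPERVISOR_mmuext_op(&op, 1, /*done_out*/NULL, DOMID_SELF);
	if (error != 0)
		return (error);

	op.cmd = MMUEXT_NEW_BASEPTR;	/* The PV equivalent of loading %cr3. */
	op.arg1.mfn = l4_mfn;
	return (HYPERVISOR_mmuext_op(&op, 1, /*done_out*/NULL, DOMID_SELF));
}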
#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ #define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ @@ -227,10 +355,24 @@ * * cmd: MMUEXT_FLUSH_CACHE * No additional arguments. Writes back and flushes cache contents. + * + * cmd: MMUEXT_FLUSH_CACHE_GLOBAL + * No additional arguments. Writes back and flushes cache contents + * on all CPUs in the system. * * cmd: MMUEXT_SET_LDT * linear_addr: Linear address of LDT base (NB. must be page-aligned). * nr_ents: Number of entries in LDT. + * + * cmd: MMUEXT_CLEAR_PAGE + * mfn: Machine frame number to be cleared. + * + * cmd: MMUEXT_COPY_PAGE + * mfn: Machine frame number of the destination page. + * src_mfn: Machine frame number of the source page. + * + * cmd: MMUEXT_[UN]MARK_SUPER + * mfn: Machine frame number of head of superpage to be [un]marked. */ #define MMUEXT_PIN_L1_TABLE 0 #define MMUEXT_PIN_L2_TABLE 1 @@ -247,12 +389,18 @@ #define MMUEXT_FLUSH_CACHE 12 #define MMUEXT_SET_LDT 13 #define MMUEXT_NEW_USER_BASEPTR 15 +#define MMUEXT_CLEAR_PAGE 16 +#define MMUEXT_COPY_PAGE 17 +#define MMUEXT_FLUSH_CACHE_GLOBAL 18 +#define MMUEXT_MARK_SUPER 19 +#define MMUEXT_UNMARK_SUPER 20 #ifndef __ASSEMBLY__ struct mmuext_op { unsigned int cmd; union { - /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR + * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */ xen_pfn_t mfn; /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ unsigned long linear_addr; @@ -262,10 +410,12 @@ unsigned int nr_ents; /* TLB_FLUSH_MULTI, INVLPG_MULTI */ #if __XEN_INTERFACE_VERSION__ >= 0x00030205 - XEN_GUEST_HANDLE(void) vcpumask; + XEN_GUEST_HANDLE(const_void) vcpumask; #else - void *vcpumask; + const void *vcpumask; #endif + /* COPY_PAGE */ + xen_pfn_t src_mfn; } arg2; }; typedef struct mmuext_op mmuext_op_t; @@ -343,6 +493,16 @@ #define DOMID_XEN (0x7FF2U) /* + * DOMID_COW is used as the owner of sharable pages */ +#define DOMID_COW (0x7FF3U) + +/* DOMID_INVALID is used to identify pages with unknown owner. */ +#define DOMID_INVALID (0x7FF4U) + +/* Idle domain. */ +#define DOMID_IDLE (0x7FFFU) + +/* * Send an array of these to HYPERVISOR_mmu_update(). * NB. The fields are natural pointer/address size for this architecture. */ @@ -442,7 +602,7 @@ * of this structure remaining constant. */ struct shared_info { - struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; + struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS]; /* * A domain can create "event channels" on which it can send and receive @@ -501,6 +661,7 @@ * a. relocated kernel image * b. initial ram disk [mod_start, mod_len] * c. list of allocated page frames [mfn_list, nr_pages] + * (unless relocated due to XEN_ELFNOTE_INIT_P2M) * d. start_info_t structure [register ESI (x86)] * e. bootstrap page tables [pt_base, CR3 (x86)] * f. bootstrap stack [register ESP (x86)] @@ -539,9 +700,14 @@ unsigned long pt_base; /* VIRTUAL address of page directory. */ unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ - unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module */ + /* (PFN of pre-loaded module if */ + /* SIF_MOD_START_PFN set in flags). */ unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ int8_t cmd_line[MAX_GUEST_CMDLINE]; + /* The pfn range here covers both page table and p->m table frames. */ + unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */ + unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */ }; typedef struct start_info start_info_t; @@ -554,12 +720,41 @@ /* These flags are passed in the 'flags' field of start_info_t. */ #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ +#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ +#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ #define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ +/* + * A multiboot module is a package containing modules very similar to a + * multiboot module array. 
The only differences are: + * - the array of module descriptors is by convention simply at the beginning + * of the multiboot module, + * - addresses in the module descriptors are based on the beginning of the + * multiboot module, + * - the number of modules is determined by a termination descriptor that has + * mod_start == 0. + * + * This permits both building it statically and referencing it in a + * configuration file, and lets the PV guest easily rebase the addresses to + * virtual addresses and at the same time count the number of modules. + */ +struct xen_multiboot_mod_list +{ + /* Address of first byte of the module */ + uint32_t mod_start; + /* Address of last byte of the module (inclusive) */ + uint32_t mod_end; + /* Address of zero-terminated command line */ + uint32_t cmdline; + /* Unused, must be zero */ + uint32_t pad; +}; + typedef struct dom0_vga_console_info { uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ #define XEN_VGATYPE_TEXT_MODE_3 0x03 #define XEN_VGATYPE_VESA_LFB 0x23 +#define XEN_VGATYPE_EFI_LFB 0x70 union { struct { @@ -618,14 +813,23 @@ /* Default definitions for macros used by domctl/sysctl. */ #if defined(__XEN__) || defined(__XEN_TOOLS__) + #ifndef uint64_aligned_t #define uint64_aligned_t uint64_t #endif #ifndef XEN_GUEST_HANDLE_64 #define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) #endif + +#ifndef __ASSEMBLY__ +struct xenctl_cpumap { + XEN_GUEST_HANDLE_64(uint8) bitmap; + uint32_t nr_cpus; +}; #endif + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + #endif /* __XEN_PUBLIC_XEN_H__ */ /* ==== //depot/SpectraBSD/stable/9/sys/xen/interface/xenoprof.h#2 (text) ==== @@ -50,7 +50,11 @@ #define XENOPROF_shutdown 13 #define XENOPROF_get_buffer 14 #define XENOPROF_set_backtrace 15 -#define XENOPROF_last_op 15 + +/* AMD IBS support */ +#define XENOPROF_get_ibs_caps 16 +#define XENOPROF_ibs_counter 17 +#define XENOPROF_last_op 17 #define MAX_OPROF_EVENTS 32 #define MAX_OPROF_DOMAINS 25 @@ -64,7 +68,7 @@ }; /* PC value that indicates a special code */ -#define XENOPROF_ESCAPE_CODE ~0UL +#define XENOPROF_ESCAPE_CODE (~0ULL) /* Transient events for the xenoprof->oprofile cpu buf */ #define XENOPROF_TRACE_BEGIN 1 @@ -124,6 +128,16 @@ } xenoprof_passive_t; DEFINE_XEN_GUEST_HANDLE(xenoprof_passive_t); +struct xenoprof_ibs_counter { + uint64_t op_enabled; + uint64_t fetch_enabled; + uint64_t max_cnt_fetch; + uint64_t max_cnt_op; + uint64_t rand_en; + uint64_t dispatched_ops; +}; +typedef struct xenoprof_ibs_counter xenoprof_ibs_counter_t; +DEFINE_XEN_GUEST_HANDLE(xenoprof_ibs_counter_t); #endif /* __XEN_PUBLIC_XENOPROF_H__ */ ==== //depot/SpectraBSD/stable/9/sys/xen/xenstore/xenstore.c#3 (text) ==== @@ -58,6 +58,7 @@ #include #include +#include #include #include Change 537182 by justing@justing-ns1 on 2012/04/19 08:26:58 Re-structure Xen HVM support so that: o Xen is detected and hypercalls can be performed very early in system startup. o Xen interrupt services are implemented using FreeBSD's native interrupt delivery infrastructure. o the Xen interrupt service implementation can eventually be shared between PV and HVM guests. o Xen interrupt handlers can optionally use a filter handler in order to avoid the overhead of dispatch to an interrupt thread. o interrupt load can be distributed among all available CPUs. o the overhead of accessing the emulated local and I/O apics is removed for event channel port events. o a similar optimization can eventually, and fairly easily, be used to optimize MSI and IPI event delivery (see the sketch below for the callback registration this design relies on). 
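[Editor's sketch] The direct vector callback named in the list above is requested with the same HVMOP_set_param hypercall that the old emulated-PCI callback used (see the removed xenpci_set_callback() later in this change). The "vector" type value 2 in bits 63:56 is the editor's reading of the Xen 4.2 HVM_PARAM_CALLBACK_IRQ encoding and should be confirmed against hvm/params.h; everything else below appears in this changelist.

/* Ask Xen to deliver event channel upcalls directly via IDT_EVTCHN (0x93). */
static int
xen_set_vector_callback(void)
{
	struct xen_hvm_param xhp;

	xhp.domid = DOMID_SELF;
	xhp.index = HVM_PARAM_CALLBACK_IRQ;
	/* Assumed encoding: callback type 2 ("vector") in bits 63:56,
	 * the interrupt vector number in the low bits. */
	xhp.value = (2ULL << 56) | IDT_EVTCHN;
	return (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp));
}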
sys/x86/x86/local_apic.c: sys/amd64/include/apicvar.h: sys/i386/include/apicvar.h: sys/amd64/amd64/apic_vector.S: sys/i386/i386/apic_vector.s: sys/amd64/amd64/machdep.c: sys/i386/i386/machdep.c: sys/amd64/include/segments.h: sys/i386/include/segments.h: Reserve IDT vector 0x93 for the Xen event channel upcall interrupt handler. On Hypervisors that support the direct vector callback feature, we can request that this vector be called directly by an injected HVM interrupt event, instead of a simulated PCI interrupt on the Xen platform PCI device. This avoids all of the overhead of dealing with the emulated I/O APIC and local APIC. It also means that the Hypervisor can inject these events on any CPU, allowing upcalls for different ports to be handled in parallel. sys/amd64/include/segments.h: sys/i386/include/segments.h: Preemptively move IDT_DTRACE_RET from 0x20 to 0x92. This change has already occurred in the upstream FreeBSD/stable/9 and will ease future merges of this file. sys/amd64/amd64/intr_machdep.c: Fix an interrupt load balancing regression, introduced in FreeBSD's SVN revision 222813, that left all un-pinned interrupts assigned to CPU 0. In intr_shuffle_irqs(), remove the CPU_SETOF() call that initialized the "intr_cpus" cpuset to only contain CPU0. This initialization is too late and nullifies the results of calls to intr_add_cpu() that occur much earlier in the boot process. Since "intr_cpus" is statically initialized to the empty set, and all processors, including the BSP, already add themselves to "intr_cpus", no special initialization for the BSP is necessary. sys/amd64/include/intr_machdep.h: sys/i386/include/intr_machdep.h: Increase the FreeBSD IRQ vector table to include space for event channel interrupt sources. sys/amd64/include/pcpu.h: sys/i386/include/pcpu.h: Remove Xen HVM per-cpu variable data. These fields are now allocated via the dynamic per-cpu scheme. See xen_intr.c for details. sys/amd64/amd64/machdep.c: sys/dev/xen/xenpci/xenpcivar.h: sys/dev/xen/xenpci/xenpci.c: sys/x86/xen/hvm.c: sys/sys/kernel.h: Refactor magic ioport, Hypercall table and Hypervisor shared information page setup, and move it to a dedicated HVM support module. HVM mode initialization is now triggered during the SI_SUB_HYPERVISOR phase of system startup. This currently occurs just after the kernel VM is fully set up, which is just enough infrastructure to allow the hypercall table and shared info page to be properly mapped. sys/xen/hvm.h: sys/x86/xen/hvm.c: Add definitions and a method for configuring Hypervisor event delivery via a direct vector callback. sys/amd64/include/xen/xen-os.h: sys/x86/xen/hvm.c: Add a new method, xen_os_capabilities(), which allows different modules of FreeBSD Xen support to cleanly query whether optional feature setup has been successful. The only currently supported capability is XOC_PCPU_INTR_BINDING, which indicates that Xen interrupts can be bound to specific CPUs. sys/conf/files: sys/conf/files.amd64: sys/conf/files.i386: Adjust kernel build to reflect the refactoring of early Xen startup code and Xen interrupt services. sys/dev/xen/blkback/blkback.c: sys/dev/xen/blkfront/blkfront.c: sys/dev/xen/blkfront/block.h: sys/dev/xen/control/control.c: sys/dev/xen/evtchn/evtchn_dev.c: sys/dev/xen/netback/netback.c: sys/dev/xen/netfront/netfront.c: sys/xen/xenstore/xenstore.c: sys/xen/evtchn/evtchn_dev.c: Adjust drivers to use new xen_intr_*() API. 
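[Editor's sketch] The driver conversions that follow all repeat one pattern, condensed here with hypothetical names (my_softc, my_filter, my_connect). The xen_intr_*() signatures are taken from the blkback and blkfront hunks in this change.

struct my_softc {
	device_t		dev;
	xen_intr_handle_t	xen_intr_handle;	/* replaces the old "int irq" */
};

/* Optional filter: runs in primary interrupt context, so it must only
 * do work that is safe there (e.g. enqueue a task) and return. */
static int
my_filter(void *arg)
{
	return (FILTER_HANDLED);
}

static int
my_connect(struct my_softc *sc, int otherend_id, evtchn_port_t remote_port)
{
	return (xen_intr_bind_remote_port(sc->dev, otherend_id, remote_port,
	    my_filter, /*ithread_handler*/NULL, /*arg*/sc,
	    INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle));
}

/* Ring kicks become xen_intr_signal(sc->xen_intr_handle); teardown becomes
 * xen_intr_unbind(&sc->xen_intr_handle); and the local port number, when a
 * driver must publish it via the xenstore, comes from
 * xen_intr_port(sc->xen_intr_handle). */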
sys/dev/xen/blkback/blkback.c: Since blkback defers all event handling to a taskqueue, convert this task queue to a "fast" taskqueue, and schedule it via an interrupt filter. This avoids an unnecessary ithread context switch. sys/xen/xenstore/xenstore.c: The xenstore driver is MPSAFE. Indicate as much when registering its interrupt handler. sys/xen/xenbus/xenbus.c: sys/xen/xenbus/xenbusvar.h: Remove unused event channel APIs. sys/xen/evtchn.h: Remove all kernel Xen interrupt service API definitions from this file. It is now only used for structure and ioctl definitions related to the event channel userland device driver. Update the definitions in this file to match those from NetBSD. Implementing this interface will be necessary for Dom0 support. sys/xen/evtchn/evtchnvar.h: Add a header file for implementation-internal APIs related to managing event channel event delivery. This is used to allow, for example, the event channel userland device driver to access low-level routines that typical kernel consumers of event channel services should never access. sys/xen/interface/event_channel.h: sys/xen/xen_intr.h: Standardize on the evtchn_port_t type for referring to an event channel port id. In order to prevent low-level event channel APIs from leaking to kernel consumers who should not have access to this data, the type is defined twice: once in the Xen-provided event_channel.h, and again in xen/xen_intr.h. The double declaration is protected by __XEN_EVTCHN_PORT_DEFINED__ to ensure it is never declared twice within a given compilation unit. sys/xen/xen_intr.h: sys/x86/xen/xen_intr.c: New implementation of HVM Xen interrupt services. This is similar in many respects to the i386 PV implementation with the exception that events bound to event channel ports (i.e. not IPI, virtual IRQ, or physical IRQ) are further optimized to avoid mask/unmask operations that aren't necessary for these edge-triggered events. Stubs exist for supporting physical IRQ binding, but will need additional work before this implementation can be fully shared between PV and HVM. Affected files ... ... //depot/SpectraBSD/stable/9/sys/amd64/amd64/apic_vector.S#3 edit ... //depot/SpectraBSD/stable/9/sys/amd64/amd64/intr_machdep.c#2 edit ... //depot/SpectraBSD/stable/9/sys/amd64/amd64/machdep.c#3 edit ... //depot/SpectraBSD/stable/9/sys/amd64/include/apicvar.h#2 edit ... //depot/SpectraBSD/stable/9/sys/amd64/include/intr_machdep.h#2 edit ... //depot/SpectraBSD/stable/9/sys/amd64/include/pcpu.h#3 edit ... //depot/SpectraBSD/stable/9/sys/amd64/include/segments.h#2 edit ... //depot/SpectraBSD/stable/9/sys/amd64/include/xen/xen-os.h#3 edit ... //depot/SpectraBSD/stable/9/sys/conf/files#4 edit ... //depot/SpectraBSD/stable/9/sys/conf/files.amd64#3 edit ... //depot/SpectraBSD/stable/9/sys/conf/files.i386#3 edit ... //depot/SpectraBSD/stable/9/sys/dev/xen/blkback/blkback.c#8 edit ... //depot/SpectraBSD/stable/9/sys/dev/xen/blkfront/blkfront.c#5 edit ... //depot/SpectraBSD/stable/9/sys/dev/xen/blkfront/block.h#4 edit ... //depot/SpectraBSD/stable/9/sys/dev/xen/control/control.c#2 edit ... //depot/SpectraBSD/stable/9/sys/dev/xen/evtchn/evtchn_dev.c#2 delete ... //depot/SpectraBSD/stable/9/sys/dev/xen/netback/netback.c#4 edit ... //depot/SpectraBSD/stable/9/sys/dev/xen/netfront/netfront.c#4 edit ... //depot/SpectraBSD/stable/9/sys/dev/xen/xenpci/evtchn.c#4 move/delete ... //depot/SpectraBSD/stable/9/sys/dev/xen/xenpci/xenpci.c#2 edit ... //depot/SpectraBSD/stable/9/sys/dev/xen/xenpci/xenpcivar.h#2 edit ... 
//depot/SpectraBSD/stable/9/sys/i386/i386/apic_vector.s#2 edit ... //depot/SpectraBSD/stable/9/sys/i386/i386/machdep.c#3 edit ... //depot/SpectraBSD/stable/9/sys/i386/include/apicvar.h#2 edit ... //depot/SpectraBSD/stable/9/sys/i386/include/intr_machdep.h#2 edit ... //depot/SpectraBSD/stable/9/sys/i386/include/pcpu.h#2 edit ... //depot/SpectraBSD/stable/9/sys/i386/include/segments.h#2 edit ... //depot/SpectraBSD/stable/9/sys/sys/kernel.h#2 edit ... //depot/SpectraBSD/stable/9/sys/x86/x86/local_apic.c#2 edit ... //depot/SpectraBSD/stable/9/sys/x86/xen/hvm.c#1 add ... //depot/SpectraBSD/stable/9/sys/x86/xen/xen_intr.c#1 move/add ... //depot/SpectraBSD/stable/9/sys/xen/evtchn.h#2 edit ... //depot/SpectraBSD/stable/9/sys/xen/evtchn/evtchn_dev.c#3 edit ... //depot/SpectraBSD/stable/9/sys/xen/evtchn/evtchnvar.h#1 add ... //depot/SpectraBSD/stable/9/sys/xen/hvm.h#2 edit ... //depot/SpectraBSD/stable/9/sys/xen/interface/event_channel.h#3 edit ... //depot/SpectraBSD/stable/9/sys/xen/xen_intr.h#2 edit ... //depot/SpectraBSD/stable/9/sys/xen/xenbus/xenbus.c#2 edit ... //depot/SpectraBSD/stable/9/sys/xen/xenbus/xenbusvar.h#4 edit ... //depot/SpectraBSD/stable/9/sys/xen/xenstore/xenstore.c#4 edit Moved files ... ... //depot/SpectraBSD/stable/9/sys/x86/xen/xen_intr.c#1 moved from //depot/SpectraBSD/stable/9/sys/dev/xen/xenpci/evtchn.c#3 Differences ... ==== //depot/SpectraBSD/stable/9/sys/amd64/amd64/apic_vector.S#3 (text) ==== @@ -128,6 +128,22 @@ MEXITCOUNT jmp doreti +#ifdef XENHVM +/* + * Xen event channel upcall interrupt handler. + * Only used when the hypervisor supports direct vector callbacks. + */ + .text + SUPERALIGN_TEXT +IDTVEC(xen_intr_upcall) + PUSH_FRAME + FAKE_MCOUNT(TF_RIP(%rsp)) + movq %rsp, %rdi + call xen_intr_handle_upcall + MEXITCOUNT + jmp doreti +#endif + #ifdef SMP /* * Global address space TLB shootdown. ==== //depot/SpectraBSD/stable/9/sys/amd64/amd64/intr_machdep.c#2 (text) ==== @@ -514,9 +514,6 @@ struct intsrc *isrc; int i; - /* The BSP is always a valid target. */ - CPU_SETOF(0, &intr_cpus); - /* Don't bother on UP. */ if (mp_ncpus == 1) return; ==== //depot/SpectraBSD/stable/9/sys/amd64/amd64/machdep.c#3 (text) ==== @@ -1166,6 +1166,9 @@ #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), #endif +#ifdef XENHVM + IDTVEC(xen_intr_upcall), +#endif IDTVEC(fast_syscall), IDTVEC(fast_syscall32); #ifdef DDB @@ -1719,6 +1722,9 @@ #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); #endif +#ifdef XENHVM + setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_UPL, 0); +#endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; @@ -1842,14 +1848,6 @@ if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); -#ifdef XENHVM - if (inw(0x10) == 0x49d2) { - if (bootverbose) - printf("Xen detected: disabling emulated block and network devices\n"); - outw(0x10, 3); - } -#endif - cpu_probe_amdc1e(); /* Location of kernel stack for locore */ ==== //depot/SpectraBSD/stable/9/sys/amd64/include/apicvar.h#2 (text) ==== @@ -227,6 +227,7 @@ enum intr_trigger trigger); void lapic_set_tpr(u_int vector); void lapic_setup(int boot); +void xen_intr_handle_upcall(struct trapframe *frame); #endif /* !LOCORE */ #endif /* _MACHINE_APICVAR_H_ */ ==== //depot/SpectraBSD/stable/9/sys/amd64/include/intr_machdep.h#2 (text) ==== @@ -44,12 +44,24 @@ * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. - * IRQ values beyond 256 are used by MSI. 
We leave 255 unused to avoid - confusion since 255 is used in PCI to indicate an invalid IRQ. + IRQ values from 256 to 767 are used by MSI. When running under the Xen + Hypervisor, IRQ values from 768 to 4863 are available for binding to + event channel events. We leave 255 unused to avoid confusion since 255 is + used in PCI to indicate an invalid IRQ. */ #define NUM_MSI_INTS 512 #define FIRST_MSI_INT 256 +#ifdef XENHVM +#include +#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS +#define FIRST_EVTCHN_INT \ + (FIRST_MSI_INT + NUM_MSI_INTS) +#define LAST_EVTCHN_INT \ + (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) +#else +#define NUM_EVTCHN_INTS 0 +#endif +#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) /* * Default base address for MSI messages on x86 platforms. ==== //depot/SpectraBSD/stable/9/sys/amd64/include/pcpu.h#3 (text) ==== @@ -42,15 +42,6 @@ #endif #endif -#ifdef XENHVM -#define PCPU_XEN_FIELDS \ - ; \ - unsigned int pc_last_processed_l1i; \ - unsigned int pc_last_processed_l2i -#else -#define PCPU_XEN_FIELDS -#endif - /* * The SMP parts are setup in pmap.c and locore.s for the BSP, and * mp_machdep.c sets up the data for the AP's to "see" when they awake. @@ -76,8 +67,7 @@ struct system_segment_descriptor *pc_ldt; \ /* Pointer to the CPU TSS descriptor */ \ struct system_segment_descriptor *pc_tss; \ - u_int pc_cmci_mask /* MCx banks for CMCI */ \ - PCPU_XEN_FIELDS + u_int pc_cmci_mask /* MCx banks for CMCI */ #ifdef _KERNEL ==== //depot/SpectraBSD/stable/9/sys/amd64/include/segments.h#2 (text) ==== @@ -214,7 +214,8 @@ #define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ #define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. */ #define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ -#define IDT_DTRACE_RET 0x20 /* DTrace pid provider Interrupt Vector */ +#define IDT_DTRACE_RET 0x92 /* DTrace pid provider Interrupt Vector */ +#define IDT_EVTCHN 0x93 /* Xen HVM Event Channel Interrupt Vector */ /* * Entries in the Global Descriptor Table (GDT) ==== //depot/SpectraBSD/stable/9/sys/amd64/include/xen/xen-os.h#3 (text) ==== @@ -31,6 +31,20 @@ extern shared_info_t *HYPERVISOR_shared_info; +typedef enum { + XOC_NONE = 0x0, + + /** Xen interrupt delivery can be bound to a specific CPU. */ + XOC_PCPU_INTR_BINDING = 0x1 +} xen_os_capability_t; + +/** + * Query the capabilities of FreeBSD's Xen Support. + * + * \return A bitmap of supported capabilities. + */ +xen_os_capability_t xen_os_capabilities(void); +
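[Editor's sketch] A consumer of the xen_os_capabilities() interface just declared would gate optional behaviour on the returned bits. The fragment below is hypothetical: intr_bind() is the stock FreeBSD routine for pinning an interrupt vector to a CPU, and calling it for event channel vectors is only safe when XOC_PCPU_INTR_BINDING is reported.

/* Hypothetical: round-robin an event channel interrupt across CPUs. */
static void
my_distribute_irq(u_int vector)
{
	static u_int next_cpu;

	if ((xen_os_capabilities() & XOC_PCPU_INTR_BINDING) == 0)
		return;		/* Leave delivery on the boot processor. */

	(void)intr_bind(vector, next_cpu);
	next_cpu = (next_cpu + 1) % mp_ncpus;
}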
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ static inline void rep_nop(void) { ==== //depot/SpectraBSD/stable/9/sys/conf/files#4 (text) ==== @@ -3542,7 +3542,6 @@ xen/gnttab.c optional xen | xenhvm xen/features.c optional xen | xenhvm -xen/evtchn/evtchn.c optional xen xen/evtchn/evtchn_dev.c optional xen | xenhvm xen/xenbus/xenbus_if.m optional xen | xenhvm xen/xenbus/xenbus.c optional xen | xenhvm @@ -3561,4 +3560,3 @@ dev/xen/netback/netback.c optional xen | xenhvm dev/xen/netfront/netfront.c optional xen | xenhvm dev/xen/xenpci/xenpci.c optional xenpci -dev/xen/xenpci/evtchn.c optional xenpci ==== //depot/SpectraBSD/stable/9/sys/conf/files.amd64#3 (text) ==== @@ -482,3 +482,5 @@ x86/x86/msi.c optional pci x86/x86/nexus.c standard x86/x86/tsc.c standard +x86/xen/hvm.c optional xenhvm +x86/xen/xen_intr.c optional xen | xenhvm ==== //depot/SpectraBSD/stable/9/sys/conf/files.i386#3 (text) ==== @@ -534,3 +534,5 @@ x86/x86/msi.c optional apic pci x86/x86/nexus.c standard x86/x86/tsc.c standard +x86/xen/hvm.c optional xenhvm +x86/xen/xen_intr.c optional xen | xenhvm ==== //depot/SpectraBSD/stable/9/sys/dev/xen/blkback/blkback.c#8 (text) ==== @@ -77,7 +77,6 @@ #include #include -#include #include #include @@ -683,7 +682,7 @@ blkif_back_rings_t rings; /** IRQ mapping for the communication ring event channel. */ - int irq; + xen_intr_handle_t xen_intr_handle; /** * \brief Backend access mode flags (e.g. write, or read-only). @@ -1437,7 +1436,7 @@ mtx_unlock(&xbb->lock); if (notify) - notify_remote_via_irq(xbb->irq); + xen_intr_signal(xbb->xen_intr_handle); } /** @@ -2056,14 +2055,16 @@ * \param arg Callback argument registered during event channel * binding - the xbb_softc for this instance. */ -static void -xbb_intr(void *arg) +static int +xbb_filter(void *arg) { struct xbb_softc *xbb; - /* Defer to kernel thread. */ + /* Defer to taskqueue thread. */ xbb = (struct xbb_softc *)arg; taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task); + + return (FILTER_HANDLED); } SDT_PROVIDER_DEFINE(xbb); @@ -2855,10 +2856,7 @@ if ((xbb->flags & XBBF_RING_CONNECTED) == 0) return (0); - if (xbb->irq != 0) { - unbind_from_irqhandler(xbb->irq); - xbb->irq = 0; - } + xen_intr_unbind(&xbb->xen_intr_handle); mtx_unlock(&xbb->lock); taskqueue_drain(xbb->io_taskqueue, &xbb->io_task); @@ -3010,13 +3008,14 @@ xbb->flags |= XBBF_RING_CONNECTED; - error = - bind_interdomain_evtchn_to_irqhandler(xbb->otherend_id, - xbb->ring_config.evtchn, - device_get_nameunit(xbb->dev), - xbb_intr, /*arg*/xbb, - INTR_TYPE_BIO | INTR_MPSAFE, - &xbb->irq); + error = xen_intr_bind_remote_port(xbb->dev, + xbb->otherend_id, + xbb->ring_config.evtchn, + xbb_filter, + /*ithread_handler*/NULL, + /*arg*/xbb, + INTR_TYPE_BIO | INTR_MPSAFE, + &xbb->xen_intr_handle); if (error) { (void)xbb_disconnect(xbb); xenbus_dev_fatal(xbb->dev, error, "binding event channel"); @@ -3845,9 +3844,10 @@ * Create a taskqueue for doing work that must occur from a * thread context. 
*/ - xbb->io_taskqueue = taskqueue_create(device_get_nameunit(dev), M_NOWAIT, - taskqueue_thread_enqueue, - /*context*/&xbb->io_taskqueue); + xbb->io_taskqueue = taskqueue_create_fast(device_get_nameunit(dev), + M_NOWAIT, + taskqueue_thread_enqueue, + /*context*/&xbb->io_taskqueue); if (xbb->io_taskqueue == NULL) { xbb_attach_failed(xbb, error, "Unable to create taskqueue"); return (ENOMEM); ==== //depot/SpectraBSD/stable/9/sys/dev/xen/blkfront/blkfront.c#5 (text) ==== @@ -57,7 +57,6 @@ #include #include -#include #include #include #include @@ -743,7 +742,7 @@ } error = xs_printf(XST_NIL, node_path, "event-channel", - "%u", irq_to_evtchn_port(sc->irq)); + "%u", xen_intr_port(sc->xen_intr_handle)); if (error) { xenbus_dev_fatal(sc->xb_dev, error, "writing %s/event-channel", @@ -819,13 +818,13 @@ } } - error = bind_listening_port_to_irqhandler( - xenbus_get_otherend_id(sc->xb_dev), - "xbd", (driver_intr_t *)blkif_int, sc, - INTR_TYPE_BIO | INTR_MPSAFE, &sc->irq); + error = xen_intr_alloc_and_bind_local_port( + sc->xb_dev, xenbus_get_otherend_id(sc->xb_dev), + /*filter*/NULL, blkif_int, /*arg*/sc, + INTR_TYPE_BIO | INTR_MPSAFE, &sc->xen_intr_handle); if (error) { xenbus_dev_fatal(sc->xb_dev, error, - "bind_evtchn_to_irqhandler failed"); + "xen_intr_alloc_and_bind_local_port failed"); return (error); } @@ -971,7 +970,7 @@ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->ring, notify); if (notify) - notify_remote_via_irq(sc->irq); + xen_intr_signal(sc->xen_intr_handle); } static void @@ -1384,10 +1383,7 @@ xb_initq_complete(sc); } - if (sc->irq) { - unbind_from_irqhandler(sc->irq); - sc->irq = 0; - } + xen_intr_unbind(sc->xen_intr_handle); } static int ==== //depot/SpectraBSD/stable/9/sys/dev/xen/blkfront/block.h#4 (text) ==== @@ -172,7 +172,7 @@ uint32_t max_request_size; grant_ref_t ring_ref[XBF_MAX_RING_PAGES]; blkif_front_ring_t ring; - unsigned int irq; + xen_intr_handle_t xen_intr_handle; struct gnttab_free_callback callback; TAILQ_HEAD(,xb_command) cm_free; TAILQ_HEAD(,xb_command) cm_ready; ==== //depot/SpectraBSD/stable/9/sys/dev/xen/control/control.c#2 (text) ==== @@ -129,6 +129,7 @@ #include #include +#include #include #include @@ -244,6 +245,7 @@ xencons_suspend(); gnttab_suspend(); + intr_suspend(); max_pfn = HYPERVISOR_shared_info->arch.max_pfn; @@ -284,7 +286,7 @@ HYPERVISOR_shared_info->arch.max_pfn = max_pfn; gnttab_resume(); - irq_resume(); + intr_resume(); local_irq_enable(); xencons_resume(); @@ -354,13 +356,11 @@ * Prevent any races with evtchn_interrupt() handler. */ disable_intr(); - irq_suspend(); + intr_suspend(); suspend_cancelled = HYPERVISOR_suspend(0); - if (suspend_cancelled) - irq_resume(); - else - xenpci_resume(); + + intr_resume(); /* * Re-enable interrupts and put the scheduler back to normal. ==== //depot/SpectraBSD/stable/9/sys/dev/xen/netback/netback.c#4 (text) ==== @@ -80,7 +80,7 @@ #include #include -#include +#include #include #include #include @@ -431,8 +431,8 @@ /** Xen device handle.*/ long handle; - /** IRQ mapping for the communication ring event channel. */ - int irq; + /** Handle to the communication ring event channel. */ + xen_intr_handle_t xen_intr_handle; /** * \brief Cached value of the front-end's domain id. @@ -645,10 +645,7 @@ int error; int i; - if (xnb->irq != 0) { - unbind_from_irqhandler(xnb->irq); - xnb->irq = 0; - } + xen_intr_unbind(xnb->xen_intr_handle); /* * We may still have another thread currently processing requests. 
We @@ -771,13 +768,13 @@ xnb->flags |= XNBF_RING_CONNECTED; - error = - bind_interdomain_evtchn_to_irqhandler(xnb->otherend_id, - xnb->evtchn, - device_get_nameunit(xnb->dev), - xnb_intr, /*arg*/xnb, - INTR_TYPE_BIO | INTR_MPSAFE, - &xnb->irq); + error = xen_intr_bind_remote_port(xnb->dev, + xnb->otherend_id, + xnb->evtchn, + /*filter*/NULL, + xnb_intr, /*arg*/xnb, + INTR_TYPE_BIO | INTR_MPSAFE, + &xnb->xen_intr_handle); if (error != 0) { (void)xnb_disconnect(xnb); xenbus_dev_fatal(xnb->dev, error, "binding event channel"); @@ -1446,7 +1443,7 @@ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(txb, notify); if (notify != 0) - notify_remote_via_irq(xnb->irq); + xen_intr_signal(xnb->xen_intr_handle); txb->sring->req_event = txb->req_cons + 1; xen_mb(); @@ -2359,7 +2356,7 @@ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(rxb, notify); if ((notify != 0) || (out_of_space != 0)) - notify_remote_via_irq(xnb->irq); + xen_intr_signal(xnb->xen_intr_handle); rxb->sring->req_event = req_prod_local + 1; xen_mb(); } while (rxb->sring->req_prod != req_prod_local) ; ==== //depot/SpectraBSD/stable/9/sys/dev/xen/netfront/netfront.c#4 (text) ==== @@ -82,7 +82,6 @@ #include #include #include -#include #include #include #include @@ -260,8 +259,7 @@ struct mtx rx_lock; struct mtx sc_lock; - u_int handle; - u_int irq; + xen_intr_handle_t xen_intr_handle; u_int copying_receiver; u_int carrier; u_int maxfrags; @@ -557,7 +555,8 @@ goto abort_transaction; } err = xs_printf(xst, node, - "event-channel", "%u", irq_to_evtchn_port(info->irq)); + "event-channel", "%u", + xen_intr_port(info->xen_intr_handle)); if (err) { message = "writing event-channel"; goto abort_transaction; @@ -620,7 +619,6 @@ info->rx_ring_ref = GRANT_REF_INVALID; info->rx.sring = NULL; info->tx.sring = NULL; - info->irq = 0; txs = (netif_tx_sring_t *)malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); if (!txs) { @@ -647,12 +645,13 @@ if (error) goto fail; - error = bind_listening_port_to_irqhandler(xenbus_get_otherend_id(dev), - "xn", xn_intr, info, INTR_TYPE_NET | INTR_MPSAFE, &info->irq); + error = xen_intr_alloc_and_bind_local_port(dev, + xenbus_get_otherend_id(dev), /*filter*/NULL, xn_intr, info, + INTR_TYPE_NET | INTR_MPSAFE, &info->xen_intr_handle); if (error) { xenbus_dev_fatal(dev, error, - "bind_evtchn_to_irqhandler failed"); + "xen_intr_alloc_and_bind_local_port failed"); goto fail; } @@ -977,7 +976,7 @@ push: RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->rx, notify); if (notify) - notify_remote_via_irq(sc->irq); + xen_intr_signal(sc->xen_intr_handle); } static void @@ -1700,7 +1699,7 @@ RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->tx, notify); if (notify) - notify_remote_via_irq(sc->irq); + xen_intr_signal(sc->xen_intr_handle); if (RING_FULL(&sc->tx)) { sc->tx_full = 1; @@ -1987,7 +1986,7 @@ * packets. */ netfront_carrier_on(np); - notify_remote_via_irq(np->irq); + xen_intr_signal(np->xen_intr_handle); XN_TX_LOCK(np); xn_txeof(np); XN_TX_UNLOCK(np); @@ -2076,8 +2075,9 @@ return (err); } -/** Create a network device. - * @param handle device handle +/** + * Create a network device. + * @param dev Newbus device representing this virtual NIC. 
*/ int create_netdev(device_t dev) @@ -2216,10 +2216,7 @@ free_ring(&info->tx_ring_ref, &info->tx.sring); free_ring(&info->rx_ring_ref, &info->rx.sring); - if (info->irq) - unbind_from_irqhandler(info->irq); - - info->irq = 0; + xen_intr_unbind(&info->xen_intr_handle); } static void ==== //depot/SpectraBSD/stable/9/sys/dev/xen/xenpci/xenpci.c#2 (text) ==== @@ -32,9 +32,6 @@ #include #include #include -#include -#include -#include #include #include @@ -42,30 +39,18 @@ #include #include + #include #include -#include -#include -#include -#include +#include #include #include -#include -#include -#include -#include +#include -#include +extern void xen_intr_handle_upcall(struct trapframe *trap_frame); -/* - * These variables are used by the rest of the kernel to access the - * hypervisor. - */ -char *hypercall_stubs; -shared_info_t *HYPERVISOR_shared_info; -static vm_paddr_t shared_info_pa; static device_t nexus; /* @@ -73,103 +58,28 @@ */ static devclass_t xenpci_devclass; -/* - * Return the CPUID base address for Xen functions. - */ -static uint32_t -xenpci_cpuid_base(void) +static int +xenpci_intr_filter(void *trap_frame) { - uint32_t base, regs[4]; - - for (base = 0x40000000; base < 0x40010000; base += 0x100) { - do_cpuid(base, regs); - if (!memcmp("XenVMMXenVMM", &regs[1], 12) - && (regs[0] - base) >= 2) - return (base); - } - return (0); + xen_intr_handle_upcall(trap_frame); + return (FILTER_HANDLED); } -/* - * Allocate and fill in the hypcall page. - */ static int -xenpci_init_hypercall_stubs(device_t dev, struct xenpci_softc * scp) +xenpci_irq_init(device_t device, struct xenpci_softc *scp) { - uint32_t base, regs[4]; - int i; + int error; - base = xenpci_cpuid_base(); - if (!base) { - device_printf(dev, "Xen platform device but not Xen VMM\n"); - return (EINVAL); - } - - if (bootverbose) { - do_cpuid(base + 1, regs); - device_printf(dev, "Xen version %d.%d.\n", - regs[0] >> 16, regs[0] & 0xffff); - } - - /* - * Find the hypercall pages. - */ - do_cpuid(base + 2, regs); - - hypercall_stubs = malloc(regs[0] * PAGE_SIZE, M_TEMP, M_WAITOK); - - for (i = 0; i < regs[0]; i++) { - wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); - } - - return (0); + error = BUS_SETUP_INTR(device_get_parent(device), device, + scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC, + xenpci_intr_filter, NULL, /*trap_frame*/NULL, + &scp->intr_cookie); + if (error == 0) + xen_hvm_set_callback(device); + return (error); } /* - * After a resume, re-initialise the hypercall page. - */ -static void -xenpci_resume_hypercall_stubs(device_t dev, struct xenpci_softc * scp) -{ - uint32_t base, regs[4]; - int i; - - base = xenpci_cpuid_base(); - - do_cpuid(base + 2, regs); - for (i = 0; i < regs[0]; i++) { - wrmsr(regs[1], vtophys(hypercall_stubs + i * PAGE_SIZE) + i); - } -} - -/* - * Tell the hypervisor how to contact us for event channel callbacks. - */ -static void -xenpci_set_callback(device_t dev) -{ - int irq; - uint64_t callback; - struct xen_hvm_param xhp; - - irq = pci_get_irq(dev); - if (irq < 16) { - callback = irq; - } else { - callback = (pci_get_intpin(dev) - 1) & 3; - callback |= pci_get_slot(dev) << 11; - callback |= 1ull << 56; - } - - xhp.domid = DOMID_SELF; - xhp.index = HVM_PARAM_CALLBACK_IRQ; - xhp.value = callback; - if (HYPERVISOR_hvm_op(HVMOP_set_param, &xhp)) - panic("Can't set evtchn callback"); -} - - -/* * Deallocate anything allocated by xenpci_allocate_resources.
*/ static int @@ -293,35 +203,6 @@ } /* - * Called very early in the resume sequence - reinitialise the various - * bits of Xen machinery including the hypercall page and the shared - * info page. - */ -void -xenpci_resume() -{ - device_t dev = devclass_get_device(xenpci_devclass, 0); - struct xenpci_softc *scp = device_get_softc(dev); - struct xen_add_to_physmap xatp; - - xenpci_resume_hypercall_stubs(dev, scp); - - xatp.domid = DOMID_SELF; - xatp.idx = 0; - xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = shared_info_pa >> PAGE_SHIFT; - if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) - panic("HYPERVISOR_memory_op failed"); - - pmap_kenter((vm_offset_t) HYPERVISOR_shared_info, shared_info_pa); - - xenpci_set_callback(dev); - - gnttab_resume(); - irq_resume(); -} - -/* * Probe - just check device ID. */ static int @@ -341,11 +222,9 @@ static int xenpci_attach(device_t dev) { - int error; struct xenpci_softc *scp = device_get_softc(dev); - struct xen_add_to_physmap xatp; - vm_offset_t shared_va; devclass_t dc; + int error; /* * Find and record nexus0. Since we are not really on the @@ -365,33 +244,10 @@ goto errexit; } - error = xenpci_init_hypercall_stubs(dev, scp); - if (error) { - device_printf(dev, "xenpci_init_hypercall_stubs failed(%d).\n", - error); - goto errexit; - } - - setup_xen_features(); - - xenpci_alloc_space_int(scp, PAGE_SIZE, &shared_info_pa); - - xatp.domid = DOMID_SELF; - xatp.idx = 0; - xatp.space = XENMAPSPACE_shared_info; - xatp.gpfn = shared_info_pa >> PAGE_SHIFT; - if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) - panic("HYPERVISOR_memory_op failed"); - - shared_va = kmem_alloc_nofault(kernel_map, PAGE_SIZE); - pmap_kenter(shared_va, shared_info_pa); - HYPERVISOR_shared_info = (void *) shared_va; - /* * Hook the irq up to evtchn */ xenpci_irq_init(dev, scp); - xenpci_set_callback(dev); return (bus_generic_attach(dev)); @@ -431,13 +287,42 @@ return (xenpci_deallocate_resources(dev)); } +static int +xenpci_suspend(device_t dev) +{ + struct xenpci_softc *scp = device_get_softc(dev); + device_t parent = device_get_parent(dev); + + if (scp->intr_cookie != NULL) { + if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq, + scp->intr_cookie) != 0) + printf("intr teardown failed.. 
continuing\n"); + scp->intr_cookie = NULL; + } + + return (bus_generic_suspend(dev)); +} + +static int +xenpci_resume(device_t dev) +{ + struct xenpci_softc *scp = device_get_softc(dev); + device_t parent = device_get_parent(dev); + + BUS_SETUP_INTR(parent, dev, scp->res_irq, + INTR_MPSAFE|INTR_TYPE_MISC, xenpci_intr_filter, NULL, + /*trap_frame*/NULL, &scp->intr_cookie); + xen_hvm_set_callback(dev); + return (bus_generic_resume(dev)); +} + static device_method_t xenpci_methods[] = { /* Device interface */ DEVMETHOD(device_probe, xenpci_probe), DEVMETHOD(device_attach, xenpci_attach), DEVMETHOD(device_detach, xenpci_detach), - DEVMETHOD(device_suspend, bus_generic_suspend), - DEVMETHOD(device_resume, bus_generic_resume), + DEVMETHOD(device_suspend, xenpci_suspend), + DEVMETHOD(device_resume, xenpci_resume), /* Bus interface */ DEVMETHOD(bus_add_child, bus_generic_add_child), ==== //depot/SpectraBSD/stable/9/sys/dev/xen/xenpci/xenpcivar.h#2 (text) ==== @@ -38,7 +38,4 @@ vm_paddr_t phys_next; /* next page from mem range */ }; -extern int xenpci_irq_init(device_t device, struct xenpci_softc *scp); extern int xenpci_alloc_space(size_t sz, vm_paddr_t *pa); -extern void xenpci_resume(void); -extern void xen_suspend(void); ==== //depot/SpectraBSD/stable/9/sys/i386/i386/apic_vector.s#2 (text) ==== @@ -138,6 +138,25 @@ MEXITCOUNT jmp doreti +#ifdef XENHVM +/* + * Xen event channel upcall interrupt handler. + * Only used when the hypervisor supports direct vector callbacks. + */ + .text + SUPERALIGN_TEXT +IDTVEC(xen_intr_upcall) + PUSH_FRAME + SET_KERNEL_SREGS + cld + FAKE_MCOUNT(TF_EIP(%esp)) + pushl %esp + call xen_intr_handle_upcall + add $4, %esp + MEXITCOUNT + jmp doreti +#endif + #ifdef SMP /* * Global address space TLB shootdown. ==== //depot/SpectraBSD/stable/9/sys/i386/i386/machdep.c#3 (text) ==== @@ -1926,6 +1926,9 @@ #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), #endif +#ifdef XENHVM + IDTVEC(xen_intr_upcall), +#endif IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB @@ -2900,6 +2903,10 @@ setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif +#ifdef XENHVM + setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386TGT, SEL_UPL, + GSEL(GCODE_SEL, SEL_KPL)); +#endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; ==== //depot/SpectraBSD/stable/9/sys/i386/include/apicvar.h#2 (text) ==== @@ -216,6 +216,7 @@ u_int apic_idt_to_irq(u_int apic_id, u_int vector); void apic_register_enumerator(struct apic_enumerator *enumerator); u_int apic_cpuid(u_int apic_id); +void evtchn_handle_upcall(struct trapframe *trap_frame); void *ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase); int ioapic_disable_pin(void *cookie, u_int pin); int ioapic_get_vector(void *cookie, u_int pin); ==== //depot/SpectraBSD/stable/9/sys/i386/include/intr_machdep.h#2 (text) ==== @@ -44,12 +44,24 @@ * allocate IDT vectors. * * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. - * IRQ values beyond 256 are used by MSI. We leave 255 unused to avoid - * confusion since 255 is used in PCI to indicate an invalid IRQ. + * IRQ values from 256 to 767 are used by MSI. When running under the Xen + * Hypervisor, IRQ values from 768 to 1791 are available for binding to + * event channel events. We leave 255 unused to avoid confusion since 255 is + * used in PCI to indicate an invalid IRQ. 
==== //depot/SpectraBSD/stable/9/sys/i386/include/pcpu.h#2 (text) ==== @@ -76,14 +76,7 @@ int pc_virq_to_irq[NR_VIRQS]; \ int pc_ipi_to_irq[NR_IPIS] -#elif defined(XENHVM) - -#define PCPU_XEN_FIELDS \ - ; \ - unsigned int pc_last_processed_l1i; \ - unsigned int pc_last_processed_l2i - -#else /* !XEN && !XENHVM */ +#else /* !XEN */ #define PCPU_XEN_FIELDS ==== //depot/SpectraBSD/stable/9/sys/i386/include/segments.h#2 (text) ==== @@ -207,7 +207,8 @@ #define IDT_XF 19 /* #XF: SIMD Floating-Point Exception */ #define IDT_IO_INTS NRSVIDT /* Base of IDT entries for I/O interrupts. */ #define IDT_SYSCALL 0x80 /* System Call Interrupt Vector */ -#define IDT_DTRACE_RET 0x20 /* DTrace pid provider Interrupt Vector */ +#define IDT_DTRACE_RET 0x92 /* DTrace pid provider Interrupt Vector */ +#define IDT_EVTCHN 0x93 /* Xen HVM Event Channel Interrupt Vector */ /* * Entries in the Global Descriptor Table (GDT) ==== //depot/SpectraBSD/stable/9/sys/sys/kernel.h#2 (text) ==== @@ -102,6 +102,11 @@ SI_SUB_VM = 0x1000000, /* virtual memory system init*/ SI_SUB_KMEM = 0x1800000, /* kernel memory*/ SI_SUB_KVM_RSRC = 0x1A00000, /* kvm operational limits*/ + SI_SUB_HYPERVISOR = 0x1A40000, /* + * Hypervisor detection and + * virtualization support + * setup. + */ SI_SUB_WITNESS = 0x1A80000, /* witness initialization */ SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */ SI_SUB_LOCK = 0x1B00000, /* various locks */ ==== //depot/SpectraBSD/stable/9/sys/x86/x86/local_apic.c#2 (text) ==== @@ -90,6 +90,7 @@ #define IRQ_TIMER (NUM_IO_INTS + 1) #define IRQ_SYSCALL (NUM_IO_INTS + 2) #define IRQ_DTRACE_RET (NUM_IO_INTS + 3) +#define IRQ_EVTCHN (NUM_IO_INTS + 4) /* * Support for local APICs. Local APICs manage interrupts on each @@ -310,6 +311,9 @@ #ifdef KDTRACE_HOOKS lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] = IRQ_DTRACE_RET; #endif +#ifdef XENHVM + lapics[apic_id].la_ioint_irqs[IDT_EVTCHN - APIC_IO_INTS] = IRQ_EVTCHN; +#endif #ifdef SMP @@ -1139,6 +1143,10 @@ if (irq == IRQ_DTRACE_RET) continue; #endif +#ifdef XENHVM + if (irq == IRQ_EVTCHN) + continue; +#endif db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); ==== //depot/SpectraBSD/stable/9/sys/x86/xen/xen_intr.c#1 (text) ==== @@ -1,10 +1,11 @@ /****************************************************************************** - * evtchn.c + * xen_intr.c * - * A simplified event channel for para-drivers in unmodified linux + * Xen event and interrupt services for x86 PV and HVM guests. * * Copyright (c) 2002-2005, K A Fraser * Copyright (c) 2005, Intel Corporation + * Copyright (c) 2012, Spectra Logic Corporation, Justin T.
Gibbs * * This file may be distributed separately from the Linux kernel, or * incorporated into other software packages, subject to the following license: @@ -41,284 +42,430 @@ #include #include #include +#include + +#include +#include +#include #include #include + #include #include -#include -#include +#include #include -#if defined(__i386__) -#define __ffs(word) (ffs(word) - 1) -#elif defined(__amd64__) -static inline unsigned long __ffs(unsigned long word) -{ - __asm__("bsfq %1,%0" - :"=r" (word) - :"rm" (word)); /* XXXRW: why no "cc"? */ - return word; -} -#else -#error "evtchn: unsupported architecture" -#endif +static MALLOC_DEFINE(M_XENINTR, "xen_intr", "Xen Interrupt Services"); + +/** + * Per-cpu event channel processing state. + */ +struct xen_intr_pcpu_data { + /** + * The last event channel bitmap section (level one bit) processed. + * This is used to ensure we scan all ports before + * servicing an already serviced port again. + */ + u_int last_processed_l1i; -#define is_valid_evtchn(x) ((x) != 0) -#define evtchn_from_irq(x) (irq_evtchn[irq].evtchn) + /** + * The last event channel processed within the event channel + * bitmap being scanned. + */ + u_int last_processed_l2i; -static struct { - struct mtx lock; - driver_intr_t *handler; - void *arg; - int evtchn; - int close:1; /* close on unbind_from_irqhandler()? */ - int inuse:1; - int in_handler:1; - int mpsafe:1; -} irq_evtchn[256]; -static int evtchn_to_irq[NR_EVENT_CHANNELS] = { - [0 ... NR_EVENT_CHANNELS-1] = -1 }; + /** Pointer to this CPU's interrupt statistic counter. */ + u_long *evtchn_intrcnt; -static struct mtx irq_alloc_lock; -static device_t xenpci_device; + /** + * A bitmap of ports that can be serviced from this CPU. + * A set bit means interrupt handling is enabled. + */ + u_long evtchn_enabled[sizeof(u_long) * 8]; +}; -#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) +/* + * Start the scan at port 0 by initializing the last scanned + * location as the highest numbered event channel port. + */ +DPCPU_DEFINE(struct xen_intr_pcpu_data, xen_intr_pcpu) = { + .last_processed_l1i = LONG_BIT - 1, + .last_processed_l2i = LONG_BIT - 1 +}; -static unsigned int -alloc_xen_irq(void) -{ - static int warned; - unsigned int irq; +#define is_valid_evtchn(x) ((x) != 0) - mtx_lock(&irq_alloc_lock); +struct xenisrc { + struct intsrc xenisrc_intsrc; + enum evtchn_type xenisrc_type; + int xenisrc_cpu; /* VCPU for delivery. */ + int xenisrc_vector; /* Global isrc vector number. */ + evtchn_port_t xenisrc_port; + int xenisrc_pirq; + int xenisrc_virq; + u_int xenisrc_close:1; /* close on unbind? */ + u_int xenisrc_needs_eoi:1; + u_int xenisrc_shared:1; /* Shared with other domains. */ +};
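The last_processed fields and the evtchn_enabled bitmap above drive a two-level scan: the per-VCPU selector word names LONG_BIT-sized sections of the shared pending bitmap, and each section is ANDed with the inverted global mask and this CPU's enable bits. A standalone sketch of just that masking and scanning step, assuming the BSD ffsl(3); the kernel loop in xen_intr_handle_upcall() additionally resumes each scan where the last one stopped, which this sketch omits:

#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>

#define NWORDS	(sizeof(unsigned long) * CHAR_BIT)	/* LONG_BIT */

/* Pending, not globally masked, and enabled for this CPU. */
static unsigned long
active_ports(const unsigned long *pending, const unsigned long *mask,
    const unsigned long *cpu_enabled, unsigned int idx)
{
	return (pending[idx] & ~mask[idx] & cpu_enabled[idx]);
}

int
main(void)
{
	unsigned long pending[NWORDS], mask[NWORDS], enabled[NWORDS];
	unsigned int l1i;

	memset(pending, 0, sizeof(pending));
	memset(mask, 0, sizeof(mask));
	memset(enabled, 0xff, sizeof(enabled));
	pending[1] |= 1UL << 5;		/* pretend port LONG_BIT + 5 fired */

	for (l1i = 0; l1i < NWORDS; l1i++) {
		unsigned long l2 = active_ports(pending, mask, enabled, l1i);

		while (l2 != 0) {
			unsigned int l2i = ffsl((long)l2) - 1;

			printf("port %u has work\n",
			    (unsigned int)(l1i * NWORDS + l2i));
			l2 &= ~(1UL << l2i);
		}
	}
	return (0);
}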
- for (irq = 1; irq < ARRAY_SIZE(irq_evtchn); irq++) { - if (irq_evtchn[irq].inuse) - continue; - irq_evtchn[irq].inuse = 1; - mtx_unlock(&irq_alloc_lock); - return irq; - } +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) - if (!warned) { - warned = 1; - printf("alloc_xen_irq: No available IRQ to bind to: " - "increase irq_evtchn[] size in evtchn.c.\n"); - } +static void xen_intr_suspend(struct pic *); +static void xen_intr_resume(struct pic *); +static void xen_intr_enable_source(struct intsrc *isrc); +static void xen_intr_disable_source(struct intsrc *isrc, int eoi); +static void xen_intr_eoi_source(struct intsrc *isrc); +static void xen_intr_enable_intr(struct intsrc *isrc); +static void xen_intr_disable_intr(struct intsrc *isrc); +static int xen_intr_vector(struct intsrc *isrc); +static int xen_intr_source_pending(struct intsrc *isrc); +static int xen_intr_config_intr(struct intsrc *isrc, + enum intr_trigger trig, enum intr_polarity pol); +static int xen_intr_assign_cpu(struct intsrc *isrc, u_int apic_id); - mtx_unlock(&irq_alloc_lock); +static void xen_intr_pirq_enable_source(struct intsrc *isrc); +static void xen_intr_pirq_disable_source(struct intsrc *isrc, int eoi); +static void xen_intr_pirq_eoi_source(struct intsrc *isrc); +static void xen_intr_pirq_enable_intr(struct intsrc *isrc); - return -ENOSPC; -} +/** + * PIC interface for all event channel port types except physical IRQs. + */ +struct pic xen_intr_pic = { + .pic_enable_source = xen_intr_enable_source, + .pic_disable_source = xen_intr_disable_source, + .pic_eoi_source = xen_intr_eoi_source, + .pic_enable_intr = xen_intr_enable_intr, + .pic_disable_intr = xen_intr_disable_intr, + .pic_vector = xen_intr_vector, + .pic_source_pending = xen_intr_source_pending, + .pic_suspend = xen_intr_suspend, + .pic_resume = xen_intr_resume, + .pic_config_intr = xen_intr_config_intr, + .pic_assign_cpu = xen_intr_assign_cpu +}; -static void -free_xen_irq(int irq) -{ +/** + * PIC interface for all event channels representing + * physical interrupt sources. + */ +struct pic xen_intr_pirq_pic = { + .pic_enable_source = xen_intr_pirq_enable_source, + .pic_disable_source = xen_intr_pirq_disable_source, + .pic_eoi_source = xen_intr_pirq_eoi_source, + .pic_enable_intr = xen_intr_pirq_enable_intr, + .pic_disable_intr = xen_intr_disable_intr, + .pic_vector = xen_intr_vector, + .pic_source_pending = xen_intr_source_pending, + .pic_suspend = xen_intr_suspend, + .pic_resume = xen_intr_resume, + .pic_config_intr = xen_intr_config_intr, + .pic_assign_cpu = xen_intr_assign_cpu +}; - mtx_lock(&irq_alloc_lock); - irq_evtchn[irq].inuse = 0; - mtx_unlock(&irq_alloc_lock); -} +static struct mtx xen_intr_isrc_lock; +static int xen_intr_isrc_count; +static struct xenisrc *xen_intr_port_to_isrc[NR_EVENT_CHANNELS]; -int -irq_to_evtchn_port(int irq) +/*------------------------- Private Functions --------------------------------*/ +/** + * Disable signal delivery for an event channel port on the + * specified CPU. + * + * \param port The event channel port to mask. + * + * This API is used to manage the port<=>CPU binding of event + * channel handlers. + * + * \note This operation does not preclude reception of an event + * for this event channel on another CPU. To mask the + * event channel globally, use evtchn_mask().
+ */ +static inline void +evtchn_cpu_mask_port(u_int cpu, evtchn_port_t port) { + struct xen_intr_pcpu_data *pcpu; - return irq_evtchn[irq].evtchn; + pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); + clear_bit(port, pcpu->evtchn_enabled); } -void -mask_evtchn(int port) +/** + * Enable signal delivery for an event channel port on the + * specified CPU. + * + * \param port The event channel port to unmask. + * + * This API is used to manage the port<=>CPU binding of event + * channel handlers. + * + * \note This operation does not guarantee that event delivery + * is enabled for this event channel port. The port must + * also be globally enabled. See evtchn_unmask(). + */ +static inline void +evtchn_cpu_unmask_port(u_int cpu, evtchn_port_t port) { - shared_info_t *s = HYPERVISOR_shared_info; + struct xen_intr_pcpu_data *pcpu; - synch_set_bit(port, &s->evtchn_mask[0]); + pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); + set_bit(port, pcpu->evtchn_enabled); } -void -unmask_evtchn(int port) +/** + * Allocate and register a per-cpu Xen upcall interrupt counter. + * + * \param cpu The cpu for which to register this interrupt count. + */ +static void +xen_intr_intrcnt_add(u_int cpu) { - evtchn_unmask_t op = { .port = port }; + char buf[MAXCOMLEN + 1]; + struct xen_intr_pcpu_data *pcpu; + + pcpu = DPCPU_ID_PTR(cpu, xen_intr_pcpu); + if (pcpu->evtchn_intrcnt != NULL) + return; - HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &op); + snprintf(buf, sizeof(buf), "cpu%d:xen", cpu); + intrcnt_add(buf, &pcpu->evtchn_intrcnt); } -int -bind_listening_port_to_irqhandler(unsigned int remote_domain, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp) +/** + * Search for an already allocated but currently unused Xen interrupt + * source object. + * + * \param type Restrict the search to interrupt sources of the given + * type. + * + * \return A pointer to a free Xen interrupt source object or NULL. 
+ */ +static struct xenisrc * +xen_intr_find_unused_isrc(enum evtchn_type type) { - struct evtchn_alloc_unbound alloc_unbound; - unsigned int irq; - int error; + int isrc_idx; - irq = alloc_xen_irq(); - if (irq < 0) - return irq; + KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn isrc lock not held")); - mtx_lock(&irq_evtchn[irq].lock); + for (isrc_idx = 0; isrc_idx < xen_intr_isrc_count; isrc_idx ++) { + struct xenisrc *isrc; + u_int vector; - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = remote_domain; - error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - if (error) { - mtx_unlock(&irq_evtchn[irq].lock); - free_xen_irq(irq); - return (-error); + vector = FIRST_EVTCHN_INT + isrc_idx; + isrc = (struct xenisrc *)intr_lookup_source(vector); + if (isrc != NULL + && isrc->xenisrc_type == EVTCHN_TYPE_UNBOUND) { + KASSERT(isrc->xenisrc_intsrc.is_handlers == 0, + ("Free evtchn still has handlers")); + isrc->xenisrc_type = type; + return (isrc); + } } - - irq_evtchn[irq].handler = handler; - irq_evtchn[irq].arg = arg; - irq_evtchn[irq].evtchn = alloc_unbound.port; - irq_evtchn[irq].close = 1; - irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; - - evtchn_to_irq[alloc_unbound.port] = irq; - - unmask_evtchn(alloc_unbound.port); - - mtx_unlock(&irq_evtchn[irq].lock); - - if (irqp) - *irqp = irq; - return (0); + return (NULL); } -int -bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, const char *devname, driver_intr_t handler, - void *arg, unsigned long irqflags, unsigned int *irqp) +/** + * Allocate a Xen interrupt source object. + * + * \param type The type of interrupt source to create. + * + * \return A pointer to a newly allocated Xen interrupt source + * object or NULL. 
+ */ +static struct xenisrc * +xen_intr_alloc_isrc(enum evtchn_type type) { - struct evtchn_bind_interdomain bind_interdomain; - unsigned int irq; - int error; + static int warned; + struct xenisrc *isrc; + int vector; - irq = alloc_xen_irq(); - if (irq < 0) - return irq; + KASSERT(mtx_owned(&xen_intr_isrc_lock), ("Evtchn alloc lock not held")); - mtx_lock(&irq_evtchn[irq].lock); - - bind_interdomain.remote_dom = remote_domain; - bind_interdomain.remote_port = remote_port; - error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - if (error) { - mtx_unlock(&irq_evtchn[irq].lock); - free_xen_irq(irq); - return (-error); + if (xen_intr_isrc_count > NR_EVENT_CHANNELS) { + if (!warned) { + warned = 1; + printf("xen_intr_alloc: Event channels exhausted.\n"); + } + return (NULL); } + vector = FIRST_EVTCHN_INT + xen_intr_isrc_count; + xen_intr_isrc_count++; - irq_evtchn[irq].handler = handler; - irq_evtchn[irq].arg = arg; - irq_evtchn[irq].evtchn = bind_interdomain.local_port; - irq_evtchn[irq].close = 1; - irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; + mtx_unlock(&xen_intr_isrc_lock); + isrc = malloc(sizeof(*isrc), M_XENINTR, M_WAITOK | M_ZERO); + isrc->xenisrc_intsrc.is_pic = &xen_intr_pic; + isrc->xenisrc_vector = vector; + isrc->xenisrc_type = type; + intr_register_source(&isrc->xenisrc_intsrc); + mtx_lock(&xen_intr_isrc_lock); - evtchn_to_irq[bind_interdomain.local_port] = irq; - - unmask_evtchn(bind_interdomain.local_port); - - mtx_unlock(&irq_evtchn[irq].lock); - - if (irqp) - *irqp = irq; - return (0); + return (isrc); } - -int -bind_caller_port_to_irqhandler(unsigned int caller_port, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp) +/** + * Attempt to free an active Xen interrupt source object. + * + * \param isrc The interrupt source object to release. + * + * \returns EBUSY if the source is still in use, otherwise 0. + */ +static int +xen_intr_release_isrc(struct xenisrc *isrc) { - unsigned int irq; - irq = alloc_xen_irq(); - if (irq < 0) - return irq; + mtx_lock(&xen_intr_isrc_lock); + if (isrc->xenisrc_intsrc.is_handlers != 0) { + mtx_unlock(&xen_intr_isrc_lock); + return (EBUSY); + } + evtchn_mask_port(isrc->xenisrc_port); + xen_intr_port_to_isrc[isrc->xenisrc_port] = NULL; - mtx_lock(&irq_evtchn[irq].lock); + /* Rebind port to CPU 0. */ + evtchn_cpu_mask_port(isrc->xenisrc_cpu, isrc->xenisrc_port); + evtchn_cpu_unmask_port(0, isrc->xenisrc_port); - irq_evtchn[irq].handler = handler; - irq_evtchn[irq].arg = arg; - irq_evtchn[irq].evtchn = caller_port; - irq_evtchn[irq].close = 0; - irq_evtchn[irq].mpsafe = (irqflags & INTR_MPSAFE) != 0; + if (isrc->xenisrc_close != 0) { + struct evtchn_close close = { .port = isrc->xenisrc_port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + } - evtchn_to_irq[caller_port] = irq; - - unmask_evtchn(caller_port); - - mtx_unlock(&irq_evtchn[irq].lock); - - if (irqp) - *irqp = irq; + isrc->xenisrc_cpu = 0; + isrc->xenisrc_type = EVTCHN_TYPE_UNBOUND; + isrc->xenisrc_port = 0; + mtx_unlock(&xen_intr_isrc_lock); return (0); } -void -unbind_from_irqhandler(unsigned int irq) +/** + * Associate an interrupt handler with an already allocated local Xen + * event channel port. + * + * \param isrcp The returned Xen interrupt object associated with + * the specified local port. + * \param local_port The event channel to bind. + * \param type The event channel type of local_port. 
+ * \param intr_owner The device making this bind request. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. + */ +static int +xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port_t local_port, + enum evtchn_type type, device_t intr_owner, driver_filter_t filter, + driver_intr_t handler, void *arg, enum intr_type flags, + xen_intr_handle_t *port_handlep) { - int evtchn; + struct xenisrc *isrc; + int error; - mtx_lock(&irq_evtchn[irq].lock); + *isrcp = NULL; + if (port_handlep == NULL) { + device_printf(intr_owner, + "xen_intr_bind_isrc: Bad event handle\n"); + return (EINVAL); + } - evtchn = evtchn_from_irq(irq); - - if (is_valid_evtchn(evtchn)) { - evtchn_to_irq[evtchn] = -1; - mask_evtchn(evtchn); - if (irq_evtchn[irq].close) { - struct evtchn_close close = { .port = evtchn }; - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) - panic("EVTCHNOP_close failed"); + mtx_lock(&xen_intr_isrc_lock); + isrc = xen_intr_find_unused_isrc(type); + if (isrc == NULL) { + isrc = xen_intr_alloc_isrc(type); + if (isrc == NULL) { + mtx_unlock(&xen_intr_isrc_lock); + return (ENOSPC); + } + } + isrc->xenisrc_port = local_port; + xen_intr_port_to_isrc[local_port] = isrc; + mtx_unlock(&xen_intr_isrc_lock); - irq_evtchn[irq].handler = NULL; - irq_evtchn[irq].evtchn = 0; + error = intr_add_handler(device_get_nameunit(intr_owner), + isrc->xenisrc_vector, filter, handler, arg, + flags|INTR_EXCL, port_handlep); + if (error != 0) { + device_printf(intr_owner, + "xen_intr_bind_isrc: intr_add_handler failed\n"); + xen_intr_release_isrc(isrc); + return (error); + } + *isrcp = isrc; + return (0); +} - mtx_unlock(&irq_evtchn[irq].lock); +/** + * Lookup a Xen interrupt source object given an interrupt binding handle. + * + * \param handle A handle initialized by a previous call to + * xen_intr_bind_isrc(). + * + * \returns A pointer to the Xen interrupt source object associated + * with the given interrupt handle. NULL if no association + * currently exists. + */ +static struct xenisrc * +xen_intr_isrc(xen_intr_handle_t handle) +{ + struct intr_handler *ih; - while (irq_evtchn[irq].in_handler) - cpu_relax(); + ih = handle; + if (ih == NULL || ih->ih_event == NULL) + return (NULL); - free_xen_irq(irq); + return (ih->ih_event->ie_source); } -void notify_remote_via_irq(int irq) +/** + * Determine the event channel ports at the given section of the + * event port bitmap which have pending events for the given cpu. + * + * \param pcpu The Xen interrupt pcpu data for the cpu being queried. + * \param sh The Xen shared info area. + * \param idx The index of the section of the event channel bitmap to + * inspect. + * + * \returns A u_long with bits set for every event channel with pending + * events.
+ */ +static inline u_long +xen_intr_active_ports(struct xen_intr_pcpu_data *pcpu, shared_info_t *sh, + u_int idx) { - int evtchn; - - evtchn = evtchn_from_irq(irq); - if (is_valid_evtchn(evtchn)) - notify_remote_via_evtchn(evtchn); + return (sh->evtchn_pending[idx] + & ~sh->evtchn_mask[idx] + & pcpu->evtchn_enabled[idx]); } -static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh, - unsigned int idx) +/** + * Interrupt handler for processing all Xen event channel events. + * + * \param trap_frame The trap frame context for the current interrupt. + */ +void +xen_intr_handle_upcall(struct trapframe *trap_frame) { - return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]); -} + u_int l1i, l2i, port, cpu; + u_long masked_l1, masked_l2; + struct xenisrc *isrc; + shared_info_t *s; + vcpu_info_t *v; + struct xen_intr_pcpu_data *pc; + u_long l1, l2; -static void -evtchn_interrupt(void *arg) -{ - unsigned int l1i, l2i, port; - unsigned long masked_l1, masked_l2; - /* XXX: All events are bound to vcpu0 but irq may be redirected. */ - int cpu = 0; /*smp_processor_id();*/ - driver_intr_t *handler; - void *handler_arg; - int irq, handler_mpsafe; - shared_info_t *s = HYPERVISOR_shared_info; - vcpu_info_t *v = &s->vcpu_info[cpu]; - struct pcpu *pc = pcpu_find(cpu); - unsigned long l1, l2; + cpu = PCPU_GET(cpuid); + pc = DPCPU_PTR(xen_intr_pcpu); + s = HYPERVISOR_shared_info; + v = &s->vcpu_info[cpu]; v->evtchn_upcall_pending = 0; @@ -331,137 +478,520 @@ l1 = atomic_readandclear_long(&v->evtchn_pending_sel); - l1i = pc->pc_last_processed_l1i; - l2i = pc->pc_last_processed_l2i; + l1i = pc->last_processed_l1i; + l2i = pc->last_processed_l2i; + (*pc->evtchn_intrcnt)++; while (l1 != 0) { l1i = (l1i + 1) % LONG_BIT; masked_l1 = l1 & ((~0UL) << l1i); - if (masked_l1 == 0) { /* if we masked out all events, wrap around to the beginning */ + if (masked_l1 == 0) { + /* + * if we masked out all events, wrap around + * to the beginning. + */ l1i = LONG_BIT - 1; l2i = LONG_BIT - 1; continue; } - l1i = __ffs(masked_l1); + l1i = ffsl(masked_l1) - 1; do { - l2 = active_evtchns(cpu, s, l1i); + l2 = xen_intr_active_ports(pc, s, l1i); l2i = (l2i + 1) % LONG_BIT; masked_l2 = l2 & ((~0UL) << l2i); - if (masked_l2 == 0) { /* if we masked out all events, move on */ + if (masked_l2 == 0) { + /* if we masked out all events, move on */ l2i = LONG_BIT - 1; break; } - l2i = __ffs(masked_l2); + l2i = ffsl(masked_l2) - 1; /* process port */ port = (l1i * LONG_BIT) + l2i; synch_clear_bit(port, &s->evtchn_pending[0]); - irq = evtchn_to_irq[port]; - if (irq < 0) - continue; - - mtx_lock(&irq_evtchn[irq].lock); - handler = irq_evtchn[irq].handler; - handler_arg = irq_evtchn[irq].arg; - handler_mpsafe = irq_evtchn[irq].mpsafe; - if (unlikely(handler == NULL)) { - printf("Xen IRQ%d (port %d) has no handler!\n", - irq, port); - mtx_unlock(&irq_evtchn[irq].lock); + isrc = xen_intr_port_to_isrc[port]; + if (unlikely(isrc == NULL)) { + printf("Xen port %d not configured!\n", port); continue; } - irq_evtchn[irq].in_handler = 1; - mtx_unlock(&irq_evtchn[irq].lock); - //local_irq_enable(); - if (!handler_mpsafe) - mtx_lock(&Giant); - handler(handler_arg); - if (!handler_mpsafe) - mtx_unlock(&Giant); - //local_irq_disable(); + intr_execute_handlers(&isrc->xenisrc_intsrc, + trap_frame); - mtx_lock(&irq_evtchn[irq].lock); - irq_evtchn[irq].in_handler = 0; - mtx_unlock(&irq_evtchn[irq].lock); + /* + * If this is the final port processed, + * we'll pick up here+1 next time. 
+ */ + pc->last_processed_l1i = l1i; + pc->last_processed_l2i = l2i; - /* if this is the final port processed, we'll pick up here+1 next time */ - pc->pc_last_processed_l1i = l1i; - pc->pc_last_processed_l2i = l2i; - } while (l2i != LONG_BIT - 1); - l2 = active_evtchns(cpu, s, l1i); - if (l2 == 0) /* we handled all ports, so we can clear the selector bit */ + } while (l2i != LONG_BIT - 1); + + l2 = xen_intr_active_ports(pc, s, l1i); + if (l2 == 0) { + /* + * We handled all ports, so we can clear the + * selector bit. + */ l1 &= ~(1UL << l1i); + } } } -void -irq_suspend(void) +static int +xen_intr_init(void *dummy __unused) +{ + struct xen_intr_pcpu_data *pcpu; + + mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF); + + /* Default to all event channels processed on CPU 0. */ + pcpu = DPCPU_ID_PTR(0, xen_intr_pcpu); + memset(pcpu->evtchn_enabled, ~0, sizeof(pcpu->evtchn_enabled)); + + /* + * Register CPU0's interrupt count manually as we aren't + * guaranteed to see a call to xen_intr_assign_cpu() + * before our first interrupt. + */ + xen_intr_intrcnt_add(0); + + intr_register_pic(&xen_intr_pic); + + return (0); +} +SYSINIT(xen_intr_init, SI_SUB_INTR, SI_ORDER_ANY, xen_intr_init, NULL); + +/*--------------------------- Common PIC Functions ---------------------------*/ +/** + * Prepare this PIC for system suspension. + */ +static void +xen_intr_suspend(struct pic *unused) +{ +} + +/** + * Return this PIC to service after being suspended. + */ +static void +xen_intr_resume(struct pic *unused) +{ + u_int port; + + /* + * Mask events for all ports. They will be unmasked after + * drivers have re-registered their handlers. + */ + for (port = 0; port < NR_EVENT_CHANNELS; port++) + evtchn_mask_port(port); +} + +/** + * Disable a Xen interrupt source. + * + * \param isrc The interrupt source to disable. + */ +static void +xen_intr_disable_intr(struct intsrc *isrc) +{ + struct xenisrc *xenisrc = (struct xenisrc *)isrc; + + evtchn_mask_port(xenisrc->xenisrc_port); +} + +/** + * Determine the global interrupt vector number for + * a Xen interrupt source. + * + * \param isrc The interrupt source to query. + * + * \return The vector number corresponding to the given interrupt source. + */ +static int +xen_intr_vector(struct intsrc *isrc) +{ + struct xenisrc *xenisrc = (struct xenisrc *)isrc; + + return (xenisrc->xenisrc_vector); +} + +/** + * Determine whether or not interrupt events are pending on + * the given interrupt source. + * + * \param isrc The interrupt source to query. + * + * \returns 0 if no events are pending, otherwise non-zero. + */ +static int +xen_intr_source_pending(struct intsrc *isrc) +{ + /* + * EventChannels are edge triggered and never masked. + * There can be no pending events. + */ + return (0); +} + +/** + * Perform configuration of an interrupt source. + * + * \param isrc The interrupt source to configure. + * \param trig Edge or level. + * \param pol Active high or low. + * + * \returns 0 on success, otherwise an errno. + */ +static int +xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol) +{ + /* Configuration is only possible via the evtchn apis. */ + return (ENODEV); +} + +/** + * Configure CPU affinity for interrupt source event delivery. + * + * \param isrc The interrupt source to configure. + * \param apic_id The apic id of the CPU for handling future events. + * + * \returns 0 if successful, otherwise an errno.
+ */ +static int +xen_intr_assign_cpu(struct intsrc *isrc, u_int apic_id) { - struct xenpci_softc *scp = device_get_softc(xenpci_device); + struct evtchn_bind_vcpu bind_vcpu; + struct xenisrc *xenisrc; + u_int to_cpu; + + if ((xen_os_capabilities() & XOC_PCPU_INTR_BINDING) == 0) + return (EOPNOTSUPP); + + to_cpu = apic_cpuid(apic_id); + xen_intr_intrcnt_add(to_cpu); + + mtx_lock(&xen_intr_isrc_lock); + xenisrc = (struct xenisrc *)isrc; + if (!is_valid_evtchn(xenisrc->xenisrc_port)) { + mtx_unlock(&xen_intr_isrc_lock); + return (EINVAL); + } + + bind_vcpu.port = xenisrc->xenisrc_port; + bind_vcpu.vcpu = to_cpu; /* - * Take our interrupt handler out of the list of handlers - * that can handle this irq. + * Allow interrupts to be fielded on the new VCPU before + * we ask the hypervisor to deliver them there. */ - if (scp->intr_cookie != NULL) { - if (BUS_TEARDOWN_INTR(device_get_parent(xenpci_device), - xenpci_device, scp->res_irq, scp->intr_cookie) != 0) - printf("intr teardown failed.. continuing\n"); - scp->intr_cookie = NULL; + evtchn_cpu_unmask_port(to_cpu, xenisrc->xenisrc_port); + if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) { + /* Commit to new binding by removing the old one. */ + evtchn_cpu_mask_port(xenisrc->xenisrc_cpu, + xenisrc->xenisrc_port); + xenisrc->xenisrc_cpu = to_cpu; + } else { + /* Roll-back to previous binding. */ + evtchn_cpu_mask_port(to_cpu, xenisrc->xenisrc_port); } + mtx_unlock(&xen_intr_isrc_lock); + return (0); +} + +/*------------------- Virtual Interrupt Source PIC Functions -----------------*/ +/* + * Mask a level triggered interrupt source. + * + * \param isrc The interrupt source to mask (if necessary). + * \param eoi If non-zero, perform any necessary end-of-interrupt + * acknowledgements. + */ +static void +xen_intr_disable_source(struct intsrc *isrc, int eoi) +{ } -void -irq_resume(void) +/* + * Unmask a level triggered interrupt source. + * + * \param isrc The interrupt source to unmask (if necessary). + */ +static void +xen_intr_enable_source(struct intsrc *isrc) +{ +} + +/* + * Perform any necessary end-of-interrupt acknowledgements. + * + * \param isrc The interrupt source to EOI. + */ +static void +xen_intr_eoi_source(struct intsrc *isrc) +{ +} + +/* + * Enable and unmask the interrupt source. + * + * \param isrc The interrupt source to enable. + */ +static void +xen_intr_enable_intr(struct intsrc *isrc) +{ + struct xenisrc *xenisrc = (struct xenisrc *)isrc; + + evtchn_unmask_port(xenisrc->xenisrc_port); +} + +/*------------------ Physical Interrupt Source PIC Functions -----------------*/ +/* + * Mask a level triggered interrupt source. + * + * \param isrc The interrupt source to mask (if necessary). + * \param eoi If non-zero, perform any necessary end-of-interrupt + * acknowledgements. + */ +static void +xen_intr_pirq_disable_source(struct intsrc *isrc, int eoi) +{ + struct xenisrc *xenisrc; + + xenisrc = (struct xenisrc *)isrc; + evtchn_mask_port(xenisrc->xenisrc_port); +} + +/* + * Unmask a level triggered interrupt source. + * + * \param isrc The interrupt source to unmask (if necessary). + */ +static void +xen_intr_pirq_enable_source(struct intsrc *isrc) +{ + struct xenisrc *xenisrc; + + xenisrc = (struct xenisrc *)isrc; + evtchn_unmask_port(xenisrc->xenisrc_port); +} + +/* + * Perform any necessary end-of-interrupt acknowledgements. + * + * \param isrc The interrupt source to EOI. 
+ */ +static void +xen_intr_pirq_eoi_source(struct intsrc *isrc) { - struct xenpci_softc *scp = device_get_softc(xenpci_device); - int evtchn, irq; + struct xenisrc *xenisrc; + + /* XXX Use shared page of flags for this. */ + xenisrc = (struct xenisrc *)isrc; + if (xenisrc->xenisrc_needs_eoi != 0) { + struct physdev_eoi eoi = { .irq = xenisrc->xenisrc_pirq }; - for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++) { - mask_evtchn(evtchn); - evtchn_to_irq[evtchn] = -1; + (void)HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); } +} + +/* + * Enable and unmask the interrupt source. + * + * \param isrc The interrupt source to enable. + */ +static void +xen_intr_pirq_enable_intr(struct intsrc *isrc) +{ +} - for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++) - irq_evtchn[irq].evtchn = 0; +/*--------------------------- Public Functions -------------------------------*/ +/*------- API comments for these methods can be found in xen/xen_intr.h ------*/ +int +xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type flags, xen_intr_handle_t *port_handlep) +{ + struct xenisrc *isrc; + int error; + + error = xen_intr_bind_isrc(&isrc, local_port, EVTCHN_TYPE_PORT, dev, + filter, handler, arg, flags, port_handlep); + if (error != 0) + return (error); - BUS_SETUP_INTR(device_get_parent(xenpci_device), - xenpci_device, scp->res_irq, INTR_TYPE_MISC, - NULL, evtchn_interrupt, NULL, &scp->intr_cookie); + /* + * The Event Channel API didn't open this port, so it is not + * responsible for closing it automatically on unbind. + */ + isrc->xenisrc_close = 0; + return (0); } int -xenpci_irq_init(device_t device, struct xenpci_softc *scp) +xen_intr_alloc_and_bind_local_port(device_t dev, u_int remote_domain, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type flags, xen_intr_handle_t *port_handlep) { - int irq, cpu; + struct xenisrc *isrc; + struct evtchn_alloc_unbound alloc_unbound; int error; - mtx_init(&irq_alloc_lock, "xen-irq-lock", NULL, MTX_DEF); + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = remote_domain; + error = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (error != 0) { + /* + * XXX Trap Hypercall error code Linuxisms in + * the HYPERCALL layer.
+ */ + return (-error); + } + + error = xen_intr_bind_isrc(&isrc, alloc_unbound.port, EVTCHN_TYPE_PORT, + dev, filter, handler, arg, flags, + port_handlep); + if (error != 0) { + evtchn_close_t close = { .port = alloc_unbound.port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + return (error); + } + + isrc->xenisrc_close = 1; + return (0); +} - for (irq = 0; irq < ARRAY_SIZE(irq_evtchn); irq++) - mtx_init(&irq_evtchn[irq].lock, "irq-evtchn", NULL, MTX_DEF); +int +xen_intr_bind_remote_port(device_t dev, u_int remote_domain, + u_int remote_port, driver_filter_t filter, driver_intr_t handler, + void *arg, enum intr_type flags, xen_intr_handle_t *port_handlep) +{ + struct xenisrc *isrc; + struct evtchn_bind_interdomain bind_interdomain; + int error; - for (cpu = 0; cpu < mp_ncpus; cpu++) { - pcpu_find(cpu)->pc_last_processed_l1i = LONG_BIT - 1; - pcpu_find(cpu)->pc_last_processed_l2i = LONG_BIT - 1; + bind_interdomain.remote_dom = remote_domain; + bind_interdomain.remote_port = remote_port; + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, + &bind_interdomain); + if (error != 0) { + /* + * XXX Trap Hypercall error code Linuxisms in + * the HYPERCALL layer. + */ + return (-error); } - error = BUS_SETUP_INTR(device_get_parent(device), device, - scp->res_irq, INTR_MPSAFE|INTR_TYPE_MISC, NULL, evtchn_interrupt, - NULL, &scp->intr_cookie); - if (error) + error = xen_intr_bind_isrc(&isrc, bind_interdomain.local_port, + EVTCHN_TYPE_PORT, dev, filter, handler, + arg, flags, port_handlep); + if (error) { + evtchn_close_t close = { .port = bind_interdomain.local_port }; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); return (error); + } + + /* + * The Event Channel API opened this port, so it is + * responsible for closing it automatically on unbind. + */ + isrc->xenisrc_close = 1; + return (0); +} + +int +xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type flags, xen_intr_handle_t *port_handlep) +{ + struct xenisrc *isrc; + struct evtchn_bind_virq bind_virq = { .virq = virq, .vcpu = cpu }; + int error; + + /* Ensure the target CPU is ready to handle evtchn interrupts. */ + xen_intr_intrcnt_add(cpu); + + isrc = NULL; + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); + if (error != 0) { + /* + * XXX Trap Hypercall error code Linuxisms in + * the HYPERCALL layer. + */ + return (-error); } - xenpci_device = device; + error = xen_intr_bind_isrc(&isrc, bind_virq.port, EVTCHN_TYPE_VIRQ, dev, + filter, handler, arg, flags, port_handlep); + if (error == 0) + error = intr_event_bind(isrc->xenisrc_intsrc.is_event, cpu); + + if (error != 0) { + evtchn_close_t close = { .port = bind_virq.port }; + + xen_intr_unbind(port_handlep); + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close)) + panic("EVTCHNOP_close failed"); + return (error); + } + /* + * The Event Channel API opened this port, so it is + * responsible for closing it automatically on unbind.
+ */ + isrc->xenisrc_close = 1; return (0); } + +void +xen_intr_unbind(xen_intr_handle_t *port_handlep) +{ + struct intr_handler *handler; + struct xenisrc *isrc; + + handler = *port_handlep; + *port_handlep = NULL; + isrc = xen_intr_isrc(handler); + if (isrc == NULL) + return; + + intr_remove_handler(handler); + xen_intr_release_isrc(isrc); +} + +void +xen_intr_signal(xen_intr_handle_t handle) +{ + struct xenisrc *xenisrc; + + xenisrc = xen_intr_isrc(handle); + if (xenisrc != NULL) { + KASSERT(xenisrc->xenisrc_type == EVTCHN_TYPE_PORT, + ("evtchn_signal on something other than a local port")); + struct evtchn_send send = { .port = xenisrc->xenisrc_port }; + (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); + } +} + +evtchn_port_t +xen_intr_port(xen_intr_handle_t handle) +{ + struct xenisrc *isrc; + + isrc = xen_intr_isrc(handle); + if (isrc == NULL) + return (0); + + return (isrc->xenisrc_port); +} + ==== //depot/SpectraBSD/stable/9/sys/xen/evtchn.h#2 (text) ==== @@ -1,94 +1,87 @@ /****************************************************************************** * evtchn.h * - * Communication via Xen event channels. - * Also definitions for the device that demuxes notifications to userspace. + * Interface to /dev/xen/evtchn. + * + * Copyright (c) 2003-2005, K A Fraser + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. * - * Copyright (c) 2004, K A Fraser + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. * - * $FreeBSD: stable/9/sys/xen/evtchn.h 196322 2009-08-17 14:38:59Z jhb $ + * $FreeBSD$ */ -#ifndef __ASM_EVTCHN_H__ -#define __ASM_EVTCHN_H__ -#include -#include -#include -#include +#ifndef __XEN_EVTCHN_H__ +#define __XEN_EVTCHN_H__ /* - * LOW-LEVEL DEFINITIONS + * Bind a fresh port to VIRQ @virq. */ +#define IOCTL_EVTCHN_BIND_VIRQ \ + _IOWR('E', 4, struct ioctl_evtchn_bind_virq) +struct ioctl_evtchn_bind_virq { + unsigned int virq; + unsigned int port; +}; /* - * Unlike notify_remote_via_evtchn(), this is safe to use across - * save/restore. Notifications on a broken connection are silently dropped. + * Bind a fresh port to remote <@remote_domain, @remote_port>. */ -void notify_remote_via_irq(int irq); +#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ + _IOWR('E', 5, struct ioctl_evtchn_bind_interdomain) +struct ioctl_evtchn_bind_interdomain { + unsigned int remote_domain, remote_port; + unsigned int port; +}; - -/* Entry point for notifications into Linux subsystems. 
*/ -void evtchn_do_upcall(struct trapframe *frame); - -/* Entry point for notifications into the userland character device. */ -void evtchn_device_upcall(int port); +/* + * Allocate a fresh port for binding to @remote_domain. + */ +#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ + _IOWR('E', 6, struct ioctl_evtchn_bind_unbound_port) +struct ioctl_evtchn_bind_unbound_port { + unsigned int remote_domain; + unsigned int port; +}; -void mask_evtchn(int port); - -void unmask_evtchn(int port); - -#ifdef SMP -void rebind_evtchn_to_cpu(int port, unsigned int cpu); -#else -#define rebind_evtchn_to_cpu(port, cpu) ((void)0) -#endif - -static inline -int test_and_set_evtchn_mask(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - return synch_test_and_set_bit(port, s->evtchn_mask); -} - -static inline void -clear_evtchn(int port) -{ - shared_info_t *s = HYPERVISOR_shared_info; - synch_clear_bit(port, &s->evtchn_pending[0]); -} - -static inline void -notify_remote_via_evtchn(int port) -{ - struct evtchn_send send = { .port = port }; - (void)HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); -} - /* - * Use these to access the event channel underlying the IRQ handle returned - * by bind_*_to_irqhandler(). + * Unbind previously allocated @port. */ -int irq_to_evtchn_port(int irq); - -void ipi_pcpu(unsigned int cpu, int vector); +#define IOCTL_EVTCHN_UNBIND \ + _IOW('E', 7, struct ioctl_evtchn_unbind) +struct ioctl_evtchn_unbind { + unsigned int port; +}; /* - * CHARACTER-DEVICE DEFINITIONS + * Send event to previously allocated @port. */ +#define IOCTL_EVTCHN_NOTIFY \ + _IOW('E', 8, struct ioctl_evtchn_notify) +struct ioctl_evtchn_notify { + unsigned int port; +}; -#define PORT_NORMAL 0x0000 -#define PORT_EXCEPTION 0x8000 -#define PORTIDX_MASK 0x7fff +/* Clear and reinitialise the event buffer. Clear error condition. */ +#define IOCTL_EVTCHN_RESET \ + _IO('E', 9) -/* /dev/xen/evtchn resides at device number major=10, minor=200 */ -#define EVTCHN_MINOR 200 - -/* /dev/xen/evtchn ioctls: */ -/* EVTCHN_RESET: Clear and reinit the event buffer. Clear error condition. */ -#define EVTCHN_RESET _IO('E', 1) -/* EVTCHN_BIND: Bind to the specified event-channel port. */ -#define EVTCHN_BIND _IO('E', 2) -/* EVTCHN_UNBIND: Unbind from the specified event-channel port. */ -#define EVTCHN_UNBIND _IO('E', 3) - -#endif /* __ASM_EVTCHN_H__ */ +#endif /* __XEN_EVTCHN_H__ */ ==== //depot/SpectraBSD/stable/9/sys/xen/evtchn/evtchn_dev.c#3 (text) ==== @@ -22,28 +22,23 @@ #include #include #include +#include #include -#include #include -#include #include #include -#include + #include +#include +#include typedef struct evtchn_sotfc { struct selinfo ev_rsel; } evtchn_softc_t; - -#ifdef linuxcrap -/* NB. This must be shared amongst drivers if more things go in /dev/xen */ -static devfs_handle_t xen_dev_dir; -#endif - /* Only one process may open /dev/xen/evtchn at any time. 
*/ static unsigned long evtchn_dev_inuse; @@ -72,12 +67,12 @@ void -evtchn_device_upcall(int port) +evtchn_device_upcall(evtchn_port_t port) { mtx_lock(&upcall_lock); - mask_evtchn(port); - clear_evtchn(port); + evtchn_mask_port(port); + evtchn_clear_port(port); if ( ring != NULL ) { if ( (ring_prod - ring_cons) < EVTCHN_RING_SIZE ) { @@ -208,7 +203,7 @@ mtx_lock_spin(&lock); for ( i = 0; i < (count/2); i++ ) if ( test_bit(kbuf[i], &bound_ports[0]) ) - unmask_evtchn(kbuf[i]); + evtchn_unmask_port(kbuf[i]); mtx_unlock_spin(&lock); rc = count; @@ -224,6 +219,7 @@ { int rc = 0; +#ifdef NOTYET mtx_lock_spin(&lock); switch ( cmd ) @@ -249,6 +245,7 @@ } mtx_unlock_spin(&lock); +#endif return rc; } @@ -309,7 +306,7 @@ mtx_lock_spin(&lock); for ( i = 0; i < NR_EVENT_CHANNELS; i++ ) if ( synch_test_and_clear_bit(i, &bound_ports[0]) ) - mask_evtchn(i); + evtchn_mask_port(i); mtx_unlock_spin(&lock); evtchn_dev_inuse = 0; @@ -352,34 +349,6 @@ evtchn_dev->si_drv1 = malloc(sizeof(evtchn_softc_t), M_DEVBUF, M_WAITOK); bzero(evtchn_dev->si_drv1, sizeof(evtchn_softc_t)); - /* XXX I don't think we need any of this rubbish */ -#if 0 - if ( err != 0 ) - { - printk(KERN_ALERT "Could not register /dev/misc/evtchn\n"); - return err; - } - - /* (DEVFS) create directory '/dev/xen'. */ - xen_dev_dir = devfs_mk_dir(NULL, "xen", NULL); - - /* (DEVFS) &link_dest[pos] == '../misc/evtchn'. */ - pos = devfs_generate_path(evtchn_miscdev.devfs_handle, - &link_dest[3], - sizeof(link_dest) - 3); - if ( pos >= 0 ) - strncpy(&link_dest[pos], "../", 3); - /* (DEVFS) symlink '/dev/xen/evtchn' -> '../misc/evtchn'. */ - (void)devfs_mk_symlink(xen_dev_dir, - "evtchn", - DEVFS_FL_DEFAULT, - &link_dest[pos], - &symlink_handle, - NULL); - - /* (DEVFS) automatically destroy the symlink with its destination. */ - devfs_auto_unregister(evtchn_miscdev.devfs_handle, symlink_handle); -#endif if (bootverbose) printf("Event-channel device installed.\n"); ==== //depot/SpectraBSD/stable/9/sys/xen/hvm.h#2 (text) ==== @@ -27,4 +27,46 @@ return (xhv.value); } +/** The callback method types for Hypervisor event delivery to our domain. */ +enum { + HVM_CB_TYPE_GSI, + HVM_CB_TYPE_PCI_INTX, + HVM_CB_TYPE_VECTOR, + HVM_CB_TYPE_MASK = 0xFF, + HVM_CB_TYPE_SHIFT = 56 +}; + +/** Format for specifying a GSI type callback. */ +enum { + HVM_CB_GSI_GSI_MASK = 0xFFFFFFFF, + HVM_CB_GSI_GSI_SHIFT = 0 +}; +#define HVM_CALLBACK_GSI(gsi) \ + (((uint64_t)HVM_CB_TYPE_GSI << HVM_CB_TYPE_SHIFT) \ + | ((gsi) & HVM_CB_GSI_GSI_MASK) << HVM_CB_GSI_GSI_SHIFT) + +/** Format for specifying a virtual PCI interrupt line GSI style callback. */ +enum { + HVM_CB_PCI_INTX_INTPIN_MASK = 0x3, + HVM_CB_PCI_INTX_INTPIN_SHIFT = 0, + HVM_CB_PCI_INTX_SLOT_MASK = 0x1F, + HVM_CB_PCI_INTX_SLOT_SHIFT = 11, +}; +#define HVM_CALLBACK_PCI_INTX(slot, pin) \ + (((uint64_t)HVM_CB_TYPE_PCI_INTX << HVM_CB_TYPE_SHIFT) \ + | (((slot) & HVM_CB_PCI_INTX_SLOT_MASK) << HVM_CB_PCI_INTX_SLOT_SHIFT) \ + | (((pin) & HVM_CB_PCI_INTX_INTPIN_MASK) << HVM_CB_PCI_INTX_INTPIN_SHIFT)) + +/** Format for specifying a direct IDT vector injection style callback. 
*/ +enum { + HVM_CB_VECTOR_VECTOR_MASK = 0xFFFFFFFF, + HVM_CB_VECTOR_VECTOR_SHIFT = 0 +}; +#define HVM_CALLBACK_VECTOR(vector) \ + (((uint64_t)HVM_CB_TYPE_VECTOR << HVM_CB_TYPE_SHIFT) \ + | (((vector) & HVM_CB_GSI_GSI_MASK) << HVM_CB_GSI_GSI_SHIFT)) + +void xen_hvm_set_callback(device_t); +void xen_hvm_suspend(void); +void xen_hvm_resume(void); #endif /* __XEN_HVM_H__ */ ==== //depot/SpectraBSD/stable/9/sys/xen/interface/event_channel.h#3 (text) ==== @@ -71,8 +71,11 @@ #define EVTCHNOP_reset 10 /* ` } */ +#ifndef __XEN_EVTCHN_PORT_DEFINED__ typedef uint32_t evtchn_port_t; DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); +#define __XEN_EVTCHN_PORT_DEFINED__ 1 +#endif /* * EVTCHNOP_alloc_unbound: Allocate a port in domain and mark as ==== //depot/SpectraBSD/stable/9/sys/xen/xen_intr.h#2 (text) ==== @@ -1,103 +1,202 @@ -/* -*- Mode:C; c-basic-offset:4; tab-width:4 -*- */ +/****************************************************************************** + * xen_intr.h + * + * APIs for managing Xen event channel, virtual IRQ, and physical IRQ + * notifications. + * + * Copyright (c) 2004, K A Fraser + * Copyright (c) 2012, Spectra Logic Corporation + * + * This file may be distributed separately from the Linux kernel, or + * incorporated into other software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * $FreeBSD: stable/9/sys/xen/xen_intr.h 214077 2010-10-19 20:53:30Z gibbs $ + */ #ifndef _XEN_INTR_H_ #define _XEN_INTR_H_ -/* -* The flat IRQ space is divided into two regions: -* 1. A one-to-one mapping of real physical IRQs. This space is only used -* if we have physical device-access privilege. This region is at the -* start of the IRQ space so that existing device drivers do not need -* to be modified to translate physical IRQ numbers into our IRQ space. -* 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These -* are bound using the provided bind/unbind functions. -* -* -* $FreeBSD: stable/9/sys/xen/xen_intr.h 214077 2010-10-19 20:53:30Z gibbs $ -*/ +#ifndef __XEN_EVTCHN_PORT_DEFINED__ +typedef uint32_t evtchn_port_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); +#define __XEN_EVTCHN_PORT_DEFINED__ 1 +#endif -#define PIRQ_BASE 0 -#define NR_PIRQS 128 +/** Registered Xen interrupt callback handle. */ +typedef void * xen_intr_handle_t; -#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS) -#define NR_DYNIRQS 128 +/** + * Associate an already allocated local event channel port with an interrupt + * handler.
+ * + * \param dev The device making this bind request. + * \param local_port The event channel to bind. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. + */ +int xen_intr_bind_local_port(device_t dev, evtchn_port_t local_port, + driver_filter_t filter, driver_intr_t handler, void *arg, + enum intr_type irqflags, xen_intr_handle_t *handlep); -#define NR_IRQS (NR_PIRQS + NR_DYNIRQS) - -#define pirq_to_irq(_x) ((_x) + PIRQ_BASE) -#define irq_to_pirq(_x) ((_x) - PIRQ_BASE) - -#define dynirq_to_irq(_x) ((_x) + DYNIRQ_BASE) -#define irq_to_dynirq(_x) ((_x) - DYNIRQ_BASE) - -/* - * Dynamic binding of event channels and VIRQ sources to guest IRQ space. +/** + * Allocate a local event channel port, accessible by the specified + * remote/foreign domain and, if successful, associate the port with + * the specified interrupt handler. + * + * \param dev The device making this bind request. + * \param remote_domain Remote domain granted permission to signal the + * newly allocated local port. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ +int xen_intr_alloc_and_bind_local_port(device_t dev, + u_int remote_domain, driver_filter_t filter, driver_intr_t handler, + void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); -/* - * Bind a caller port event channel to an interrupt handler. If - * successful, the guest IRQ number is returned in *irqp. Return zero - * on success or errno otherwise. +/** + * Associate the specified interrupt handler with the remote event + * channel port specified by remote_domain and remote_port. + * + * \param dev The device making this bind request. + * \param remote_domain The domain peer for this event channel connection. + * \param remote_port Remote domain's local port number for this event + * channel port. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno.
*/ -extern int bind_caller_port_to_irqhandler(unsigned int caller_port, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); +int xen_intr_bind_remote_port(device_t dev, u_int remote_domain, + evtchn_port_t remote_port, driver_filter_t filter, + driver_intr_t handler, void *arg, enum intr_type irqflags, + xen_intr_handle_t *handlep); -/* - * Bind a listening port to an interrupt handler. If successful, the - * guest IRQ number is returned in *irqp. Return zero on success or - * errno otherwise. +/** + * Associate the specified interrupt handler with the specified Xen + * virtual interrupt source. + * + * \param dev The device making this bind request. + * \param virq The Xen virtual IRQ number for the Xen interrupt + * source being hooked. + * \param cpu The cpu on which interrupt events should be delivered. + * \param filter An interrupt filter handler. Specify NULL + * to always dispatch to the ithread handler. + * \param handler An interrupt ithread handler. Optional (can + * specify NULL) if all necessary event actions + * are performed by filter. + * \param arg Argument to present to both filter and handler. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_listening_port_to_irqhandler(unsigned int remote_domain, - const char *devname, driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); +int xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, + driver_filter_t filter, driver_intr_t handler, + void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); -/* - * Bind a VIRQ to an interrupt handler. If successful, the guest IRQ - * number is returned in *irqp. Return zero on success or errno - * otherwise. +/** + * Associate an interprocessor interrupt vector with an interrupt handler. + * + * \param dev The device making this bind request. + * \param ipi The interprocessor interrupt vector number of the + * interrupt source being hooked. + * \param cpu The cpu receiving the IPI. + * \param filter An interrupt filter handler. All necessary event + * actions are performed by this filter; this variant + * takes no separate ithread handler or argument. + * \param irqflags Interrupt handler flags. See sys/bus.h. + * \param handlep Pointer to an opaque handle used to manage this + * registration. + * + * \returns 0 on success, otherwise an errno. */ -extern int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, - const char *devname, driver_filter_t filter, driver_intr_t handler, - void *arg, unsigned long irqflags, unsigned int *irqp); +int xen_intr_bind_ipi(device_t dev, u_int ipi, u_int cpu, + driver_filter_t filter, enum intr_type irqflags, + xen_intr_handle_t *handlep); -/* - * Bind an IPI to an interrupt handler. If successful, the guest - * IRQ number is returned in *irqp. Return zero on success or errno - * otherwise. +/** + * Unbind an interrupt handler from its interrupt source. + * + * \param handlep A pointer to the opaque handle that was initialized + * at the time the interrupt source was bound. + * + * \note The event channel, if any, that was allocated at bind time is + * closed upon successful return of this method.
+ * + * \note It is always safe to call xen_intr_unbind() on a handle that + * has been initialized to NULL. */ -extern int bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu, - const char *devname, driver_filter_t filter, - unsigned long irqflags, unsigned int *irqp); +void xen_intr_unbind(xen_intr_handle_t *handle); -/* - * Bind an interdomain event channel to an interrupt handler. If - * successful, the guest IRQ number is returned in *irqp. Return zero - * on success or errno otherwise. +/** + * Signal the remote peer of an interrupt source associated with an + * event channel port. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \note For xen interrupt sources other than event channel ports, + * this method takes no action. */ -extern int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, - unsigned int remote_port, const char *devname, - driver_intr_t handler, void *arg, - unsigned long irqflags, unsigned int *irqp); +void xen_intr_signal(xen_intr_handle_t handle); -/* - * Unbind an interrupt handler using the guest IRQ number returned - * when it was bound. +/** + * Get the local event channel port number associated with this interrupt + * source. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \returns 0 if the handle is invalid, otherwise positive port number. */ -extern void unbind_from_irqhandler(unsigned int irq); - -static __inline__ int irq_cannonicalize(unsigned int irq) -{ - return (irq == 2) ? 9 : irq; -} - -extern void disable_irq(unsigned int); -extern void disable_irq_nosync(unsigned int); -extern void enable_irq(unsigned int); - -extern void irq_suspend(void); -extern void irq_resume(void); - -extern void idle_block(void); -extern int ap_cpu_initclocks(int cpu); +evtchn_port_t xen_intr_port(xen_intr_handle_t handle); #endif /* _XEN_INTR_H_ */ ==== //depot/SpectraBSD/stable/9/sys/xen/xenbus/xenbus.c#2 (text) ==== @@ -222,42 +222,6 @@ return (0); } -int -xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port) -{ - struct evtchn_alloc_unbound alloc_unbound; - int err; - - alloc_unbound.dom = DOMID_SELF; - alloc_unbound.remote_dom = xenbus_get_otherend_id(dev); - - err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, - &alloc_unbound); - - if (err) { - xenbus_dev_fatal(dev, -err, "allocating event channel"); - return (-err); - } - *port = alloc_unbound.port; - return (0); -} - -int -xenbus_free_evtchn(device_t dev, evtchn_port_t port) -{ - struct evtchn_close close; - int err; - - close.port = port; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); - if (err) { - xenbus_dev_error(dev, -err, "freeing event channel %d", port); - return (-err); - } - return (0); -} - XenbusState xenbus_read_driver_state(const char *path) { ==== //depot/SpectraBSD/stable/9/sys/xen/xenbus/xenbusvar.h#4 (text) ==== @@ -195,39 +195,6 @@ int xenbus_grant_ring(device_t dev, unsigned long ring_mfn, grant_ref_t *refp); /** - * Allocate an event channel for the given XenBus device. - * - * \param dev The device for which to allocate the event channel. - * \param port[out] The port identifier for the allocated event channel. - * - * \return On success, 0. Otherwise an errno value indicating the - * type of failure. - * - * A successfully allocated event channel should be free'd using - * xenbus_free_evtchn().
- * - * \note On error, \a dev will be switched to the XenbusStateClosing - * state and the returned error is saved in the per-device error node - * for \a dev in the XenStore. - */ -int xenbus_alloc_evtchn(device_t dev, evtchn_port_t *port); - -/** - * Free an existing event channel. - * - * \param dev The device which allocated this event channel. - * \param port The port identifier for the event channel to free. - * - * \return On success, 0. Otherwise an errno value indicating the - * type of failure. - * - * \note On error, \a dev will be switched to the XenbusStateClosing - * state and the returned error is saved in the per-device error node - * for \a dev in the XenStore. - */ -int xenbus_free_evtchn(device_t dev, evtchn_port_t port); - -/** * Record the given errno, along with the given, printf-style, formatted * message in dev's device specific error node in the XenStore. * ==== //depot/SpectraBSD/stable/9/sys/xen/xenstore/xenstore.c#4 (text) ==== @@ -52,7 +52,6 @@ #include #include -#include #include #include #include @@ -244,8 +243,8 @@ */ int evtchn; - /** Interrupt number for our event channel. */ - u_int irq; + /** Handle for XenStore interrupts. */ + xen_intr_handle_t xen_intr_handle; /** * Interrupt driven config hook allowing us to defer @@ -504,11 +503,10 @@ xen_store->req_prod += avail; /* - * notify_remote_via_evtchn implies mb(). The other side - * will see the change to req_prod at the time of the - * interrupt. + * evtchn_signal() implies mb(). The other side will see + * the change to req_prod at the time of the interrupt. */ - notify_remote_via_evtchn(xs.evtchn); + xen_intr_signal(xs.xen_intr_handle); } return (0); @@ -596,11 +594,10 @@ xen_store->rsp_cons += avail; /* - * notify_remote_via_evtchn implies mb(). The producer - * will see the updated consumer index when the event - * is delivered. + * evtchn_signal() implies mb(). The producer will see + * the updated consumer index when the event is delivered. */ - notify_remote_via_evtchn(xs.evtchn); + xen_intr_signal(xs.xen_intr_handle); } return (0); @@ -1067,11 +1064,11 @@ xen_store->rsp_cons = xen_store->rsp_prod; } - if (xs.irq) - unbind_from_irqhandler(xs.irq); + xen_intr_unbind(&xs.xen_intr_handle); - error = bind_caller_port_to_irqhandler(xs.evtchn, "xenstore", - xs_intr, NULL, INTR_TYPE_NET, &xs.irq); + error = xen_intr_bind_local_port(xs.xs_dev, xs.evtchn, + /*filter*/NULL, xs_intr, /*arg*/NULL, INTR_TYPE_NET|INTR_MPSAFE, + &xs.xen_intr_handle); if (error) { log(LOG_WARNING, "XENSTORE request irq failed %i\n", error); return (error); @@ -1167,7 +1164,6 @@ sx_init(&xs.suspend_mutex, "xenstore suspend"); mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF); mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF); - xs.irq = 0; /* Initialize the shared memory rings to talk to xenstored */ error = xs_init_comms(); Change 537407 by justing@justing-ns1 on 2012/04/20 23:46:52 sys/x86/xen/xen_intr.c: sys/xen/xen_intr.h: Add a new method to the Xen interrupt API: xen_intr_describe(). This mirrors the bus_describe_intr() method available to physical interrupt resources and allows a "descriptive tag" to be appended to an interrupt source. This allows, for example, a driver with per-cpu interrupts to append a cpu number to each of its handler names. Affected files ... ... //depot/SpectraBSD/stable/9/sys/x86/xen/xen_intr.c#3 edit ... //depot/SpectraBSD/stable/9/sys/xen/xen_intr.h#3 edit Differences ...
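As a usage sketch before the diffs: a driver with per-cpu interrupts might pair the new xen_intr_describe() with xen_intr_bind_virq() roughly as follows. Only the two xen_intr_*() prototypes are taken from this change set; the xentimer_* names, the handles array, and the interrupt-type flags are illustrative assumptions.

    #include <sys/param.h>
    #include <sys/bus.h>
    #include <sys/pcpu.h>
    #include <sys/smp.h>

    #include <xen/xen_intr.h>
    #include <xen/interface/xen.h>      /* VIRQ_TIMER */

    static int
    xentimer_filter(void *arg)
    {
            /* Acknowledge the event and reprogram the timer here. */
            return (FILTER_HANDLED);
    }

    static int
    xentimer_setup_intr(device_t dev, xen_intr_handle_t *handles)
    {
            u_int cpu;
            int error;

            CPU_FOREACH(cpu) {
                    error = xen_intr_bind_virq(dev, VIRQ_TIMER, cpu,
                        xentimer_filter, /*handler*/NULL, /*arg*/NULL,
                        INTR_TYPE_CLK | INTR_MPSAFE, &handles[cpu]);
                    if (error != 0)
                            return (error);
                    /*
                     * Tag each handler with its cpu number so that
                     * "vmstat -i" shows one entry per cpu.
                     */
                    (void)xen_intr_describe(handles[cpu], "c%d", cpu);
            }
            return (0);
    }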
==== //depot/SpectraBSD/stable/9/sys/x86/xen/xen_intr.c#3 (text) ==== @@ -46,6 +46,7 @@ #include #include +#include #include #include @@ -951,6 +952,23 @@ return (0); } +int +xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...) +{ + char descr[MAXCOMLEN + 1]; + struct xenisrc *isrc; + va_list ap; + + isrc = xen_intr_isrc(port_handle); + if (isrc == NULL) + return (EINVAL); + + va_start(ap, fmt); + vsnprintf(descr, sizeof(descr), fmt, ap); + va_end(ap); + return (intr_describe(isrc->xi_vector, port_handle, descr)); +} + void xen_intr_unbind(xen_intr_handle_t *port_handlep) { ==== //depot/SpectraBSD/stable/9/sys/xen/xen_intr.h#3 (text) ==== @@ -177,6 +177,21 @@ void xen_intr_unbind(xen_intr_handle_t *handle); /** + * Add a description to an interrupt handler. + * + * \param handle The opaque handle that was initialized at the time + * the interrupt source was bound. + * + * \param fmt The sprintf compatible format string for the description, + * followed by optional sprintf arguments. + * + * \returns 0 on success, otherwise an errno. + */ +int +xen_intr_describe(xen_intr_handle_t port_handle, const char *fmt, ...) + __attribute__((format(printf, 2, 3))); + +/** * Signal the remote peer of an interrupt source associated with an * event channel port. * Change 537408 by justing@justing-ns1 on 2012/04/21 00:15:27 Correct bugs related to binding a Xen virtual interrupt (e.g. the PV Timer) to an event channel port and callback function. sys/x86/xen/xen_intr.c: o In xen_unbind_isrc() narrow the window of the upcall interrupt handler attempting to service a recently unbound port that now has no handler, by explicitly clearing the "port pending" bit. Losing the race is benign other than a "stray" interrupt being reported. Fully closing the race would require a spin-lock which is too expensive for the upcall code path. o In xen_intr_assign_cpu(), treat VIRQ interrupt sources specially. These sources are bound to a particular CPU at the time of the bind Hypercall, and issuing a further, explicit, bind Hypercall will fail. o In xen_intr_assign_cpu(), correct the "fallback on bind failure" code to handle the case when the source and destination CPUs are the same. This can happen during a VM resume. o In xen_intr_bind_virq(), explicitly call xen_intr_assign_cpu() if the bind_virq call occurs too early in system startup for AP cpus to be started. This guarantees that the PCPU event channel masks are setup correctly on the very first VIRQ interrupt, so that delivery occurs on the correct CPU. Affected files ... ... //depot/SpectraBSD/stable/9/sys/x86/xen/xen_intr.c#4 edit Differences ... ==== //depot/SpectraBSD/stable/9/sys/x86/xen/xen_intr.c#4 (text) ==== @@ -46,6 +46,7 @@ #include #include +#include #include #include @@ -322,7 +323,7 @@ return (EBUSY); } evtchn_mask_port(isrc->xi_port); - xen_intr_port_to_isrc[isrc->xi_port] = NULL; + evtchn_clear_port(isrc->xi_port); /* Rebind port to CPU 0. 
*/ evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); @@ -334,6 +335,7 @@ panic("EVTCHNOP_close failed"); } + xen_intr_port_to_isrc[isrc->xi_port] = NULL; isrc->xi_cpu = 0; isrc->xi_type = EVTCHN_TYPE_UNBOUND; isrc->xi_port = 0; @@ -517,10 +519,8 @@ synch_clear_bit(port, &s->evtchn_pending[0]); isrc = xen_intr_port_to_isrc[port]; - if (unlikely(isrc == NULL)) { - printf("Xen port %d not configured!\n", port); + if (unlikely(isrc == NULL)) continue; - } intr_execute_handlers(&isrc->xi_intsrc, trap_frame); @@ -671,6 +671,7 @@ struct evtchn_bind_vcpu bind_vcpu; struct xenisrc *isrc; u_int to_cpu; + int error; if ((xen_os_capabilities() & XOC_PCPU_INTR_BINDING) == 0) return (EOPNOTSUPP); @@ -685,6 +686,19 @@ return (EINVAL); } + if (isrc->xi_type == EVTCHN_TYPE_VIRQ) { + /* + * Virtual IRQs are associated with a cpu by + * the Hypervisor at evtchn_bind_virq time, so + * all we need to do is update the per-CPU masks. + */ + evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); + isrc->xi_cpu = to_cpu; + evtchn_cpu_unmask_port(isrc->xi_cpu, isrc->xi_port); + mtx_unlock(&xen_intr_isrc_lock); + return (0); + } + bind_vcpu.port = isrc->xi_port; bind_vcpu.vcpu = to_cpu; @@ -693,13 +707,16 @@ * we ask the hypervisor to deliver them there. */ evtchn_cpu_unmask_port(to_cpu, isrc->xi_port); - if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) { - /* Commit to new binding by removing the old one. */ - evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); - isrc->xi_cpu = to_cpu; - } else { - /* Roll-back to previous binding. */ - evtchn_cpu_mask_port(to_cpu, isrc->xi_port); + error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu); + if (isrc->xi_cpu != to_cpu) { + if (error == 0) { + /* Commit to new binding by removing the old one. */ + evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port); + isrc->xi_cpu = to_cpu; + } else { + /* Roll-back to previous binding. */ + evtchn_cpu_mask_port(to_cpu, isrc->xi_port); + } } mtx_unlock(&xen_intr_isrc_lock); return (0); @@ -944,6 +961,16 @@ return (error); } + if (isrc->xi_cpu != cpu) { + /* + * Too early in the boot process for the generic interrupt + * code to perform the binding. Update our event channel + * masks manually so events can't fire on the wrong cpu + * during AP startup. + */ + xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]); + } + /* * The Event Channel API opened this port, so it is * responsible for closing it automatically on unbind. Change 537428 by willa@willa_repo on 2012/04/21 14:51:24 Introduce a paravirtualized timer driver for Xen. This driver requires a Xen hypervisor that supports vector callbacks, VCPUOP hypercalls, and reports that it has a "safe PV clock". It requires changes 537407 and 537408, which corrected issues related to binding the timer VIRQ to the correct CPU in FreeBSD's generic Xen interrupt handler. sys/dev/xen/timer/timer.c: - Register a PV timer device driver which (currently) implements device_{identify,probe,attach} and stubs device_detach. The detach routine requires functionality not provided by timecounters(4). The suspend and resume routines need additional work (due to Xen requiring that the hypercalls be executed on the target VCPU), and aren't needed for our purposes. - Make sure there can only be one device instance of this driver, and that it only registers one eventtimers(4) and one timecounters(4) device interface. Make both interfaces use PCPU data as needed. - Match, with a few style cleanups & API differences, the Xen versions of the "fetch time" functions. 
- Document the magic scale_delta() better for the i386 version. - When registering the event timer, bind a separate event channel for the timer VIRQ to the device's event timer interrupt handler for each active VCPU. Describe each interrupt as "xen_et:c%d", so they can be identified per CPU in "vmstat -i" or "show intrcnt" in KDB. - When scheduling a timer into the hypervisor, try up to 60 times if the hypervisor rejects the time as being in the past. In the common case, this retry shouldn't happen, and if it does, it should only happen once. This is because the event timer advertises a minimum period of 100usec, which falls below the usual hypercall round trip time in only about 1 out of every 100 tries. (Unlike other similar drivers, this one actually checks whether the hypervisor accepted the singleshot timer set hypercall.) sys/conf/files: - Add dev/xen/timer/timer.c if the kernel configuration includes either the XEN or XENHVM options. tools/tools/nanobsd/spectra/common.conf: - Remove ntpd from the Storage Domain startup configuration. Affected files ... ... //depot/SpectraBSD/stable/9/sys/conf/files#5 edit ... //depot/SpectraBSD/stable/9/sys/dev/xen/timer/timer.c#1 add ... //depot/SpectraBSD/stable/9/tools/tools/nanobsd/spectra/common.conf#10 edit Differences ... ==== //depot/SpectraBSD/stable/9/sys/conf/files#5 (text) ==== @@ -3560,3 +3560,4 @@ dev/xen/netback/netback.c optional xen | xenhvm dev/xen/netfront/netfront.c optional xen | xenhvm dev/xen/xenpci/xenpci.c optional xenpci +dev/xen/timer/timer.c optional xen | xenhvm ==== //depot/SpectraBSD/stable/9/tools/tools/nanobsd/spectra/common.conf#10 (text) ==== @@ -110,8 +110,6 @@ echo "zfs_enable=\"YES\"" >> etc/rc.conf echo "zfsd_enable=\"YES\"" >> etc/rc.conf echo "sendmail_enable=\"NONE\"" >> etc/rc.conf - echo "ntpd_enable=\"YES\"" >> etc/rc.conf - echo "ntpd_sync_on_start=\"YES\"" >> etc/rc.conf # This gets started by redline_prep # XXX This is a hack required in order to have syslogd start up # properly with respect to mDNS Change 538405 by willa@willa_repo on 2012/04/23 19:34:13 There is no need to attach this driver differently for PV domains. Affected files ... ... //depot/SpectraBSD/stable/9/sys/dev/xen/timer/timer.c#2 edit Differences ... ==== //depot/SpectraBSD/stable/9/sys/dev/xen/timer/timer.c#2 (text) ==== @@ -564,10 +564,5 @@ sizeof(struct xentimer_softc), }; -#ifdef XENHVM DRIVER_MODULE(xentimer, nexus, xentimer_driver, xentimer_devclass, 0, 0); MODULE_DEPEND(xentimer, nexus, 1, 1, 1); -#else -DRIVER_MODULE(xentimer, xenpci, xentimer_driver, xentimer_devclass, 0, 0); -MODULE_DEPEND(xentimer, xenpci, 1, 1, 1); -#endif Change 650584 by justing@justing_ns1_spectrabsd_integration on 2013/01/12 07:38:35 Delete file that should have been removed in change 537182. sys/dev/xen/xenpci/evtchn.c: HVM Xen interrupt and event channel support is now provided by x86/xen/xen_intr.c, leaving this file unused by the kernel. Affected files ... ... //SpectraBSD/stable/sys/dev/xen/xenpci/evtchn.c#2 delete Differences ...
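Referring back to the single-shot retry policy described in change 537428: a minimal sketch of the idea, assuming only the standard Xen VCPUOP interface (struct vcpu_set_singleshot_timer, VCPU_SSHOTTMR_future, and HYPERVISOR_vcpu_op() are real Xen/FreeBSD interfaces; the function name, the clock callback, and the retry bookkeeping are hypothetical stand-ins for the driver's internals, which are not reproduced in this log).

    #include <sys/param.h>

    #include <xen/hypervisor.h>
    #include <xen/interface/vcpu.h>

    /* Retry bound taken from the change description above. */
    #define XENTIMER_SET_RETRIES    60

    /*
     * Program the per-VCPU single-shot timer, retrying when the
     * hypervisor rejects the deadline as already being in the past.
     * "now_ns" is a caller-provided stand-in for the driver's
     * fetch-time helpers; re-reading it each pass moves the deadline
     * forward, so a second rejection should be rare.
     */
    static int
    xentimer_vcpu_start_timer(int vcpu, uint64_t delta_ns,
        uint64_t (*now_ns)(void))
    {
            struct vcpu_set_singleshot_timer single;
            int error, retries;

            error = 0;
            for (retries = 0; retries < XENTIMER_SET_RETRIES; retries++) {
                    single.timeout_abs_ns = now_ns() + delta_ns;
                    /* Fail, rather than fire immediately, on a stale time. */
                    single.flags = VCPU_SSHOTTMR_future;
                    error = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer,
                        vcpu, &single);
                    if (error == 0)
                            break;
                    /* Rejected: loop to re-read the clock and try again. */
            }
            return (error);
    }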