Implement PV IPIs for PVHVM guests and further converge PV and HVM IPI implementations. Submitted by: Roger Pau Monné Sponsored by: Citrix Systems R&D Submitted by: gibbs (misc cleanup, table driven config, PCID support) Reviewed by: gibbs sys/x86/xen/xen_intr.c: sys/xen/xen_intr.h: Rename xen_intr_bind_ipi() to xen_intr_alloc_and_bind_ipi(), and remove the ipi vector parameter. This API allocates an event channel port that can be used for ipi services, but knows nothing of the actual ipi for which that port will be used. Removing the unused argument and cleaning up the comments surrounding its declaration helps clarify its actual role. sys/amd64/amd64/mp_machdep.c: sys/amd64/include/cpu.h: sys/i386/i386/mp_machdep.c: sys/i386/include/cpu.h: Implement a generic framework for amd64 and i386 that allows the implementation of certain CPU management functions to be selected at runtime. Currently this is only used for the ipi send function, which we optimize for Xen when running on a Xen hypervisor, but can easily be expanded to support more operations. sys/x86/xen/hvm.c: Implement Xen PV IPI handlers and operations, replacing native send IPI. sys/amd64/include/pcpu.h: sys/i386/include/pcpu.h: sys/i386/include/smp.h: Remove NR_VIRQS and NR_IPIS from FreeBSD headers. NR_VIRQS is defined already for us in the xen interface files. NR_IPIS is only needed in one file per Xen platform and is easily inferred by the IPI vector table that is defined in those files. sys/i386/xen/mp_machdep.c: Restructure to more closely match the HVM implementation by performing table driven IPI setup. 
Index: amd64/amd64/mp_machdep.c =================================================================== --- amd64/amd64/mp_machdep.c (revision 255160) +++ amd64/amd64/mp_machdep.c (working copy) @@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #ifdef XENHVM #include @@ -125,6 +126,11 @@ u_long *ipi_rendezvous_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; #endif +/* Default cpu_ops implementation. */ +struct cpu_ops cpu_ops = { + .ipi_vectored = lapic_ipi_vectored +}; + extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); /* @@ -1118,7 +1124,7 @@ ipi_send_cpu(int cpu, u_int ipi) if (old_pending) return; } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); + cpu_ops.ipi_vectored(ipi, cpu_apic_ids[cpu]); } /* @@ -1390,7 +1396,7 @@ ipi_all_but_self(u_int ipi) CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); + cpu_ops.ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } int Index: amd64/amd64/pmap.c =================================================================== --- amd64/amd64/pmap.c (revision 255160) +++ amd64/amd64/pmap.c (working copy) @@ -254,30 +254,6 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFL 0, "Is TLB Context ID enabled ?"); int invpcid_works = 0; -/* - * Perform the guaranteed invalidation of all TLB entries. This - * includes the global entries, and entries in all PCIDs, not only the - * current context. The function works both on non-PCID CPUs and CPUs - * with the PCID turned off or on. See IA-32 SDM Vol. 3a 4.10.4.1 - * Operations that Invalidate TLBs and Paging-Structure Caches. - */ -static __inline void -invltlb_globpcid(void) -{ - uint64_t cr4; - - cr4 = rcr4(); - load_cr4(cr4 & ~CR4_PGE); - /* - * Although preemption at this point could be detrimental to - * performance, it would not lead to an error. PG_G is simply - * ignored if CR4.PGE is clear. 
Moreover, in case this block - * is re-entered, the load_cr4() either above or below will - * modify CR4.PGE flushing the TLB. - */ - load_cr4(cr4 | CR4_PGE); -} - static int pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS) { Index: amd64/include/cpu.h =================================================================== --- amd64/include/cpu.h (revision 255160) +++ amd64/include/cpu.h (working copy) @@ -54,6 +54,17 @@ #define TRAPF_PC(framep) ((framep)->tf_rip) #ifdef _KERNEL +/* + * Struct containing pointers to CPU management functions whose + * implementation is run time selectable. Selection can be made, + * for example, based on detection of a particular CPU variant or + * hypervisor environment. + */ +struct cpu_ops { + void (*ipi_vectored)(u_int, int); +}; + +extern struct cpu_ops cpu_ops; extern char btext[]; extern char etext[]; Index: amd64/include/cpufunc.h =================================================================== --- amd64/include/cpufunc.h (revision 255160) +++ amd64/include/cpufunc.h (working copy) @@ -461,7 +461,35 @@ invltlb(void) load_cr3(rcr3()); } +#ifndef CR4_PGE +#define CR4_PGE 0x00000080 /* Page global enable */ +#endif + /* + * Perform the guaranteed invalidation of all TLB entries. This + * includes the global entries, and entries in all PCIDs, not only the + * current context. The function works both on non-PCID CPUs and CPUs + * with the PCID turned off or on. See IA-32 SDM Vol. 3a 4.10.4.1 + * Operations that Invalidate TLBs and Paging-Structure Caches. + */ +static __inline void +invltlb_globpcid(void) +{ + uint64_t cr4; + + cr4 = rcr4(); + load_cr4(cr4 & ~CR4_PGE); + /* + * Although preemption at this point could be detrimental to + * performance, it would not lead to an error. PG_G is simply + * ignored if CR4.PGE is clear. Moreover, in case this block + * is re-entered, the load_cr4() either above or below will + * modify CR4.PGE flushing the TLB. 
+ */ + load_cr4(cr4 | CR4_PGE); +} + +/* * TLB flush for an individual page (even if it has PG_G). * Only works on 486+ CPUs (i386 does not have PG_G). */ Index: amd64/include/pcpu.h =================================================================== --- amd64/include/pcpu.h (revision 255160) +++ amd64/include/pcpu.h (working copy) @@ -33,15 +33,6 @@ #error "sys/cdefs.h is a prerequisite for this file" #endif -#if defined(XEN) || defined(XENHVM) -#ifndef NR_VIRQS -#define NR_VIRQS 24 -#endif -#ifndef NR_IPIS -#define NR_IPIS 2 -#endif -#endif - /* * The SMP parts are setup in pmap.c and locore.s for the BSP, and * mp_machdep.c sets up the data for the AP's to "see" when they awake. Index: i386/i386/mp_machdep.c =================================================================== --- i386/i386/mp_machdep.c (revision 255160) +++ i386/i386/mp_machdep.c (working copy) @@ -81,6 +81,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #ifdef XENHVM #include @@ -170,6 +171,11 @@ u_long *ipi_lazypmap_counts[MAXCPU]; static u_long *ipi_hardclock_counts[MAXCPU]; #endif +/* Default cpu_ops implementation. */ +struct cpu_ops cpu_ops = { + .ipi_vectored = lapic_ipi_vectored +}; + /* * Local data and functions. 
*/ @@ -1209,7 +1215,7 @@ ipi_send_cpu(int cpu, u_int ipi) if (old_pending) return; } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); + cpu_ops.ipi_vectored(ipi, cpu_apic_ids[cpu]); } /* @@ -1460,7 +1466,7 @@ ipi_all_but_self(u_int ipi) CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); + cpu_ops.ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } int Index: i386/include/cpu.h =================================================================== --- i386/include/cpu.h (revision 255160) +++ i386/include/cpu.h (working copy) @@ -54,6 +54,17 @@ #define TRAPF_PC(framep) ((framep)->tf_eip) #ifdef _KERNEL +/* + * Struct containing pointers to CPU management functions whose + * implementation is run time selectable. Selection can be made, + * for example, based on detection of a particular CPU variant or + * hypervisor environment. + */ +struct cpu_ops { + void (*ipi_vectored)(u_int, int); +}; + +extern struct cpu_ops cpu_ops; extern char btext[]; extern char etext[]; Index: i386/include/pcpu.h =================================================================== --- i386/include/pcpu.h (revision 255160) +++ i386/include/pcpu.h (working copy) @@ -44,15 +44,6 @@ * other processors" */ -#if defined(XEN) || defined(XENHVM) -#ifndef NR_VIRQS -#define NR_VIRQS 24 -#endif -#ifndef NR_IPIS -#define NR_IPIS 2 -#endif -#endif - #if defined(XEN) /* These are peridically updated in shared_info, and then copied here. 
*/ Index: i386/include/smp.h =================================================================== --- i386/include/smp.h (revision 255160) +++ i386/include/smp.h (working copy) @@ -84,11 +84,6 @@ void smp_masked_invltlb(cpuset_t mask); #ifdef XEN void ipi_to_irq_init(void); - -#define RESCHEDULE_VECTOR 0 -#define CALL_FUNCTION_VECTOR 1 -#define NR_IPIS 2 - #endif #endif /* !LOCORE */ #endif /* SMP */ Index: i386/xen/mp_machdep.c =================================================================== --- i386/xen/mp_machdep.c (revision 255160) +++ i386/xen/mp_machdep.c (working copy) @@ -99,25 +99,37 @@ extern void failsafe_callback(void); extern void pmap_lazyfix_action(void); /*--------------------------- Forward Declarations ---------------------------*/ -static void assign_cpu_ids(void); -static void set_interrupt_apic_ids(void); -static int start_all_aps(void); -static int start_ap(int apic_id); -static void release_aps(void *dummy); +static driver_filter_t smp_reschedule_interrupt; +static driver_filter_t smp_call_function_interrupt; +static void assign_cpu_ids(void); +static void set_interrupt_apic_ids(void); +static int start_all_aps(void); +static int start_ap(int apic_id); +static void release_aps(void *dummy); +/*---------------------------------- Macros ----------------------------------*/ +#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS) + /*-------------------------------- Local Types -------------------------------*/ typedef void call_data_func_t(uintptr_t , uintptr_t); -/* - * Store data from cpu_add() until later in the boot when we actually setup - * the APs. 
- */ struct cpu_info { int cpu_present:1; int cpu_bsp:1; int cpu_disabled:1; }; +struct xen_ipi_handler +{ + driver_filter_t *filter; + const char *description; +}; + +enum { + RESCHEDULE_VECTOR, + CALL_FUNCTION_VECTOR, +}; + /*-------------------------------- Global Data -------------------------------*/ static u_int hyperthreading_cpus; static cpuset_t hyperthreading_cpus_mask; @@ -161,8 +173,14 @@ static volatile u_int cpu_ipi_pending[MAXCPU]; static int cpu_logical; static int cpu_cores; +static const struct xen_ipi_handler xen_ipis[] = +{ + [RESCHEDULE_VECTOR] = { smp_reschedule_interrupt, "resched" }, + [CALL_FUNCTION_VECTOR] = { smp_call_function_interrupt,"callfunc" } +}; + /*------------------------------- Per-CPU Data -------------------------------*/ -DPCPU_DEFINE(xen_intr_handle_t, ipi_port[NR_IPIS]); +DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]); DPCPU_DEFINE(struct vcpu_info *, vcpu_info); /*------------------------------ Implementation ------------------------------*/ @@ -362,7 +380,7 @@ iv_lazypmap(uintptr_t a, uintptr_t b) /* * These start from "IPI offset" APIC_IPI_INTS */ -static call_data_func_t *ipi_vectors[] = +static call_data_func_t *ipi_vectors[6] = { iv_rendezvous, iv_invltlb, @@ -427,7 +445,7 @@ smp_call_function_interrupt(void *unused) call_data->func_id > IPI_BITMAP_VECTOR) panic("invalid function id %u", call_data->func_id); - func = ipi_vectors[call_data->func_id - APIC_IPI_INTS]; + func = ipi_vectors[IPI_TO_IDX(call_data->func_id)]; /* * Notify initiating CPU that I've grabbed the data and am * about to execute the function @@ -473,44 +491,43 @@ cpu_mp_announce(void) static int xen_smp_cpu_init(unsigned int cpu) { - int rc; - xen_intr_handle_t irq_handle; + xen_intr_handle_t *ipi_handle; + const struct xen_ipi_handler *ipi; + int idx, rc; - DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], NULL); - DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], NULL); + ipi_handle = DPCPU_ID_GET(cpu, ipi_handle); + for (ipi = 
xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) { - /* - * The PCPU variable pc_device is not initialized on i386 PV, - * so we have to use the root_bus device in order to setup - * the IPIs. - */ - rc = xen_intr_bind_ipi(root_bus, RESCHEDULE_VECTOR, - cpu, smp_reschedule_interrupt, INTR_TYPE_TTY, &irq_handle); - if (rc < 0) - goto fail; - xen_intr_describe(irq_handle, "resched%u", cpu); - DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], irq_handle); + /* + * The PCPU variable pc_device is not initialized on i386 PV, + * so we have to use the root_bus device in order to setup + * the IPIs. + */ + rc = xen_intr_alloc_and_bind_ipi(root_bus, cpu, + ipi->filter, INTR_TYPE_TTY, &ipi_handle[idx]); + if (rc != 0) { + printf("Unable to allocate a XEN IPI port. " + "Error %d\n", rc); + break; + } + xen_intr_describe(ipi_handle[idx], "%s", ipi->description); + } - printf("[XEN] IPI cpu=%d port=%d vector=RESCHEDULE_VECTOR (%d)\n", - cpu, xen_intr_port(irq_handle), RESCHEDULE_VECTOR); + for (;idx < nitems(xen_ipis); idx++) + ipi_handle[idx] = NULL; - rc = xen_intr_bind_ipi(root_bus, CALL_FUNCTION_VECTOR, - cpu, smp_call_function_interrupt, INTR_TYPE_TTY, &irq_handle); - if (rc < 0) - goto fail; - xen_intr_describe(irq_handle, "callfunc%u", cpu); - DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], irq_handle); + if (rc == 0) + return (0); - printf("[XEN] IPI cpu=%d port=%d vector=CALL_FUNCTION_VECTOR (%d)\n", - cpu, xen_intr_port(irq_handle), CALL_FUNCTION_VECTOR); + /* Either all are successfully mapped, or none at all. 
*/ + for (idx = 0; idx < nitems(xen_ipis); idx++) { + if (ipi_handle[idx] == NULL) + continue; - return (0); + xen_intr_unbind(ipi_handle[idx]); + ipi_handle[idx] = NULL; + } - fail: - xen_intr_unbind(DPCPU_ID_GET(cpu, ipi_port[RESCHEDULE_VECTOR])); - DPCPU_ID_SET(cpu, ipi_port[RESCHEDULE_VECTOR], NULL); - xen_intr_unbind(DPCPU_ID_GET(cpu, ipi_port[CALL_FUNCTION_VECTOR])); - DPCPU_ID_SET(cpu, ipi_port[CALL_FUNCTION_VECTOR], NULL); return (rc); } @@ -980,8 +997,8 @@ start_ap(int apic_id) static void ipi_pcpu(int cpu, u_int ipi) { - KASSERT((ipi <= NR_IPIS), ("invalid IPI")); - xen_intr_signal(DPCPU_ID_GET(cpu, ipi_port[ipi])); + KASSERT((ipi <= nitems(xen_ipis)), ("invalid IPI")); + xen_intr_signal(DPCPU_ID_GET(cpu, ipi_handle[ipi])); } /* Index: x86/xen/hvm.c =================================================================== --- x86/xen/hvm.c (revision 255160) +++ x86/xen/hvm.c (working copy) @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008 Citrix Systems, Inc. + * Copyright (c) 2008, 2013 Citrix Systems, Inc. * Copyright (c) 2012 Spectra Logic Corporation * All rights reserved. 
* @@ -33,10 +33,20 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include +#include +#include + #include + #include +#include +#include +#include + #include #include #include @@ -44,30 +54,407 @@ __FBSDID("$FreeBSD$"); #include #include -#include -#include - #include #include +/*--------------------------- Forward Declarations ---------------------------*/ +static driver_filter_t xen_smp_rendezvous_action; +static driver_filter_t xen_invltlb; +static driver_filter_t xen_invlpg; +static driver_filter_t xen_invlrng; +static driver_filter_t xen_invlcache; +#ifdef __i386__ +static driver_filter_t xen_lazypmap; +#endif +static driver_filter_t xen_ipi_bitmap_handler; +static driver_filter_t xen_cpustop_handler; +static driver_filter_t xen_cpususpend_handler; +static driver_filter_t xen_cpustophard_handler; + +/*---------------------------- Extern Declarations ---------------------------*/ +/* Variables used by mp_machdep to perform the MMU related IPIs */ +extern volatile int smp_tlb_wait; +extern vm_offset_t smp_tlb_addr2; +#ifdef __i386__ +extern vm_offset_t smp_tlb_addr1; +#else +extern struct invpcid_descr smp_tlb_invpcid; +extern uint64_t pcid_cr3; +extern int invpcid_works; +extern int pmap_pcid_enabled; +extern pmap_t smp_tlb_pmap; +#endif + +#ifdef __i386__ +extern void pmap_lazyfix_action(void); +#endif + +/*---------------------------------- Macros ----------------------------------*/ +#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS) + +/*-------------------------------- Local Types -------------------------------*/ +struct xen_ipi_handler +{ + driver_filter_t *filter; + const char *description; +}; + +/*-------------------------------- Global Data -------------------------------*/ +enum xen_domain_type xen_domain_type = XEN_NATIVE; + static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support"); -DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); -DPCPU_DEFINE(struct vcpu_info *, vcpu_info); +static const struct xen_ipi_handler xen_ipis[] = +{ 
+ [IPI_TO_IDX(IPI_RENDEZVOUS)] = { xen_smp_rendezvous_action, "r" }, + [IPI_TO_IDX(IPI_INVLTLB)] = { xen_invltlb, "itlb"}, + [IPI_TO_IDX(IPI_INVLPG)] = { xen_invlpg, "ipg" }, + [IPI_TO_IDX(IPI_INVLRNG)] = { xen_invlrng, "irg" }, + [IPI_TO_IDX(IPI_INVLCACHE)] = { xen_invlcache, "ic" }, +#ifdef __i386__ + [IPI_TO_IDX(IPI_LAZYPMAP)] = { xen_lazypmap, "lp" }, +#endif + [IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler, "b" }, + [IPI_TO_IDX(IPI_STOP)] = { xen_cpustop_handler, "st" }, + [IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" }, + [IPI_TO_IDX(IPI_STOP_HARD)] = { xen_cpustophard_handler, "sth" }, +}; -/*-------------------------------- Global Data -------------------------------*/ /** * If non-zero, the hypervisor has been configured to use a direct * IDT event callback for interrupt injection. */ int xen_vector_callback_enabled; +/*------------------------------- Per-CPU Data -------------------------------*/ +DPCPU_DEFINE(struct vcpu_info, vcpu_local_info); +DPCPU_DEFINE(struct vcpu_info *, vcpu_info); +DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]); + /*------------------ Hypervisor Access Shared Memory Regions -----------------*/ /** Hypercall table accessed via HYPERVISOR_*_op() methods. 
*/ char *hypercall_stubs; shared_info_t *HYPERVISOR_shared_info; -enum xen_domain_type xen_domain_type = XEN_NATIVE; +/*---------------------------- XEN PV IPI Handlers ---------------------------*/ +/* + * This are C clones of the ASM functions found in apic_vector.s + */ +static int +xen_ipi_bitmap_handler(void *arg) +{ + struct trapframe *frame; + + frame = arg; + ipi_bitmap_handler(*frame); + return (FILTER_HANDLED); +} + +static int +xen_smp_rendezvous_action(void *arg) +{ +#ifdef COUNT_IPIS + int cpu; + + cpu = PCPU_GET(cpuid); + (*ipi_rendezvous_counts[cpu])++; +#endif /* COUNT_IPIS */ + + smp_rendezvous_action(); + return (FILTER_HANDLED); +} + +static int +xen_invltlb(void *arg) +{ + uint64_t cr3; +#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) + int cpu; + + cpu = PCPU_GET(cpuid); +#ifdef COUNT_XINVLTLB_HITS + xhits_gbl[cpu]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invltlb_counts[cpu])++; +#endif /* COUNT_IPIS */ +#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */ + + cr3 = rcr3(); +#ifndef __i386__ + if (pmap_pcid_enabled) { + if (smp_tlb_invpcid.pcid != (uint64_t)-1 && + smp_tlb_invpcid.pcid != 0) { + + if (invpcid_works) { + invpcid(&smp_tlb_invpcid, INVPCID_CTX); + } else { + /* Otherwise reload %cr3 twice. 
*/ + if (cr3 != pcid_cr3) + load_cr3(pcid_cr3); + cr3 |= CR3_PCID_SAVE; + } + } else { + invltlb_globpcid(); + } + } + if (smp_tlb_pmap != NULL) + CPU_CLR_ATOMIC(PCPU_GET(cpuid), &smp_tlb_pmap->pm_save); +#endif + load_cr3(cr3); + atomic_add_int(&smp_tlb_wait, 1); + return (FILTER_HANDLED); +} + +static int +xen_invlpg(void *arg) +{ +#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) + int cpu; + + cpu = PCPU_GET(cpuid); +#ifdef COUNT_XINVLTLB_HITS + xhits_pg[cpu]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlpg_counts[cpu])++; +#endif /* COUNT_IPIS */ +#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */ + +#ifdef __i386__ + invlpg(smp_tlb_addr1); +#else + if (pmap_pcid_enabled) { + if (invpcid_works) { + invpcid(&smp_tlb_invpcid, INVPCID_ADDR); + } else if (smp_tlb_invpcid.pcid == 0) { + invlpg(smp_tlb_invpcid.addr); + } else if (smp_tlb_invpcid.pcid == (uint64_t)-1) { + invltlb_globpcid(); + } else { + uint64_t cr3; + + /* + * PCID supported, but INVPCID is not. + * Temporarily switch to the target address + * space and do INVLPG. 
+ */ + cr3 = rcr3(); + if (cr3 != pcid_cr3) + load_cr3(pcid_cr3 | CR3_PCID_SAVE); + invlpg(smp_tlb_invpcid.addr); + load_cr3(cr3 | CR3_PCID_SAVE); + } + } else + invlpg(smp_tlb_invpcid.addr); +#endif + + atomic_add_int(&smp_tlb_wait, 1); + return (FILTER_HANDLED); +} + +static int +xen_invlrng(void *arg) +{ + vm_offset_t addr; +#if defined(COUNT_XINVLTLB_HITS) || defined(COUNT_IPIS) + int cpu; + + cpu = PCPU_GET(cpuid); +#ifdef COUNT_XINVLTLB_HITS + xhits_rng[cpu]++; +#endif /* COUNT_XINVLTLB_HITS */ +#ifdef COUNT_IPIS + (*ipi_invlrng_counts[cpu])++; +#endif /* COUNT_IPIS */ +#endif /* COUNT_XINVLTLB_HITS || COUNT_IPIS */ + +#ifdef __i386__ + addr = smp_tlb_addr1; + do { + invlpg(addr); + addr += PAGE_SIZE; + } while (addr < smp_tlb_addr2); +#else + addr = smp_tlb_invpcid.addr; + if (pmap_pcid_enabled) { + if (invpcid_works) { + struct invpcid_descr d; + + d = smp_tlb_invpcid; + do { + invpcid(&d, INVPCID_ADDR); + d.addr += PAGE_SIZE; + } while (d.addr < smp_tlb_addr2); + } else if (smp_tlb_invpcid.pcid == 0) { + /* + * kernel pmap - use invlpg to invalidate + * global mapping. 
+ */ + do { + invlpg(addr); + addr += PAGE_SIZE; + } while (addr < smp_tlb_addr2); + } else if (smp_tlb_invpcid.pcid != (uint64_t)-1) { + invltlb_globpcid(); + if (smp_tlb_pmap != NULL) { + CPU_CLR_ATOMIC(PCPU_GET(cpuid), + &smp_tlb_pmap->pm_save); + } + } else { + uint64_t cr3; + + cr3 = rcr3(); + if (cr3 != pcid_cr3) + load_cr3(pcid_cr3 | CR3_PCID_SAVE); + do { + invlpg(addr); + addr += PAGE_SIZE; + } while (addr < smp_tlb_addr2); + load_cr3(cr3 | CR3_PCID_SAVE); + } + } else { + do { + invlpg(addr); + addr += PAGE_SIZE; + } while (addr < smp_tlb_addr2); + } +#endif + + atomic_add_int(&smp_tlb_wait, 1); + return (FILTER_HANDLED); +} + +static int +xen_invlcache(void *arg) +{ +#ifdef COUNT_IPIS + int cpu = PCPU_GET(cpuid); + + cpu = PCPU_GET(cpuid); + (*ipi_invlcache_counts[cpu])++; +#endif /* COUNT_IPIS */ + + wbinvd(); + atomic_add_int(&smp_tlb_wait, 1); + return (FILTER_HANDLED); +} + +#ifdef __i386__ +static int +xen_lazypmap(void *arg) +{ + + pmap_lazyfix_action(); + return (FILTER_HANDLED); +} +#endif + +static int +xen_cpustop_handler(void *arg) +{ + + cpustop_handler(); + return (FILTER_HANDLED); +} + +static int +xen_cpususpend_handler(void *arg) +{ + + cpususpend_handler(); + return (FILTER_HANDLED); +} + +static int +xen_cpustophard_handler(void *arg) +{ + + ipi_nmi_handler(); + return (FILTER_HANDLED); +} + +/* Xen PV IPI sender */ +static void +xen_ipi_vectored(u_int vector, int dest) +{ + xen_intr_handle_t *ipi_handle; + int ipi_idx, to_cpu, self; + + ipi_idx = IPI_TO_IDX(vector); + if (ipi_idx > nitems(xen_ipis)) + panic("IPI out of range"); + + switch(dest) { + case APIC_IPI_DEST_SELF: + ipi_handle = DPCPU_GET(ipi_handle); + xen_intr_signal(ipi_handle[ipi_idx]); + break; + case APIC_IPI_DEST_ALL: + CPU_FOREACH(to_cpu) { + ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle); + xen_intr_signal(ipi_handle[ipi_idx]); + } + break; + case APIC_IPI_DEST_OTHERS: + self = PCPU_GET(cpuid); + CPU_FOREACH(to_cpu) { + if (to_cpu != self) { + ipi_handle = 
DPCPU_ID_GET(to_cpu, ipi_handle); + xen_intr_signal(ipi_handle[ipi_idx]); + } + } + break; + default: + to_cpu = apic_cpuid(dest); + ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle); + xen_intr_signal(ipi_handle[ipi_idx]); + break; + } +} + +static void +xen_cpu_ipi_init(int cpu) +{ + xen_intr_handle_t *ipi_handle; + const struct xen_ipi_handler *ipi; + device_t dev; + int idx, rc; + + ipi_handle = DPCPU_ID_GET(cpu, ipi_handle); + dev = pcpu_find(cpu)->pc_device; + KASSERT((dev != NULL), ("NULL pcpu device_t")); + + for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) { + + if (ipi->filter == NULL) { + ipi_handle[idx] = NULL; + continue; + } + + rc = xen_intr_alloc_and_bind_ipi(dev, cpu, ipi->filter, + INTR_TYPE_TTY, &ipi_handle[idx]); + if (rc != 0) + panic("Unable to allocate a XEN IPI port"); + xen_intr_describe(ipi_handle[idx], "%s", ipi->description); + } +} + +static void +xen_init_ipis(void) +{ + int i; + + if (xen_hvm_domain() && xen_vector_callback_enabled) { + CPU_FOREACH(i) + xen_cpu_ipi_init(i); + } + + /* Set the xen pv ipi ops to replace the native ones */ + cpu_ops.ipi_vectored = xen_ipi_vectored; +} + +/*---------------------- XEN Hypervisor Probe and Setup ----------------------*/ static uint32_t xen_hvm_cpuid_base(void) { @@ -253,4 +640,5 @@ void xen_hvm_init_cpu(void) } SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_init, NULL); +SYSINIT(xen_init_ipis, SI_SUB_SMP, SI_ORDER_FIRST, xen_init_ipis, NULL); SYSINIT(xen_hvm_init_cpu, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_init_cpu, NULL); Index: x86/xen/xen_intr.c =================================================================== --- x86/xen/xen_intr.c (revision 255160) +++ x86/xen/xen_intr.c (working copy) @@ -1010,7 +1010,7 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int } int -xen_intr_bind_ipi(device_t dev, u_int ipi, u_int cpu, +xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu, driver_filter_t filter, enum intr_type flags, xen_intr_handle_t *port_handlep) { 
Index: xen/xen_intr.h =================================================================== --- xen/xen_intr.h (revision 255160) +++ xen/xen_intr.h (working copy) @@ -141,21 +141,20 @@ int xen_intr_bind_virq(device_t dev, u_int virq, u void *arg, enum intr_type irqflags, xen_intr_handle_t *handlep); /** - * Associate an interprocessor interrupt vector with an interrupt handler. + * Allocate a local event channel port for servicing interprocessor + * interrupts and, if successful, associate the port with the specified + * interrupt handler. * * \param dev The device making this bind request. - * \param ipi The interprocessor interrupt vector number of the - * interrupt source being hooked. * \param cpu The cpu receiving the IPI. - * \param filter An interrupt filter handler. Specify NULL - * to always dispatch to the ithread handler. + * \param filter The interrupt filter servicing this IPI. * \param irqflags Interrupt handler flags. See sys/bus.h. * \param handlep Pointer to an opaque handle used to manage this * registration. * * \returns 0 on success, otherwise an errno. */ -int xen_intr_bind_ipi(device_t dev, u_int ipi, u_int cpu, +int xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu, driver_filter_t filter, enum intr_type irqflags, xen_intr_handle_t *handlep);