/*
 * Patch summary (commentary only; the patch text below is unchanged).
 *
 * Target: mp_machdep.c, revision 221608 -> working copy.
 *
 * What the hunks do, as far as the visible text shows:
 *   - Drop the atomic_add_int(&smp_tlb_wait, 1) calls from the wbinvd()
 *     handler, from iv_lazypmap() and from the call-function interrupt
 *     handler; completion is reported through call_data->finished instead.
 *   - Change struct _call_data's `started` and `finished` members from
 *     atomic_t to int, and update the handler's local pointers to match.
 *   - In the handler, replace mb() + atomic_inc() with
 *     atomic_add_rel_int() for both `started` and `finished`.
 *   - In smp_tlb_shootdown() and smp_targeted_tlb_shootdown(): make the
 *     local `data` static, set call_data->wait = 1, reset
 *     call_data->finished with atomic_store_rel_int(), and spin on
 *     call_data->finished < ncpu rather than on smp_tlb_wait.
 *   - Add a static helper ipi_send_cpu(cpu, ipi) holding the bitmap-IPI /
 *     call-function logic that ipi_selected() and ipi_cpu() each carried
 *     inline; both callers now just call the helper.  The former
 *     KASSERT(call_data != NULL) is gone: when call_data is NULL the helper
 *     installs its own static _call_data, takes smp_ipi_mtx if the caller
 *     does not already own it, sends the IPI and spins until
 *     call_data->finished becomes non-zero.
 *
 * NOTE(review): ipi_send_cpu() tests call_data == NULL before it acquires
 *   smp_ipi_mtx.  If another CPU holds the mutex with call_data set, this
 *   path appears to send CALL_FUNCTION_VECTOR against that CPU's call_data
 *   without locking or waiting -- confirm whether that is reachable.
 * NOTE(review): when ipi_send_cpu() installs its own call_data it writes
 *   only func_id, wait and finished; the handler also reads
 *   call_data->arg1/arg2, which would hold whatever the static `data` last
 *   contained.  `started` is not reset on any path shown here -- confirm
 *   that nothing depends on it.
 * NOTE(review): the wait loops read call_data->finished with plain loads of
 *   a plain int; confirm ia32_pause() keeps the compiler from hoisting the
 *   load out of the loop.
 * NOTE(review): the hunk bodies below appear to have lost their original
 *   line breaks (e.g. a hunk declared as 14 lines sits on one line); the
 *   text presumably needs re-wrapping before a patch tool can apply it.
 */
Index: mp_machdep.c =================================================================== --- mp_machdep.c (revision 221608) +++ mp_machdep.c (working copy) @@ -337,14 +337,13 @@ { wbinvd(); - atomic_add_int(&smp_tlb_wait, 1); } static void iv_lazypmap(uintptr_t a, uintptr_t b) { + pmap_lazyfix_action(); - atomic_add_int(&smp_tlb_wait, 1); } /* @@ -394,8 +393,8 @@ uint16_t wait; uintptr_t arg1; uintptr_t arg2; - atomic_t started; - atomic_t finished; + int started; + int finished; }; static struct _call_data *call_data; @@ -407,8 +406,8 @@ uintptr_t arg1 = call_data->arg1; uintptr_t arg2 = call_data->arg2; int wait = call_data->wait; - atomic_t *started = &call_data->started; - atomic_t *finished = &call_data->finished; + int *started = &call_data->started; + int *finished = &call_data->finished; /* We only handle function IPIs, not bitmap IPIs */ if (call_data->func_id < APIC_IPI_INTS || call_data->func_id > IPI_BITMAP_VECTOR) @@ -419,18 +418,14 @@ * Notify initiating CPU that I've grabbed the data and am * about to execute the function */ - mb(); - atomic_inc(started); + atomic_add_rel_int(started, 1); /* * At this point the info structure may be out of scope unless wait==1 */ (*func)(arg1, arg2); - if (wait) { - mb(); - atomic_inc(finished); - } - atomic_add_int(&smp_tlb_wait, 1); + if (wait) + atomic_add_rel_int(finished, 1); return (FILTER_HANDLED); } @@ -963,7 +961,7 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) { u_int ncpu; - struct _call_data data; + static struct _call_data data; ncpu = mp_ncpus - 1; /* does not shootdown self */ if (ncpu < 1) @@ -976,9 +974,10 @@ call_data->func_id = vector; call_data->arg1 = addr1; call_data->arg2 = addr2; - atomic_store_rel_int(&smp_tlb_wait, 0); + call_data->wait = 1; + atomic_store_rel_int(&call_data->finished, 0); ipi_all_but_self(vector); - while (smp_tlb_wait < ncpu) + while (call_data->finished < ncpu) ia32_pause(); call_data = NULL; mtx_unlock_spin(&smp_ipi_mtx); @@ -988,7 +987,7 @@ 
smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { int ncpu, othercpus; - struct _call_data data; + static struct _call_data data; othercpus = mp_ncpus - 1; if (mask == (u_int)-1) { @@ -1018,12 +1017,13 @@ call_data->func_id = vector; call_data->arg1 = addr1; call_data->arg2 = addr2; - atomic_store_rel_int(&smp_tlb_wait, 0); + call_data->wait = 1; + atomic_store_rel_int(&call_data->finished, 0); if (mask == (u_int)-1) ipi_all_but_self(vector); else ipi_selected(mask, vector); - while (smp_tlb_wait < ncpu) + while (call_data->finished < ncpu) ia32_pause(); call_data = NULL; mtx_unlock_spin(&smp_ipi_mtx); @@ -1092,21 +1092,56 @@ } /* + * Send an IPI to specified CPU handling the bitmap logic. + */ +static void +ipi_send_cpu(int cpu, u_int ipi) +{ + static struct _call_data data; + u_int bitmap = 0, mycd = 0, mylock = 0; + u_int old_pending, new_pending; + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], + old_pending, new_pending)); + if (!old_pending) + ipi_pcpu(cpu, RESCHEDULE_VECTOR); + } else { + if (call_data == NULL) { + mycd = 1; + if (!mtx_owned(&smp_ipi_mtx)) { + mylock = 1; + mtx_lock_spin(&smp_ipi_mtx); + } + call_data = &data; + call_data->func_id = ipi; + call_data->wait = 1; + atomic_store_rel_int(&call_data->finished, 0); + } + ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); + if (mycd) { + while (call_data->finished == 0) + ia32_pause(); + call_data = NULL; + if (mylock) + mtx_unlock_spin(&smp_ipi_mtx); + } + } +} + +/* * send an IPI to a set of cpus. 
*/ void ipi_selected(cpumask_t cpus, u_int ipi) { int cpu; - u_int bitmap = 0; - u_int old_pending; - u_int new_pending; - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - } - /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. @@ -1119,19 +1154,7 @@ while ((cpu = ffs(cpus)) != 0) { cpu--; cpus &= ~(1 << cpu); - - if (bitmap) { - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (!old_pending) - ipi_pcpu(cpu, RESCHEDULE_VECTOR); - } else { - KASSERT(call_data != NULL, ("call_data not set")); - ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); - } + ipi_send_cpu(cpu, ipi); } } @@ -1141,14 +1164,6 @@ void ipi_cpu(int cpu, u_int ipi) { - u_int bitmap = 0; - u_int old_pending; - u_int new_pending; - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - } /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit @@ -1159,19 +1174,7 @@ atomic_set_int(&ipi_nmi_pending, 1 << cpu); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); - - if (bitmap) { - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (!old_pending) - ipi_pcpu(cpu, RESCHEDULE_VECTOR); - } else { - KASSERT(call_data != NULL, ("call_data not set")); - ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); - } + ipi_send_cpu(cpu, ipi); } /*