Index: sys/i386/i386/machdep.c
===================================================================
--- sys/i386/i386/machdep.c	(revision 225875)
+++ sys/i386/i386/machdep.c	(working copy)
@@ -1222,7 +1222,7 @@
 void
 cpu_halt(void)
 {
 	for (;;)
-		__asm__ ("hlt");
+		halt();
 }
 #endif
@@ -1245,6 +1245,8 @@ cpu_idle_acpi(int busy)
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_SLEEPING;
+
+	/* See comments in cpu_idle_hlt(). */
 	disable_intr();
 	if (sched_runnable())
 		enable_intr();
@@ -1263,9 +1265,22 @@ cpu_idle_hlt(int busy)
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_SLEEPING;
+
 	/*
-	 * We must absolutely guarentee that hlt is the next instruction
-	 * after sti or we introduce a timing window.
+	 * Since we may be in a critical section from cpu_idle(), if
+	 * an interrupt fires during that critical section we may have
+	 * a pending preemption.  If the CPU halts, then that thread
+	 * may not execute until a later interrupt awakens the CPU.
+	 * To handle this race, check for a runnable thread after
+	 * disabling interrupts and immediately return if one is
+	 * found.  Also, we must absolutely guarantee that hlt is
+	 * the next instruction after sti.  This ensures that any
+	 * interrupt that fires after the call to disable_intr() will
+	 * immediately awaken the CPU from hlt.  Finally, note that
+	 * on x86 this works fine because interrupts are recognized
+	 * only after the instruction following sti executes, while
+	 * IF is set to 1 immediately, allowing hlt to acknowledge
+	 * the interrupt.
 	 */
 	disable_intr();
 	if (sched_runnable())
@@ -1292,11 +1307,19 @@ cpu_idle_mwait(int busy)
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_MWAIT;
-	if (!sched_runnable()) {
-		cpu_monitor(state, 0, 0);
-		if (*state == STATE_MWAIT)
-			cpu_mwait(0, MWAIT_C1);
+
+	/* See comments in cpu_idle_hlt(). */
+	disable_intr();
+	if (sched_runnable()) {
+		enable_intr();
+		*state = STATE_RUNNING;
+		return;
 	}
+	cpu_monitor(state, 0, 0);
+	if (*state == STATE_MWAIT)
+		__asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
+	else
+		enable_intr();
 	*state = STATE_RUNNING;
 }
@@ -1308,6 +1331,12 @@ cpu_idle_spin(int busy)
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_RUNNING;
+
+	/*
+	 * The sched_runnable() call is racy but as long as it is
+	 * inside a loop, missing it once has little impact, if any
+	 * (and it is much better than not checking at all).
+	 */
 	for (i = 0; i < 1000; i++) {
 		if (sched_runnable())
 			return;
Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c	(revision 225875)
+++ sys/amd64/amd64/machdep.c	(working copy)
@@ -609,7 +609,7 @@
 void
 cpu_halt(void)
 {
 	for (;;)
-		__asm__ ("hlt");
+		halt();
 }
 void (*cpu_idle_hook)(void) = NULL;	/* ACPI idle hook. */
@@ -630,6 +630,8 @@ cpu_idle_acpi(int busy)
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_SLEEPING;
+
+	/* See comments in cpu_idle_hlt(). */
 	disable_intr();
 	if (sched_runnable())
 		enable_intr();
@@ -647,9 +649,22 @@ cpu_idle_hlt(int busy)
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_SLEEPING;
+
 	/*
-	 * We must absolutely guarentee that hlt is the next instruction
-	 * after sti or we introduce a timing window.
+	 * Since we may be in a critical section from cpu_idle(), if
+	 * an interrupt fires during that critical section we may have
+	 * a pending preemption.  If the CPU halts, then that thread
+	 * may not execute until a later interrupt awakens the CPU.
+	 * To handle this race, check for a runnable thread after
+	 * disabling interrupts and immediately return if one is
+	 * found.  Also, we must absolutely guarantee that hlt is
+	 * the next instruction after sti.  This ensures that any
+	 * interrupt that fires after the call to disable_intr() will
+	 * immediately awaken the CPU from hlt.  Finally, note that
+	 * on x86 this works fine because interrupts are recognized
+	 * only after the instruction following sti executes, while
+	 * IF is set to 1 immediately, allowing hlt to acknowledge
+	 * the interrupt.
 	 */
 	disable_intr();
 	if (sched_runnable())
@@ -675,11 +690,19 @@ cpu_idle_mwait(int busy)
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_MWAIT;
-	if (!sched_runnable()) {
-		cpu_monitor(state, 0, 0);
-		if (*state == STATE_MWAIT)
-			cpu_mwait(0, MWAIT_C1);
+
+	/* See comments in cpu_idle_hlt(). */
+	disable_intr();
+	if (sched_runnable()) {
+		enable_intr();
+		*state = STATE_RUNNING;
+		return;
 	}
+	cpu_monitor(state, 0, 0);
+	if (*state == STATE_MWAIT)
+		__asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0));
+	else
+		enable_intr();
 	*state = STATE_RUNNING;
 }
@@ -691,6 +714,12 @@ cpu_idle_spin(int busy)
 	state = (int *)PCPU_PTR(monitorbuf);
 	*state = STATE_RUNNING;
+
+	/*
+	 * The sched_runnable() call is racy but as long as it is
+	 * inside a loop, missing it once has little impact, if any
+	 * (and it is much better than not checking at all).
+	 */
 	for (i = 0; i < 1000; i++) {
 		if (sched_runnable())
 			return;
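Note for reviewers: both cpu_idle_hlt() hunks are truncated just after the sched_runnable() check, so the sleep instruction itself is not visible above. As a reading aid, the function with this patch applied should read roughly as the sketch below; the else branch is inferred from the comment's sti/hlt requirement and is an assumption, not something the hunks show.

static void
cpu_idle_hlt(int busy)
{
	int *state;

	state = (int *)PCPU_PTR(monitorbuf);
	*state = STATE_SLEEPING;

	/* (race comment from the hunk above elided) */
	disable_intr();
	if (sched_runnable())
		enable_intr();			/* a thread is pending; do not halt */
	else
		__asm __volatile("sti; hlt");	/* assumed sleep path: sti and hlt back to back */
	*state = STATE_RUNNING;
}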
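Rationale sketch: the reason sti must immediately precede hlt (and why the new mwait path issues "sti; mwait" as a single asm statement) is the one-instruction interrupt shadow after sti. IF is set at once, but delivery is deferred until the following instruction has started executing. A hypothetical broken ordering, for contrast:

	/* BROKEN (hypothetical): a window exists between sti and hlt. */
	disable_intr();
	if (!sched_runnable()) {
		enable_intr();	/* sti; an interrupt may be taken here... */
		halt();		/* ...and hlt then sleeps until the NEXT interrupt */
	}

	/* CORRECT: the interrupt shadow defers delivery past hlt. */
	disable_intr();
	if (!sched_runnable())
		__asm __volatile("sti; hlt");	/* the pending interrupt wakes the halted CPU */

The same shadow makes the "sti; mwait" sequence safe: an interrupt arriving after disable_intr() is held in the shadow and then wakes mwait, which runs with IF set, while a remote CPU can still wake it by writing *state to the line armed by cpu_monitor(), so no wakeup is lost either way.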