/*
 * amd64: block interrupts in spin locks with the task priority register
 * (%cr8) instead of clearing PSL_I.
 *
 * Summary of what the hunks below do:
 *  - spinlock_enter()/spinlock_exit() use raise_cr8(IDT_CRITICAL) and
 *    restore_cr8() instead of intr_disable()/intr_restore(); struct
 *    mdthread's md_saved_flags becomes md_saved_cr8.
 *  - IDT_CRITICAL (0xf0) is the first vector not blocked by spin locks.
 *    The TLB shootdown IPIs move to IDT_CRITICAL and above, while
 *    APIC_LOCAL_INTS moves from 240 to 224 and APIC_NUM_IOINTS shrinks
 *    from 191 to 175.
 *  - struct pcb gains pcb_cr8; cpu_switch.S stores %cr8 into it and
 *    acpi_switch.S reloads %cr8 from it.
 *  - intr_execute_handlers() and the timer, CMC and bitmap IPI vectors
 *    run their handlers with interrupts enabled and %cr8 raised to
 *    IDT_CRITICAL >> 4.
 *  - local_apic.c keeps the local APIC enabled when the "apic" hint is
 *    disabled (only cpu_add() and I/O APIC setup are skipped) and amd64
 *    panics when no local APIC is found.
 *  - mi_switch() and fork_exit() get temporary debugging assertions.
 *
 * NOTE(review): the header names on the #include lines of the first
 * kern/kern_fork.c hunk were lost before this copy of the patch was
 * made; restore them from the working copy before applying.
 * NOTE(review): tabs and blank context lines were reconstructed from the
 * hunk line counts; regenerate the diff if it does not apply cleanly.
 */
Index: kern/kern_synch.c
===================================================================
--- kern/kern_synch.c	(revision 225464)
+++ kern/kern_synch.c	(working copy)
@@ -392,6 +392,15 @@
 	struct thread *td;
 	struct proc *p;
 
+#ifdef __amd64__
+	CTR2(KTR_PROC, "mi_switch: suspending rflags %lx, cr8 %lx",
+	    read_rflags(), rcr8());
+	/* XXX: Temporary debugging */
+	KASSERT(read_rflags() & PSL_I,
+	    ("mi_switch() suspending with interrupts disabled"));
+	KASSERT(rcr8() == IDT_CRITICAL >> 4,
+	    ("mi_switch() suspending with bad TPR"));
+#endif
 	td = curthread;			/* XXX */
 	THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
 	p = td->td_proc;		/* XXX */
@@ -451,6 +460,16 @@
 	CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)",
 	    td->td_tid, td->td_sched, p->p_pid, td->td_name);
 
+#ifdef __amd64__
+	CTR2(KTR_PROC, "mi_switch: resuming rflags %lx, cr8 %lx", read_rflags(),
+	    rcr8());
+	/* XXX: Temporary debugging */
+	KASSERT(read_rflags() & PSL_I,
+	    ("mi_switch() resuming with interrupts disabled"));
+	KASSERT(rcr8() == IDT_CRITICAL >> 4,
+	    ("mi_switch() resuming with bad TPR"));
+#endif
+
 	/*
 	 * If the last thread was exiting, finish cleaning it up.
 	 */
Index: kern/kern_fork.c
===================================================================
--- kern/kern_fork.c	(revision 225464)
+++ kern/kern_fork.c	(working copy)
@@ -70,6 +70,9 @@
 #include
 #include
 
+#include
+#include
+
 #include
 #include
 
@@ -840,6 +843,17 @@
 	CTR4(KTR_PROC, "fork_exit: new thread %p (td_sched %p, pid %d, %s)",
 		td, td->td_sched, p->p_pid, td->td_name);
 
+#ifdef __amd64__
+	CTR2(KTR_PROC, "fork_exit: pcb_rflags %lx, pcb_cr8 %lx",
+	    td->td_pcb->pcb_rflags, td->td_pcb->pcb_cr8);
+	CTR2(KTR_PROC, "fork_exit: rflags %lx, cr8 %lx", read_rflags(),
+	    rcr8());
+	/* XXX: Temporary debugging */
+	KASSERT(rcr8() == IDT_CRITICAL >> 4,
+	    ("fork_exit() with bad TPR"));
+	KASSERT(read_rflags() & PSL_I,
+	    ("fork_exit() with interrupts disabled"));
+#endif
 	sched_fork_exit(td);
 	/*
 	* Processes normally resume in mi_switch() after being
Index: x86/x86/local_apic.c
===================================================================
--- x86/x86/local_apic.c	(revision 225464)
+++ x86/x86/local_apic.c	(working copy)
@@ -87,7 +87,6 @@
 /* Sanity checks on IDT vectors. */
 CTASSERT(APIC_IO_INTS + APIC_NUM_IOINTS == APIC_TIMER_INT);
 CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS);
-CTASSERT(APIC_LOCAL_INTS == 240);
 CTASSERT(IPI_STOP < APIC_SPURIOUS_INT);
 
 /* Magic IRQ values for the timer and syscalls. */
@@ -283,9 +282,13 @@
 	lapics[apic_id].la_ioint_irqs[IDT_DTRACE_RET - APIC_IO_INTS] =
 	    IRQ_DTRACE_RET;
 #endif
-
 #ifdef SMP
-	cpu_add(apic_id, boot_cpu);
+	/*
+	 * Don't add APs, just enable the local APIC on the BSP if
+	 * APIC support is disabled.
+	 */
+	if (!resource_disabled("apic", 0))
+		cpu_add(apic_id, boot_cpu);
 #endif
 }
 
@@ -325,8 +328,10 @@
 	saveintr = intr_disable();
 	maxlvt = (lapic->version & APIC_VER_MAXLVT) >> MAXLVTSHIFT;
 
+#ifdef __i386__
 	/* Initialize the TPR to allow all interrupts. */
 	lapic_set_tpr(0);
+#endif
 
 	/* Setup spurious vector and enable the local APIC. */
 	lapic_enable();
@@ -1281,10 +1286,6 @@
 	if (!(cpu_feature & CPUID_APIC))
 		return;
 
-	/* Don't probe if APIC mode is disabled. */
-	if (resource_disabled("apic", 0))
-		return;
-
 	/* First, probe all the enumerators to find the best match. */
 	best_enum = NULL;
 	best = 0;
@@ -1300,6 +1301,9 @@
 	if (best_enum == NULL) {
 		if (bootverbose)
 			printf("APIC: Could not find any APICs.\n");
+#ifdef __amd64__
+		panic("amd64 requires a local APIC");
+#endif
 		return;
 	}
 
@@ -1366,11 +1370,18 @@
 	if (best_enum == NULL)
 		return;
 
-	retval = best_enum->apic_setup_io();
-	if (retval != 0)
-		printf("%s: Failed to setup I/O APICs: returned %d\n",
-		    best_enum->apic_name, retval);
+	/*
+	 * Don't set up I/O APICs, but do leave the local APIC enabled if
+	 * APIC support is disabled.
+	 */
+	if (!resource_disabled("apic", 0)) {
+		retval = best_enum->apic_setup_io();
+		if (retval != 0)
+			printf("%s: Failed to setup I/O APICs: returned %d\n",
+			    best_enum->apic_name, retval);
+	}
+
 
 #ifdef XEN
 	return;
 #endif
@@ -1456,7 +1467,7 @@
 {
 	register_t icrlo, destfield;
 
-	KASSERT((vector & ~APIC_VECTOR_MASK) == 0,
+	KASSERT((vector & ~APIC_VECTOR_MASK) == 0 || vector == IPI_STOP_HARD,
 	    ("%s: invalid vector %d", __func__, vector));
 
 	icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE;
Index: amd64/acpica/acpi_switch.S
===================================================================
--- amd64/acpica/acpi_switch.S	(revision 225464)
+++ amd64/acpica/acpi_switch.S	(working copy)
@@ -99,11 +99,13 @@
 	andq	$~(CR0_EM | CR0_TS), %rax
 	movq	%rax, %cr0
 
-	/* Restore CR2 and CR4. */
+	/* Restore CR2, CR4, and CR8. */
 	movq	PCB_CR2(%rdi), %rax
 	movq	%rax, %cr2
 	movq	PCB_CR4(%rdi), %rax
 	movq	%rax, %cr4
+	movq	PCB_CR8(%rdi), %rax
+	movq	%rax, %cr8
 
 	/* Restore descriptor tables. */
 	lidt	PCB_IDT(%rdi)
Index: amd64/include/pcb.h
===================================================================
--- amd64/include/pcb.h	(revision 225464)
+++ amd64/include/pcb.h	(working copy)
@@ -59,12 +59,13 @@
 	register_t	pcb_cr2;
 	register_t	pcb_cr3;
 	register_t	pcb_cr4;
+	register_t	pcb_cr8;
 	register_t	pcb_dr0;
 	register_t	pcb_dr1;
 	register_t	pcb_dr2;
 	register_t	pcb_dr3;
 	register_t	pcb_dr6;
 	register_t	pcb_dr7;
 
 	struct region_descriptor pcb_gdt;
 	struct region_descriptor pcb_idt;
Index: amd64/include/asmacros.h
===================================================================
--- amd64/include/asmacros.h	(revision 225464)
+++ amd64/include/asmacros.h	(working copy)
@@ -199,6 +199,26 @@
 	movq %gs:PC_PRVSPACE, reg ;				\
 	addq $PC_ ## member, reg
 
+/*
+ * Macros to enable critical interrupts from an interrupt gate.
+ *
+ * ENABLE_CRITICAL pushes the current %cr8 on the stack, raises %cr8 to
+ * IDT_CRITICAL >> 4 and executes sti.  DISABLE_CRITICAL executes cli and
+ * pops the saved value back into %cr8.  Both clobber %rax, and the pair
+ * must be balanced since the saved value lives on the stack.
+ */
+#define	ENABLE_CRITICAL							\
+	movq	%cr8, %rax ;						\
+	pushq	%rax ;							\
+	movq	$IDT_CRITICAL >> 4, %rax ;				\
+	movq	%rax, %cr8 ;						\
+	sti
+
+#define	DISABLE_CRITICAL						\
+	cli ;								\
+	popq	%rax ;							\
+	movq	%rax, %cr8
+
 #endif /* LOCORE */
 
 #endif /* !_MACHINE_ASMACROS_H_ */
Index: amd64/include/apicvar.h
===================================================================
--- amd64/include/apicvar.h	(revision 225464)
+++ amd64/include/apicvar.h	(working copy)
@@ -42,11 +42,11 @@
  * Layout of local APIC interrupt vectors:
  *
  *	0xff (255)  +-------------+
- *                  |             | 15 (Spurious / IPIs / Local Interrupts)
+ *                  |             | 15 (Spurious / Critical IPIs )
  *	0xf0 (240)  +-------------+
- *                  |             | 14 (I/O Interrupts / Timer)
+ *                  |             | 14 (Local Interrupts / IPIs )
  *	0xe0 (224)  +-------------+
- *                  |             | 13 (I/O Interrupts)
+ *                  |             | 13 (I/O Interrupts / Timer )
  *	0xd0 (208)  +-------------+
  *                  |             | 12 (I/O Interrupts)
  *	0xc0 (192)  +-------------+
@@ -84,7 +84,7 @@
 
 /* I/O Interrupts are used for external devices such as ISA, PCI, etc. */
 #define	APIC_IO_INTS	(IDT_IO_INTS + 16)
-#define	APIC_NUM_IOINTS	191
+#define	APIC_NUM_IOINTS	175
 
 /* The timer interrupt is used for clock handling and drives hardclock, etc. */
 #define	APIC_TIMER_INT	(APIC_IO_INTS + APIC_NUM_IOINTS)
@@ -105,19 +105,15 @@
  */
 
 /* Interrupts for local APIC LVT entries other than the timer. */
-#define	APIC_LOCAL_INTS	240
+#define	APIC_LOCAL_INTS	224
 #define	APIC_ERROR_INT	APIC_LOCAL_INTS
 #define	APIC_THERMAL_INT (APIC_LOCAL_INTS + 1)
 #define	APIC_CMC_INT	(APIC_LOCAL_INTS + 2)
 #define	APIC_IPI_INTS	(APIC_LOCAL_INTS + 3)
 
-#define	IPI_RENDEZVOUS	(APIC_IPI_INTS)		/* Inter-CPU rendezvous. */
-#define	IPI_INVLTLB	(APIC_IPI_INTS + 1)	/* TLB Shootdown IPIs */
-#define	IPI_INVLPG	(APIC_IPI_INTS + 2)
-#define	IPI_INVLRNG	(APIC_IPI_INTS + 3)
-#define	IPI_INVLCACHE	(APIC_IPI_INTS + 4)
+
 /* Vector to handle bitmap based IPIs */
-#define	IPI_BITMAP_VECTOR	(APIC_IPI_INTS + 6)
+#define	IPI_BITMAP_VECTOR	(APIC_IPI_INTS)
 
 /* IPIs handled by IPI_BITMAPED_VECTOR (XXX ups is there a better place?) */
 #define	IPI_AST		0 	/* Generate software trap. */
@@ -128,10 +124,18 @@
 #define	IPI_BITMAP_LAST IPI_PROFCLOCK
 #define	IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
 
-#define	IPI_STOP	(APIC_IPI_INTS + 7)	/* Stop CPU until restarted. */
-#define	IPI_SUSPEND	(APIC_IPI_INTS + 8)	/* Suspend CPU until restarted. */
-#define	IPI_STOP_HARD	(APIC_IPI_INTS + 9)	/* Stop CPU with a NMI. */
+#define	IPI_STOP	(APIC_IPI_INTS + 1)	/* Stop CPU until restarted. */
+#define	IPI_SUSPEND	(APIC_IPI_INTS + 2)	/* Suspend CPU until restarted. */
+#define	IPI_RENDEZVOUS	(APIC_IPI_INTS + 3)	/* Inter-CPU rendezvous. */
 
+/* IPIs above this point are not blocked by spinlocks. */
+#define	IPI_INVLTLB	(IDT_CRITICAL)		/* TLB Shootdown IPIs */
+#define	IPI_INVLPG	(IDT_CRITICAL + 1)
+#define	IPI_INVLRNG	(IDT_CRITICAL + 2)
+#define	IPI_INVLCACHE	(IDT_CRITICAL + 3)
+
+#define	IPI_STOP_HARD	(NIDT)			/* Stop CPU with a NMI. */
+
 /*
  * The spurious interrupt can share the priority class with the IPIs since
  * it is not a normal interrupt. (Does not use the APIC's interrupt fifo)
Index: amd64/include/cpufunc.h
===================================================================
--- amd64/include/cpufunc.h	(revision 225464)
+++ amd64/include/cpufunc.h	(working copy)
@@ -400,6 +400,22 @@
 	return (data);
 }
 
+static __inline void
+load_cr8(u_long data)
+{
+
+	__asm __volatile("movq %0,%%cr8" : : "r" (data));
+}
+
+static __inline u_long
+rcr8(void)
+{
+	u_long	data;
+
+	__asm __volatile("movq %%cr8,%0" : "=r" (data));
+	return (data);
+}
+
 /*
  * Global TLB flush (except for thise for pages marked PG_G)
  */
@@ -658,6 +674,30 @@
 	write_rflags(rflags);
 }
 
+/*
+ * Raise the task priority in %cr8 to at least (level >> 4), where level
+ * is an IDT vector number, and return the previous %cr8 value so that
+ * it can later be handed to restore_cr8().  %cr8 is never lowered here.
+ */
+static __inline register_t
+raise_cr8(u_int level)
+{
+	register_t cr8;
+
+	cr8 = rcr8();
+	if (cr8 < (level >> 4))
+		load_cr8(level >> 4);
+	return (cr8);
+}
+
+/* Reload %cr8 with a value previously returned by raise_cr8(). */
+static __inline void
+restore_cr8(register_t cr8)
+{
+
+	load_cr8(cr8);
+}
+
 #else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */
 
 int	breakpoint(void);
@@ -716,7 +756,7 @@
 u_int64_t rdr6(void);
 u_int64_t rdr7(void);
 u_int64_t rdtsc(void);
-u_int	read_rflags(void);
+u_long	read_rflags(void);
 u_int	rfs(void);
 u_int	rgs(void);
 void	wbinvd(void);
Index: amd64/include/segments.h
===================================================================
--- amd64/include/segments.h	(revision 225464)
+++ amd64/include/segments.h	(working copy)
@@ -215,6 +215,7 @@
 #define	IDT_IO_INTS	NRSVIDT	/* Base of IDT entries for I/O interrupts. */
 #define	IDT_SYSCALL	0x80	/* System Call Interrupt Vector */
 #define	IDT_DTRACE_RET	0x92	/* DTrace pid provider Interrupt Vector */
+#define	IDT_CRITICAL	0xf0	/* First interrupt not blocked by spin locks */
 
 /*
  * Entries in the Global Descriptor Table (GDT)
Index: amd64/include/proc.h
===================================================================
--- amd64/include/proc.h	(revision 225464)
+++ amd64/include/proc.h	(working copy)
@@ -45,7 +45,7 @@
  */
 struct mdthread {
 	int	md_spinlock_count;	/* (k) */
-	register_t md_saved_flags;	/* (k) */
+	register_t md_saved_cr8;	/* (k) */
 };
 
 struct mdproc {
Index: amd64/amd64/vm_machdep.c
===================================================================
--- amd64/amd64/vm_machdep.c	(revision 225464)
+++ amd64/amd64/vm_machdep.c	(working copy)
@@ -184,7 +187,7 @@
 
 	/* Setup to release spin count in fork_exit(). */
 	td2->td_md.md_spinlock_count = 1;
-	td2->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+	td2->td_md.md_saved_cr8 = 0;
 
 	/* As an i386, do not copy io permission bitmap. */
 	pcb2->pcb_tssp = NULL;
@@ -422,7 +428,7 @@
 
 	/* Setup to release spin count in fork_exit(). */
 	td->td_md.md_spinlock_count = 1;
-	td->td_md.md_saved_flags = PSL_KERNEL | PSL_I;
+	td->td_md.md_saved_cr8 = 0;
 }
 
 /*
Index: amd64/amd64/genassym.c
===================================================================
--- amd64/amd64/genassym.c	(revision 225464)
+++ amd64/amd64/genassym.c	(working copy)
@@ -140,12 +140,13 @@
 ASSYM(PCB_CR2, offsetof(struct pcb, pcb_cr2));
 ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3));
 ASSYM(PCB_CR4, offsetof(struct pcb, pcb_cr4));
+ASSYM(PCB_CR8, offsetof(struct pcb, pcb_cr8));
 ASSYM(PCB_DR0, offsetof(struct pcb, pcb_dr0));
 ASSYM(PCB_DR1, offsetof(struct pcb, pcb_dr1));
 ASSYM(PCB_DR2, offsetof(struct pcb, pcb_dr2));
 ASSYM(PCB_DR3, offsetof(struct pcb, pcb_dr3));
 ASSYM(PCB_DR6, offsetof(struct pcb, pcb_dr6));
 ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
 ASSYM(PCB_GDT, offsetof(struct pcb, pcb_gdt));
 ASSYM(PCB_IDT, offsetof(struct pcb, pcb_idt));
 ASSYM(PCB_LDT, offsetof(struct pcb, pcb_ldt));
@@ -230,6 +234,8 @@
 ASSYM(LA_ICR_HI, offsetof(struct LAPIC, icr_hi));
 ASSYM(LA_ISR, offsetof(struct LAPIC, isr0));
 
+ASSYM(IDT_CRITICAL, IDT_CRITICAL);
+
 ASSYM(KCSEL, GSEL(GCODE_SEL, SEL_KPL));
 ASSYM(KDSEL, GSEL(GDATA_SEL, SEL_KPL));
 ASSYM(KUCSEL, GSEL(GUCODE_SEL, SEL_UPL));
Index: amd64/amd64/cpu_switch.S
===================================================================
--- amd64/amd64/cpu_switch.S	(revision 225464)
+++ amd64/amd64/cpu_switch.S	(working copy)
@@ -76,7 +76,7 @@
 	movq	PCPU(CURPMAP),%rdx
 	LK btrl	%eax,PM_ACTIVE(%rdx)		/* clear old */
 1:
-	movq	TD_PCB(%rsi),%r8		/* newtd->td_proc */
+	movq	TD_PCB(%rsi),%r8		/* newtd->td_pcb */
 	movq	PCB_CR3(%r8),%rdx
 	movq	%rdx,%cr3			/* new address space */
 	jmp	swact
@@ -322,6 +334,8 @@
 	movq	%rax,PCB_CR3(%rdi)
 	movq	%cr4,%rax
 	movq	%rax,PCB_CR4(%rdi)
+	movq	%cr8,%rax
+	movq	%rax,PCB_CR8(%rdi)
 
 	movq	%dr0,%rax
 	movq	%rax,PCB_DR0(%rdi)
Index: amd64/amd64/mp_machdep.c
===================================================================
--- amd64/amd64/mp_machdep.c	(revision 225464)
+++ amd64/amd64/mp_machdep.c	(working copy)
@@ -684,8 +684,10 @@
 	wrmsr(MSR_STAR, msr);
 	wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D);
 
+#if 0
 	/* Disable local APIC just to be sure. */
 	lapic_disable();
+#endif
 
 	/* signal our startup to the BSP. */
 	mp_naps++;
@@ -725,6 +727,10 @@
 	/* Init local apic for irq's */
 	lapic_setup(1);
 
+	/* Raise TPR and enable interrupts. */
+	raise_cr8(IDT_CRITICAL);
+	enable_intr();
+
 	/* Set memory range attributes for this CPU to match the BSP */
 	mem_range_AP_init();
 
@@ -1107,6 +1113,10 @@
 	mtx_unlock_spin(&smp_ipi_mtx);
 }
 
+/* CR8 assumes the low 4 bits are always zero. */
+CTASSERT((IDT_CRITICAL & 0x0f) == 0);
+CTASSERT(IPI_RENDEZVOUS < IDT_CRITICAL && IPI_INVLTLB >= IDT_CRITICAL);
+
 /*
  * Send an IPI to specified CPU handling the bitmap logic.
  */
Index: amd64/amd64/machdep.c
===================================================================
--- amd64/amd64/machdep.c	(revision 225464)
+++ amd64/amd64/machdep.c	(working copy)
@@ -1767,13 +1767,13 @@
 spinlock_enter(void)
 {
 	struct thread *td;
-	register_t flags;
+	register_t cr8;
 
 	td = curthread;
 	if (td->td_md.md_spinlock_count == 0) {
-		flags = intr_disable();
+		cr8 = raise_cr8(IDT_CRITICAL);
 		td->td_md.md_spinlock_count = 1;
-		td->td_md.md_saved_flags = flags;
+		td->td_md.md_saved_cr8 = cr8;
 	} else
 		td->td_md.md_spinlock_count++;
 	critical_enter();
@@ -1783,14 +1783,14 @@
 spinlock_exit(void)
 {
 	struct thread *td;
-	register_t flags;
+	register_t cr8;
 
 	td = curthread;
 	critical_exit();
-	flags = td->td_md.md_saved_flags;
+	cr8 = td->td_md.md_saved_cr8;
 	td->td_md.md_spinlock_count--;
 	if (td->td_md.md_spinlock_count == 0)
-		intr_restore(flags);
+		restore_cr8(cr8);
 }
 
 /*
Index: amd64/amd64/trap.c
===================================================================
--- amd64/amd64/trap.c	(revision 225464)
+++ amd64/amd64/trap.c	(working copy)
@@ -293,13 +293,7 @@
 				 */
 				printf("kernel trap %d with interrupts disabled\n",
 				    type);
-
-				/*
-				 * We shouldn't enable interrupts while holding a
-				 * spin lock.
-				 */
-				if (td->td_md.md_spinlock_count == 0)
-					enable_intr();
+				enable_intr();
 			}
 		}
 
Index: amd64/amd64/apic_vector.S
===================================================================
--- amd64/amd64/apic_vector.S	(revision 225464)
+++ amd64/amd64/apic_vector.S	(working copy)
@@ -100,7 +100,9 @@
 	PUSH_FRAME
 	FAKE_MCOUNT(TF_RIP(%rsp))
 	movq	%rsp, %rdi
+	ENABLE_CRITICAL
 	call	lapic_handle_timer
+	DISABLE_CRITICAL
 	MEXITCOUNT
 	jmp	doreti
 
@@ -112,7 +114,9 @@
 IDTVEC(cmcint)
 	PUSH_FRAME
 	FAKE_MCOUNT(TF_RIP(%rsp))
+	ENABLE_CRITICAL
 	call	lapic_handle_cmc
+	DISABLE_CRITICAL
 	MEXITCOUNT
 	jmp	doreti
 
@@ -227,7 +231,9 @@
 
 	FAKE_MCOUNT(TF_RIP(%rsp))
 
+	ENABLE_CRITICAL
 	call	ipi_bitmap_handler
+	DISABLE_CRITICAL
 	MEXITCOUNT
 	jmp	doreti
 
Index: amd64/amd64/intr_machdep.c
===================================================================
--- amd64/amd64/intr_machdep.c	(revision 225464)
+++ amd64/amd64/intr_machdep.c	(working copy)
@@ -232,9 +232,19 @@
 intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
 {
 	struct intr_event *ie;
+	register_t cr8;
 	int vector;
 
 	/*
+	 * Raise cr8 and enable interrupts to permit critical
+	 * interrupts while processing device interrupts.
+	 */
+	cr8 = raise_cr8(IDT_CRITICAL);
+	KASSERT(cr8 < (IDT_CRITICAL >> 4),
+	    ("device interrupt with critical cr8"));
+	enable_intr();
+
+	/*
 	 * We count software interrupts when we process them.  The
 	 * code here follows previous practice, but there's an
 	 * argument for counting hardware interrupts when they're
@@ -267,6 +277,10 @@
 			    "too many stray irq %d's: not logging anymore\n",
 			    vector);
 	}
+
+	/* Disable interrupts and restore cr8. */
+	disable_intr();
+	restore_cr8(cr8);
 }
 
 void