diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 82e6e56..95d3c46 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -86,6 +86,8 @@ __FBSDID("$FreeBSD: src/sys/amd64/amd64/machdep.c,v 1.675 2007/06/06 07:35:07 da
 #include
 #include
 #include
+#include
+#include
 #include
 
 #include
@@ -1910,3 +1912,175 @@ outb(u_int port, u_char data)
 }
 
 #endif /* KDB */
+
+/* Number of MCE banks, taken from MSR_MCG_CAP by init_mce(). */
+static int banks;
+int panic_on_uc_mce;
+static int log_corrected_mce = 1;
+static int mce_poll_delay = 30;
+static int corrected_mce_count;
+static int uncorrected_mce_count;
+static int mce_enable = 1;
+
+static void mce_kthread(void *);
+int handle_mce(void);
+static void init_mce(void *);
+
+TUNABLE_INT("machdep.mce.enable", &mce_enable);
+
+SYSCTL_NODE(_machdep, OID_AUTO, mce, CTLFLAG_RW, 0, "");
+SYSCTL_INT(_machdep_mce, OID_AUTO, panic_on_uc, CTLFLAG_RW, &panic_on_uc_mce, 0, "Panic on uncorrected MCE");
+SYSCTL_INT(_machdep_mce, OID_AUTO, log_corrected, CTLFLAG_RW, &log_corrected_mce, 0, "Log corrected MCEs");
+SYSCTL_INT(_machdep_mce, OID_AUTO, poll_delay, CTLFLAG_RW, &mce_poll_delay, 0, "MCE poll delay, in seconds");
+SYSCTL_INT(_machdep_mce, OID_AUTO, corrected_count, CTLFLAG_RW, &corrected_mce_count, 0, "Corrected MCEs count");
+SYSCTL_INT(_machdep_mce, OID_AUTO, uncorrected_count, CTLFLAG_RW, &uncorrected_mce_count, 0, "Uncorrected MCEs count");
+SYSCTL_INT(_machdep_mce, OID_AUTO, enable, CTLFLAG_RD, &mce_enable, 0, "MCE subsystem enabled");
+
+/*
+ * Scan every MCE bank on the current CPU.  Each bank whose status has
+ * MCE_STATUS_VAL set is logged, counted and then cleared; MSR_MCG_STATUS
+ * is cleared once all banks have been visited.
+ *
+ * Returns SIGBUS if any valid bank reported an uncorrected error, else 0.
+ */
+int
+handle_mce(void)
+{
+	long addr, misc, status, tsc;
+	int cpu, i, regs[4], ret;
+
+	ret = 0;
+	/* CPU id is used for logging only. */
+	cpu = PCPU_GET(cpuid);
+	for (i = 0; i < banks; i++) {
+		status = rdmsr(MSR_MC0_STATUS + 4 * i);
+		if ((status & MCE_STATUS_VAL) == 0)
+			continue;
+
+		tsc = rdtsc();
+		/*
+		 * ADDR and MISC are only read when the bank flags them valid;
+		 * reset them so a value from an earlier bank is not reported.
+		 */
+		addr = misc = 0;
+		if ((status & MCE_STATUS_ADDR) != 0)
+			addr = rdmsr(MSR_MC0_ADDR + 4 * i);
+		if ((status & MCE_STATUS_MISC) != 0)
+			misc = rdmsr(MSR_MC0_MISC + 4 * i);
+		if ((status & MCE_STATUS_UC) != 0) {
+			ret = SIGBUS;
+			printf("Uncorrected MCE on cpu %d bank: %d status: %lx"
+			    " addr: %lx misc: %lx tsc: %lx\n", cpu, i, status,
+			    addr, misc, tsc);
+			atomic_add_int(&uncorrected_mce_count, 1);
+		} else {
+			if (log_corrected_mce)
+				printf("Corrected MCE on cpu %d bank: %d"
+				    " status: %lx addr: %lx misc: %lx"
+				    " tsc: %lx\n", cpu, i, status, addr, misc,
+				    tsc);
+			atomic_add_int(&corrected_mce_count, 1);
+		}
+		wrmsr(MSR_MC0_STATUS + 4 * i, 0);
+		/* We need a synchronizing instruction */
+		do_cpuid(1, regs);
+	}
+	wrmsr(MSR_MCG_STATUS, 0);
+
+	return (ret);
+}
+
+/*
+ * Polling thread.  Binds itself to each present CPU in turn, runs
+ * handle_mce() there with interrupts disabled, and then sleeps with a
+ * timeout of mce_poll_delay seconds before the next pass.
+ */
+static void
+mce_kthread(void *unused)
+{
+	register_t rflags;
+	int cpu, ret;
+
+	while (1) {
+		for (cpu = 0; cpu < MAXCPU; cpu++) {
+			if (CPU_ABSENT(cpu))
+				continue;
+
+			thread_lock(curthread);
+			sched_bind(curthread, cpu);
+			thread_unlock(curthread);
+
+			/*
+			 * Uncorrected MCEs will generate a #MC, while corrected
+			 * don't, so we have to periodically poll for them.
+			 */
+			rflags = intr_disable();
+			ret = handle_mce();
+			intr_restore(rflags);
+			if (ret != 0 && panic_on_uc_mce)
+				panic("Uncorrected machine check exception");
+
+			thread_lock(curthread);
+			sched_unbind(curthread);
+			thread_unlock(curthread);
+		}
+		mtx_lock(&Giant);
+		tsleep(mce_kthread, PPAUSE, "mce poll", mce_poll_delay * hz);
+		mtx_unlock(&Giant);
+	}
+}
+
+/*
+ * SYSINIT hook.  Does nothing when mce_enable is 0 or CPUID lacks either
+ * the MCA or the MCE feature bit.  Otherwise reads the bank count from
+ * MSR_MCG_CAP, and on every present CPU sets CR4_MCE, writes all-ones to
+ * MSR_MCG_CTL (if MCG_CTL_P is set) and to each bank's CTL register, and
+ * zeroes each bank's STATUS register.  Finally starts the polling thread.
+ */
+static void
+init_mce(void *unused)
+{
+	long cap;
+	int cpu, error, i, regs[4];
+
+	if (mce_enable == 0)
+		return;
+
+	do_cpuid(1, regs);
+	/* MCA or MCE not supported. */
+	if (((regs[3] & CPUID_MCA) == 0) || ((regs[3] & CPUID_MCE) == 0))
+		return;
+
+	cap = rdmsr(MSR_MCG_CAP);
+	banks = cap & MCG_CTL_COUNT;
+	if (bootverbose)
+		printf("%s: found %d MCE banks\n", __func__, banks);
+
+	for (cpu = 0; cpu < MAXCPU; cpu++) {
+		if (CPU_ABSENT(cpu))
+			continue;
+
+		thread_lock(curthread);
+		sched_bind(curthread, cpu);
+		thread_unlock(curthread);
+		load_cr4(rcr4() | CR4_MCE);
+
+		if (cap & MCG_CTL_P)
+			wrmsr(MSR_MCG_CTL, ~0UL);
+
+		for (i = 0; i < banks; i++) {
+			wrmsr(MSR_MC0_CTL + 4 * i, ~0UL);
+			/* Clear this bank's status, not just bank 0. */
+			wrmsr(MSR_MC0_STATUS + 4 * i, 0);
+		}
+		thread_lock(curthread);
+		sched_unbind(curthread);
+		thread_unlock(curthread);
+	}
+
+	error = kthread_create(mce_kthread, NULL, NULL, 0, 0, "mce poll");
+	if (error)
+		printf("Couldn't start MCE poll thread\n");
+}
+
+SYSINIT(mce, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, init_mce, NULL)
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 93d8fd6..9abc0ba 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -147,6 +147,9 @@ SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RW,
 
 extern char *syscallnames[];
 
+int handle_mce(void);
+extern int panic_on_uc_mce;
+
 /*
  * Exception, fault, and trap interface to the FreeBSD kernel.
  * This common code is called from assembly language IDT gate entry
@@ -371,6 +374,13 @@ trap(struct trapframe *frame)
 			ucode = 0;	/* XXX */
 			i = SIGFPE;
 			break;
+		case T_MCHK:
+			i = handle_mce();
+			if (i == 0)
+				goto userout;
+			if (panic_on_uc_mce)
+				panic("Uncorrected machine check exception");
+			break;
 		}
 	} else {
 		/* kernel trap */
@@ -493,6 +503,13 @@ trap(struct trapframe *frame)
 				goto out;
 			/* FALLTHROUGH */
 #endif /* DEV_ISA */
+			/* Keep the T_NMI fall-through out of the T_MCHK case. */
+			break;
+		case T_MCHK:
+			i = handle_mce();
+			if (i && panic_on_uc_mce)
+				break;
+			goto out;
 		}
 
 		trap_fatal(frame, 0);
diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h
index 91436d0..7b4b363 100644
--- a/sys/amd64/include/specialreg.h
+++ b/sys/amd64/include/specialreg.h
@@ -393,4 +393,15 @@
 #define	MSR_TOP_MEM		0xc001001a	/* boundary for ram below 4G */
 #define	MSR_TOP_MEM2		0xc001001d	/* boundary for ram above 4G */
 
+/* Machine check: per-bank STATUS flag bits and MSR_MCG_CAP fields. */
+#define	MCE_STATUS_PCC		0x0200000000000000UL
+#define	MCE_STATUS_ADDR		0x0400000000000000UL	/* bank ADDR register valid */
+#define	MCE_STATUS_MISC		0x0800000000000000UL	/* bank MISC register valid */
+#define	MCE_STATUS_EN		0x1000000000000000UL
+#define	MCE_STATUS_UC		0x2000000000000000UL	/* uncorrected error */
+#define	MCE_STATUS_OVER		0x4000000000000000UL
+#define	MCE_STATUS_VAL		0x8000000000000000UL	/* bank status valid */
+#define	MCG_CTL_P		0x00000100
+#define	MCG_CTL_COUNT		0x000000ff	/* number of banks */
+
 #endif /* !_MACHINE_SPECIALREG_H_ */