--- /dev/null	2009-03-16 20:17:56.000000000 -0400
+++ sys/amd64/acpica/Makefile	2009-03-10 17:20:05.000000000 -0400
@@ -0,0 +1,33 @@
+# $FreeBSD$
+
+# Correct path for kernel builds
+# Don't rely on the kernel's .depend file
+.ifdef MAKESRCPATH
+.PATH:		${MAKESRCPATH}
+DEPENDFILE=
+.else
+MAKESRCPATH=	${.CURDIR}
+CLEANFILES=	acpi_wakecode.h acpi_wakedata.h acpi_wakecode.bin acpi_wakecode.o
+.endif
+.if ${CC} == "icc"
+CFLAGS+=	-restrict
+NOSTDINC=	-X
+.else
+NOSTDINC=	-nostdinc
+.endif
+CFLAGS+=	${NOSTDINC} -include opt_global.h -I. -I${MAKESRCPATH}/../..
+
+all: acpi_wakecode.h acpi_wakedata.h
+
+acpi_wakecode.o: acpi_wakecode.S assym.s
+
+acpi_wakecode.bin: acpi_wakecode.o
+	objcopy -S -O binary acpi_wakecode.o acpi_wakecode.bin
+
+acpi_wakecode.h: acpi_wakecode.bin
+	sh ${MAKESRCPATH}/genwakecode.sh > acpi_wakecode.h
+
+acpi_wakedata.h: acpi_wakecode.bin
+	sh ${MAKESRCPATH}/genwakedata.sh > acpi_wakedata.h
+
+.include <bsd.prog.mk>
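
The three build steps above turn the real-mode trampoline into an ordinary C array: the object file is flattened with objcopy -S -O binary, then file2c(1) wraps the raw bytes in a C initializer. The generated acpi_wakecode.h therefore presumably looks like the sketch below; the array name is real (acpi_wakeup.c later does bcopy(wakecode, ...) and CTASSERTs its size), but the byte values shown are illustrative only.

	/*
	 * Illustrative shape of the generated acpi_wakecode.h (values are
	 * examples, not real output): file2c emits the raw trampoline image
	 * between the prefix and suffix strings given in genwakecode.sh.
	 */
	static char wakecode[] = {
		250, 252, 140, 200,	/* e.g. cli; cld; mov %cs, %ax ... */
		/* ... remainder of acpi_wakecode.bin ... */
	};
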
--- /dev/null	2009-03-16 20:17:56.000000000 -0400
+++ sys/amd64/acpica/acpi_switch.S	2009-03-16 19:59:39.000000000 -0400
@@ -0,0 +1,177 @@
+/*-
+ * Copyright (c) 2001 Takanori Watanabe
+ * Copyright (c) 2001 Mitsuru IWASAKI
+ * Copyright (c) 2008-2009 Jung-uk Kim
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "acpi_wakedata.h"
+#include "assym.s"
+
+#define	WAKEUP_DECL(member)	\
+	.set	WAKEUP_ ## member, wakeup_ ## member - wakeup_ctx
+
+	WAKEUP_DECL(xpcb)
+	WAKEUP_DECL(gdt)
+	WAKEUP_DECL(efer)
+	WAKEUP_DECL(pat)
+	WAKEUP_DECL(star)
+	WAKEUP_DECL(lstar)
+	WAKEUP_DECL(cstar)
+	WAKEUP_DECL(sfmask)
+	WAKEUP_DECL(cpu)
+
+#define	WAKEUP_CTX(member)	WAKEUP_ ## member (%rdi)
+#define	WAKEUP_PCB(member)	PCB_ ## member(%r11)
+#define	WAKEUP_XPCB(member)	XPCB_ ## member(%r11)
+
+ENTRY(acpi_restorecpu)
+	/* Switch to KPML4phys. */
+	movq	%rsi, %rax
+	movq	%rax, %cr3
+
+	/* Restore GDT. */
+	lgdt	WAKEUP_CTX(gdt)
+	jmp	1f
+1:
+
+	/* Fetch PCB. */
+	movq	WAKEUP_CTX(xpcb), %r11
+
+	/* Restore segment registers. */
+	mov	WAKEUP_PCB(DS), %ds
+	mov	WAKEUP_PCB(ES), %es
+	mov	WAKEUP_XPCB(SS), %ss
+	mov	WAKEUP_PCB(FS), %fs
+	mov	WAKEUP_PCB(GS), %gs
+
+	movl	$MSR_FSBASE, %ecx
+	movl	WAKEUP_PCB(FSBASE), %eax
+	movl	4 + WAKEUP_PCB(FSBASE), %edx
+	wrmsr
+	movl	$MSR_GSBASE, %ecx
+	movl	WAKEUP_PCB(GSBASE), %eax
+	movl	4 + WAKEUP_PCB(GSBASE), %edx
+	wrmsr
+	movl	$MSR_KGSBASE, %ecx
+	movl	WAKEUP_XPCB(KGSBASE), %eax
+	movl	4 + WAKEUP_XPCB(KGSBASE), %edx
+	wrmsr
+
+	/* Restore EFER. */
+	movl	$MSR_EFER, %ecx
+	movl	WAKEUP_CTX(efer), %eax
+	wrmsr
+
+	/* Restore PAT. */
+	movl	$MSR_PAT, %ecx
+	movl	WAKEUP_CTX(pat), %eax
+	movl	4 + WAKEUP_CTX(pat), %edx
+	wrmsr
+
+	/* Restore fast syscall stuff. */
+	movl	$MSR_STAR, %ecx
+	movl	WAKEUP_CTX(star), %eax
+	movl	4 + WAKEUP_CTX(star), %edx
+	wrmsr
+	movl	$MSR_LSTAR, %ecx
+	movl	WAKEUP_CTX(lstar), %eax
+	movl	4 + WAKEUP_CTX(lstar), %edx
+	wrmsr
+	movl	$MSR_CSTAR, %ecx
+	movl	WAKEUP_CTX(cstar), %eax
+	movl	4 + WAKEUP_CTX(cstar), %edx
+	wrmsr
+	movl	$MSR_SF_MASK, %ecx
+	movl	WAKEUP_CTX(sfmask), %eax
+	wrmsr
+
+	/* Restore CR0, CR2 and CR4. */
+	movq	WAKEUP_XPCB(CR0), %rax
+	movq	%rax, %cr0
+	movq	WAKEUP_XPCB(CR2), %rax
+	movq	%rax, %cr2
+	movq	WAKEUP_XPCB(CR4), %rax
+	movq	%rax, %cr4
+
+	/* Restore descriptor tables. */
+	lidt	WAKEUP_XPCB(IDT)
+	lldt	WAKEUP_XPCB(LDT)
+	movw	WAKEUP_XPCB(TR), %ax
+	ltr	%ax
+
+	/* Restore other callee-saved registers. */
+	movq	WAKEUP_PCB(R15), %r15
+	movq	WAKEUP_PCB(R14), %r14
+	movq	WAKEUP_PCB(R13), %r13
+	movq	WAKEUP_PCB(R12), %r12
+	movq	WAKEUP_PCB(RBP), %rbp
+	movq	WAKEUP_PCB(RSP), %rsp
+	movq	WAKEUP_PCB(RBX), %rbx
+
+	/* Restore debug registers. */
+	movq	WAKEUP_PCB(DR0), %rax
+	movq	%rax, %dr0
+	movq	WAKEUP_PCB(DR1), %rax
+	movq	%rax, %dr1
+	movq	WAKEUP_PCB(DR2), %rax
+	movq	%rax, %dr2
+	movq	WAKEUP_PCB(DR3), %rax
+	movq	%rax, %dr3
+	movq	WAKEUP_PCB(DR6), %rax
+	movq	%rax, %dr6
+	movq	WAKEUP_PCB(DR7), %rax
+	movq	%rax, %dr7
+
+	/* Restore return address. */
+	movq	WAKEUP_PCB(RIP), %rax
+	movq	%rax, (%rsp)
+
+	/* Indicate the CPU is resumed. */
+	xorl	%eax, %eax
+	movl	%eax, WAKEUP_CTX(cpu)
+
+	ret
+END(acpi_restorecpu)
+
+ENTRY(acpi_savecpu)
+	/* Fetch XPCB and save CPU context. */
+	movq	%rdi, %r10
+	call	savectx2
+	movq	%r10, %r11
+
+	/* Patch caller's return address and stack pointer. */
+	movq	(%rsp), %rax
+	movq	%rax, WAKEUP_PCB(RIP)
+	movq	%rsp, %rax
+	movq	%rax, WAKEUP_PCB(RSP)
+
+	movl	$1, %eax
+	ret
+END(acpi_savecpu)
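
acpi_savecpu() and acpi_restorecpu() behave like a kernel-mode setjmp/longjmp pair: acpi_savecpu() stores the caller's %rip and %rsp in the xpcb and returns 1, while acpi_restorecpu() later rewinds execution to that very call site with %eax set to 0. The sleep path in acpi_sleep_machdep(), later in this patch, keys off that double return; condensed:

	/* Condensed from acpi_sleep_machdep() below; error handling omitted. */
	if (acpi_savecpu(&stopxpcbs[0])) {
		/* First return: context captured; enter the sleep state. */
		AcpiEnterSleepState(state);
		for (;;)
			ia32_pause();	/* not reached on success */
	} else {
		/* "Second" return: we got here via acpi_restorecpu(). */
		acpi_wakeup_cpus(sc, wakeup_cpus);
	}
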
--- /dev/null	2009-03-16 20:17:56.000000000 -0400
+++ sys/amd64/acpica/acpi_wakecode.S	2009-03-16 17:56:07.000000000 -0400
@@ -0,0 +1,278 @@
+/*-
+ * Copyright (c) 2001 Takanori Watanabe
+ * Copyright (c) 2001 Mitsuru IWASAKI
+ * Copyright (c) 2003 Peter Wemm
+ * Copyright (c) 2008 Jung-uk Kim
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define LOCORE
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "assym.s"
+
+/*
+ * Resume entry point for real mode.
+ *
+ * If XFirmwareWakingVector is zero and FirmwareWakingVector is non-zero
+ * in FACS, the BIOS enters here in real mode after POST with CS set to
+ * (FirmwareWakingVector >> 4) and IP set to (FirmwareWakingVector & 0xf).
+ * Depending on the previous sleep state, we may need to initialize more
+ * of the system (i.e., S3 suspend-to-RAM vs. S4 suspend-to-disk).
+ *
+ * Note: If XFirmwareWakingVector is non-zero, it should disable address
+ * translation/paging and interrupts, load all segment registers with
+ * a flat 4 GB address space, and set EFLAGS.IF to zero.  Currently
+ * this mode is not supported by this code.
+ */
+
+	.data				/* So we can modify it */
+
+	ALIGN_TEXT
+wakeup_start:
+	.code16
+	/*
+	 * Set up segment registers for real mode, a small stack for
+	 * any calls we make, and clear any flags.
+	 */
+	cli				/* make sure no interrupts */
+	cld
+	mov	%cs, %ax		/* copy %cs to %ds.  Remember these */
+	mov	%ax, %ds		/* are offsets rather than selectors */
+	mov	%ax, %ss
+	movw	$PAGE_SIZE - 8, %sp
+	pushw	$0
+	popfw
+
+	/* To debug resume hangs, beep the speaker if the user requested. */
+	cmpw	$0, resume_beep - wakeup_start
+	je	1f
+	movb	$0xc0, %al
+	outb	%al, $0x42
+	movb	$0x04, %al
+	outb	%al, $0x42
+	inb	$0x61, %al
+	orb	$0x3, %al
+	outb	%al, $0x61
+	movw	$0, resume_beep - wakeup_start
+1:
+
+	/* Re-initialize video BIOS if the reset_video tunable is set. */
+	cmpw	$0, reset_video - wakeup_start
+	je	1f
+	lcall	$0xc000, $3
+	movw	$0, reset_video - wakeup_start
+
+	/*
+	 * Set up segment registers for real mode again in case the
+	 * previous BIOS call clobbers them.
+	 */
+	mov	%cs, %ax
+	mov	%ax, %ds
+	mov	%ax, %ss
+1:
+
+	/*
+	 * Find the relocation base and patch the gdt descriptor and
+	 * ljmp targets.
+	 */
+	xorl	%ebx, %ebx
+	mov	%cs, %bx
+	sall	$4, %ebx		/* %ebx is now our relocation base */
+
+	/*
+	 * Load the descriptor table pointer.  We'll need it when running
+	 * in 16-bit protected mode.
+	 */
+	lgdtl	bootgdtdesc - wakeup_start
+
+	/* Enable protected mode */
+	movl	$CR0_PE, %eax
+	mov	%eax, %cr0
+
+	/*
+	 * Now execute a far jump to turn on protected mode.  This
+	 * causes the segment registers to turn into selectors and causes
+	 * %cs to be loaded from the gdt.
+	 *
+	 * The following instruction is:
+	 * ljmpl $bootcode32 - bootgdt, $wakeup_32 - wakeup_start
+	 * but gas cannot assemble that.  And besides, we patch the targets
+	 * in early startup and it's a little clearer what we are patching.
+	 */
+wakeup_sw32:
+	.byte	0x66			/* size override to 32 bits */
+	.byte	0xea			/* opcode for far jump */
+	.long	wakeup_32 - wakeup_start /* offset in segment */
+	.word	bootcode32 - bootgdt	/* index in gdt for 32 bit code */
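
The hand-assembled ljmpl above is exactly what the kernel patches at startup: acpi_install_wakeup_handler() (later in this patch) writes the relocated target into the bytes at wakeup_sw32 + 2. Viewed as a hypothetical C struct (not part of the patch), the patched instruction is:

	/*
	 * Illustrative layout of the 6-byte far jump at wakeup_sw32.  The
	 * 32-bit offset starts at byte 2, which is why the fixup below
	 * targets wakeup_sw32 + 2.
	 */
	struct farjmp32 {
		uint8_t		prefix;		/* 0x66: operand size to 32 bits */
		uint8_t		opcode;		/* 0xea: far jump, absolute */
		uint32_t	offset;		/* WAKECODE_PADDR(sc) + wakeup_32 */
		uint16_t	selector;	/* bootcode32 - bootgdt */
	} __packed;
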
+
+	/*
+	 * At this point, we are running in 32-bit legacy protected mode.
+	 */
+	.code32
+wakeup_32:
+
+	mov	$bootdata32 - bootgdt, %eax
+	mov	%ax, %ds
+
+	/* Turn on the PAE and PSE bits for when paging is enabled */
+	mov	%cr4, %eax
+	orl	$(CR4_PAE | CR4_PSE), %eax
+	mov	%eax, %cr4
+
+	/*
+	 * Enable EFER.LME so that we get long mode when all the prereqs are
+	 * in place.  In this case, it turns on when CR0_PG is finally enabled.
+	 * Pick up a few other EFER bits that we'll need while we're here.
+	 */
+	movl	$MSR_EFER, %ecx
+	rdmsr
+	orl	$EFER_LME | EFER_SCE, %eax
+	wrmsr
+
+	/*
+	 * Point to the embedded page tables for startup.  Note that this
+	 * only gets accessed after we're actually in 64-bit mode, however
+	 * we can only set the bottom 32 bits of %cr3 in this state.  This
+	 * means we are required to use a temporary page table that is below
+	 * the 4GB limit.  %ebx is still our relocation base.  We could just
+	 * subtract 3 * PAGE_SIZE, but that would be too easy.
+	 */
+	leal	wakeup_pagetables - wakeup_start(%ebx), %eax
+	movl	(%eax), %eax
+	mov	%eax, %cr3
+
+	/*
+	 * Finally, switch to long mode by enabling paging.  We have
+	 * to be very careful here because all the segmentation disappears
+	 * out from underneath us.  The spec says we can depend on the
+	 * subsequent pipelined branch to execute, but *only if* everything
+	 * is still identity mapped.  If any mappings change, the pipeline
+	 * will flush.
+	 */
+	mov	%cr0, %eax
+	orl	$CR0_PG, %eax
+	mov	%eax, %cr0
+
+	/*
+	 * At this point paging is enabled, and we are in "compatibility"
+	 * mode.  We do another far jump to reload %cs with the 64-bit
+	 * selector.  %cr3 points to a 4-level page table page.
+	 * We cannot yet jump all the way to the kernel because we can only
+	 * specify a 32-bit linear address.  So, yet another trampoline.
+	 *
+	 * The following instruction is:
+	 * ljmp $bootcode64 - bootgdt, $wakeup_64 - wakeup_start
+	 * but gas cannot assemble that.  And besides, we patch the targets
+	 * in early startup and it's a little clearer what we are patching.
+	 */
+wakeup_sw64:
+	.byte	0xea			/* opcode for far jump */
+	.long	wakeup_64 - wakeup_start /* offset in segment */
+	.word	bootcode64 - bootgdt	/* index in gdt for 64 bit code */
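
For reference, the long-mode entry just performed follows the architecturally required order: PAE on, a sub-4GB %cr3 loaded, EFER.LME set, and CR0.PG flipped last. Rendered as C using FreeBSD's cpufunc helpers (a sketch only; the real switch cannot run as C, since no 64-bit code can execute yet, and wakeup_pagetables_pa is a stand-in for the value loaded from wakeup_pagetables):

	/* Sketch of the mode switch, assuming <machine/cpufunc.h> helpers. */
	load_cr4(rcr4() | CR4_PAE | CR4_PSE);	/* prerequisites for long mode */
	load_cr3(wakeup_pagetables_pa);		/* must fit in 32 bits here */
	wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_LME | EFER_SCE);
	load_cr0(rcr0() | CR0_PG);		/* paging on => long mode live */
	/* ...followed by a far jump to load %cs with a 64-bit selector. */
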
+
+	/*
+	 * Yeehar!  We're running in 64-bit mode!  We can mostly ignore our
+	 * segment registers, and get on with it.
+	 * Note that we are running at the correct virtual address, but with
+	 * a 1:1 1GB mirrored mapping over the entire address space.  We had
+	 * better switch to a real %cr3 promptly so that we can get to the
+	 * direct map space.  Remember that jmp is relative and that we've
+	 * been relocated, so use an indirect jump.
+	 */
+	.code64
+wakeup_64:
+	mov	$bootdata64 - bootgdt, %eax
+	mov	%ax, %ds
+
+	/* Restore arguments and return. */
+	movq	wakeup_ctx - wakeup_start(%rbx), %rdi
+	movq	wakeup_kpml4 - wakeup_start(%rbx), %rsi
+	movq	wakeup_retaddr - wakeup_start(%rbx), %rax
+	jmp	*%rax
+
+	ALIGN_DATA
+bootgdt:
+	.long	0x00000000
+	.long	0x00000000
+
+bootcode64:
+	.long	0x0000ffff
+	.long	0x00af9b00
+
+bootdata64:
+	.long	0x0000ffff
+	.long	0x00af9300
+
+bootcode32:
+	.long	0x0000ffff
+	.long	0x00cf9b00
+
+bootdata32:
+	.long	0x0000ffff
+	.long	0x00cf9300
+bootgdtend:
+
+wakeup_pagetables:
+	.long	0
+
+bootgdtdesc:
+	.word	bootgdtend - bootgdt	/* Length */
+	.long	bootgdt - wakeup_start	/* Offset plus %ds << 4 */
+
+	ALIGN_DATA
+resume_beep:
+	.long	0
+reset_video:
+	.long	0
+wakeup_retaddr:
+	.quad	0
+wakeup_kpml4:
+	.quad	0
+
+wakeup_ctx:
+	.quad	0
+wakeup_xpcb:
+	.quad	0
+wakeup_gdt:
+	.word	0
+	.quad	0
+wakeup_efer:
+	.quad	0
+wakeup_pat:
+	.quad	0
+wakeup_star:
+	.quad	0
+wakeup_lstar:
+	.quad	0
+wakeup_cstar:
+	.quad	0
+wakeup_sfmask:
+	.quad	0
+wakeup_cpu:
+	.long	0
+dummy:
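
The bootgdt constants are plain segment descriptors. The low dword 0x0000ffff holds limit bits 15:0 and base 15:0; the differences live in the second dword, where 0x00af9b00 sets L=1 (64-bit code) and 0x00cf9b00 sets D=1 (32-bit code), both with G=1 and a present, DPL-0, execute/read code type of 0xb. A hypothetical decoder, purely to make the bit positions explicit:

	/* Illustrative only: decode the second dword of a bootgdt entry. */
	static void
	gdt_decode(uint32_t hi)
	{
		int type     = (hi >> 8) & 0xf;	/* 0xb: exec/read code */
		int present  = (hi >> 15) & 1;	/* P */
		int longmode = (hi >> 21) & 1;	/* L: 1 in 0x00af9b00 */
		int def32    = (hi >> 22) & 1;	/* D: 1 in 0x00cf9b00 */
		int gran     = (hi >> 23) & 1;	/* G: 4KB granularity */

		printf("type %x P %d L %d D %d G %d\n",
		    type, present, longmode, def32, gran);
	}
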
--- /dev/null	2009-03-16 20:17:56.000000000 -0400
+++ sys/amd64/acpica/genwakecode.sh	2009-03-10 17:20:05.000000000 -0400
@@ -0,0 +1,6 @@
+#!/bin/sh
+# $FreeBSD$
+#
+file2c 'static char wakecode[] = {' '};' <acpi_wakecode.bin
--- sys/dev/acpica/acpi.c	(revision 189902)
+++ sys/dev/acpica/acpi.c	(working copy)
 #include
 #include
+#ifdef SMP
+#include <sys/sched.h>
+#endif
 #include
+#include <sys/timetc.h>
 
 #if defined(__i386__) || defined(__amd64__)
 #include
@@ -2274,6 +2278,7 @@
 	return (acpi_EnterSleepState(sc, state));
 }
 
+#if defined(__amd64__) || defined(__i386__)
 static void
 acpi_sleep_force(void *arg)
 {
@@ -2284,6 +2289,7 @@
     if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
 	printf("acpi: force sleep state S%d failed\n", sc->acpi_next_sstate);
 }
+#endif
 
 /*
  * Request that the system enter the given suspend state.  All /dev/apm
@@ -2294,7 +2300,9 @@
 int
 acpi_ReqSleepState(struct acpi_softc *sc, int state)
 {
+#if defined(__i386__)
     struct apm_clone_data *clone;
+#endif
 
     if (state < ACPI_STATE_S1 || state > ACPI_STATE_S5)
 	return (EINVAL);
@@ -2307,11 +2315,7 @@
 	return (ENXIO);
     }
 
-#if !defined(__i386__)
-    /* This platform does not support acpi suspend/resume. */
-    return (EOPNOTSUPP);
-#endif
-
+#if defined(__amd64__) || defined(__i386__)
     /* If a suspend request is already in progress, just return. */
     ACPI_LOCK(acpi);
     if (sc->acpi_next_sstate != 0) {
@@ -2321,6 +2325,7 @@
 
     /* Record the pending state and notify all apm devices. */
     sc->acpi_next_sstate = state;
+#if defined(__i386__)
     STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) {
 	clone->notify_status = APM_EV_NONE;
 	if ((clone->flags & ACPI_EVF_DEVD) == 0) {
@@ -2328,6 +2333,7 @@
 	    KNOTE_UNLOCKED(&clone->sel_read.si_note, 0);
 	}
     }
+#endif
 
     /* If devd(8) is not running, immediately enter the sleep state. */
     if (devctl_process_running() == FALSE) {
@@ -2352,6 +2358,10 @@
     callout_reset(&sc->susp_force_to, 10 * hz, acpi_sleep_force, sc);
     ACPI_UNLOCK(acpi);
     return (0);
+#else
+    /* This platform does not support acpi suspend/resume. */
+    return (EOPNOTSUPP);
+#endif
 }
 
 /*
@@ -2364,14 +2374,10 @@
 int
 acpi_AckSleepState(struct apm_clone_data *clone, int error)
 {
+#if defined(__amd64__) || defined(__i386__)
     struct acpi_softc *sc;
    int ret, sleeping;
 
-#if !defined(__i386__)
-    /* This platform does not support acpi suspend/resume. */
-    return (EOPNOTSUPP);
-#endif
-
     /* If no pending sleep state, return an error. */
     ACPI_LOCK(acpi);
     sc = clone->acpi_sc;
@@ -2395,8 +2401,9 @@
      * all devices, seeing if they agree yet.  We only count devices that
      * are writable since read-only devices couldn't ack the request.
      */
+    sleeping = TRUE;
+#if defined(__i386__)
     clone->notify_status = APM_EV_ACKED;
-    sleeping = TRUE;
     STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) {
 	if ((clone->flags & ACPI_EVF_WRITE) != 0 &&
 	    clone->notify_status != APM_EV_ACKED) {
@@ -2404,6 +2411,7 @@
 	    break;
 	}
     }
+#endif
 
     /* If all devices have voted "yes", we will suspend now. */
     if (sleeping)
@@ -2414,8 +2422,11 @@
 	if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate)))
 	    ret = ENODEV;
     }
-
     return (ret);
+#else
+    /* This platform does not support acpi suspend/resume. */
+    return (EOPNOTSUPP);
+#endif
 }
 
 static void
@@ -2459,11 +2470,18 @@
     sc->acpi_sleep_disabled = 1;
     ACPI_UNLOCK(acpi);
 
+#ifdef SMP
+    thread_lock(curthread);
+    sched_bind(curthread, 0);
+    thread_unlock(curthread);
+#endif
+
     /*
      * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
      * drivers need this.
      */
    mtx_lock(&Giant);
+
     slp_state = ACPI_SS_NONE;
     switch (state) {
     case ACPI_STATE_S1:
@@ -2570,6 +2588,17 @@
     acpi_UserNotify("Resume", ACPI_ROOT_OBJECT, state);
 
     mtx_unlock(&Giant);
+
+    /* Warm up timecounter again */
+    (void)timecounter->tc_get_timecount(timecounter);
+    (void)timecounter->tc_get_timecount(timecounter);
+
+#ifdef SMP
+    thread_lock(curthread);
+    sched_unbind(curthread);
+    thread_unlock(curthread);
+#endif
+
     return_ACPI_STATUS (status);
 }
--- sys/dev/acpica/acpi_ec.c	(revision 189902)
+++ sys/dev/acpica/acpi_ec.c	(working copy)
@@ -747,7 +747,7 @@
      * If booting, check if we need to run the query handler.  If so, we
      * call it directly here since our thread taskq is not active yet.
      */
-    if (cold || rebooting) {
+    if (cold || rebooting || sc->ec_suspending) {
 	if ((EC_GET_CSR(sc) & EC_EVENT_SCI)) {
 	    CTR0(KTR_ACPI, "ec running gpe handler directly");
 	    EcGpeQueryHandler(sc);
--- sys/i386/i386/i686_mem.c	(revision 189902)
+++ sys/i386/i386/i686_mem.c	(working copy)
@@ -73,11 +73,13 @@
 static int	i686_mrset(struct mem_range_softc *sc,
 		    struct mem_range_desc *mrd, int *arg);
 static void	i686_mrAPinit(struct mem_range_softc *sc);
+static void	i686_mrreinit(struct mem_range_softc *sc);
 
 static struct mem_range_ops i686_mrops = {
 	i686_mrinit,
 	i686_mrset,
-	i686_mrAPinit
+	i686_mrAPinit,
+	i686_mrreinit
 };
 
 /* XXX for AP startup hook */
@@ -668,7 +670,31 @@
 	wrmsr(MSR_MTRRdefType, mtrrdef);
 }
 
+/*
+ * Re-initialise the running CPU(s)' MTRRs to match the ranges in the
+ * descriptor list.
+ *
+ * XXX Must be called with interrupts enabled.
+ */
 static void
+i686_mrreinit(struct mem_range_softc *sc)
+{
+#ifdef SMP
+	/*
+	 * We should use ipi_all_but_self() to call other CPUs into a
+	 * locking gate, then call a target function to do this work.
+	 * The "proper" solution involves a generalised locking gate
+	 * implementation, not ready yet.
+	 */
+	smp_rendezvous(NULL, (void *)i686_mrAPinit, NULL, sc);
+#else
+	disable_intr();				/* disable interrupts */
+	i686_mrAPinit(sc);
+	enable_intr();
+#endif
+}
+
+static void
 i686_mem_drvinit(void *unused)
 {
--- sys/i386/i386/k6_mem.c	(revision 189902)
+++ sys/i386/i386/k6_mem.c	(working copy)
@@ -70,6 +70,7 @@
 {
 	k6_mrinit,
 	k6_mrset,
+	NULL,
 	NULL
 };
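
Note the shape of this API change: struct mem_range_ops (amended in sys/sys/memrange.h at the end of this patch) gains a fourth, optional member, and k6_mem.c passes NULL because it has no reinit implementation. Every caller therefore has to guard the hook; the resume path added in acpi_wakeup.c does it like this:

	/* Guarded dispatch, as used at the end of acpi_sleep_machdep(). */
	if (mem_range_softc.mr_op != NULL &&
	    mem_range_softc.mr_op->reinit != NULL)
		mem_range_softc.mr_op->reinit(&mem_range_softc);
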
--- sys/amd64/acpica/acpi_wakeup.c	(revision 189902)
+++ sys/amd64/acpica/acpi_wakeup.c	(working copy)
@@ -1,6 +1,8 @@
 /*-
  * Copyright (c) 2001 Takanori Watanabe
  * Copyright (c) 2001 Mitsuru IWASAKI
+ * Copyright (c) 2003 Peter Wemm
+ * Copyright (c) 2008-2009 Jung-uk Kim
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,18 +31,411 @@
 __FBSDID("$FreeBSD$");
 
 #include
+#include
 #include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+#ifdef SMP
+#include
+#include
+#endif
+
 #include
 #include
+
+#include "acpi_wakecode.h"
+#include "acpi_wakedata.h"
+
+/* Make sure the code is less than a page and leave room for the stack. */
+CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024);
+
+#ifndef _SYS_CDEFS_H_
+#error this file needs sys/cdefs.h as a prerequisite
+#endif
+
+extern uint32_t	acpi_resume_beep;
+extern uint32_t	acpi_reset_video;
+
+#ifdef SMP
+extern struct xpcb	*stopxpcbs;
+#else
+static struct xpcb	*stopxpcbs;
+#endif
+
+int	acpi_restorecpu(struct xpcb *, vm_offset_t);
+int	acpi_savecpu(struct xpcb *);
+
+static void	acpi_reset_tss(int cpu);
+static void	acpi_alloc_wakeup_handler(void);
+static void	acpi_stop_beep(void *);
+
+#ifdef SMP
+static int	acpi_wakeup_ap(struct acpi_softc *, int);
+static void	acpi_wakeup_cpus(struct acpi_softc *, cpumask_t);
+#endif
+
+#define	WAKECODE_VADDR(sc)	((sc)->acpi_wakeaddr + (3 * PAGE_SIZE))
+#define	WAKECODE_PADDR(sc)	((sc)->acpi_wakephys + (3 * PAGE_SIZE))
+#define	WAKECODE_FIXUP(offset, type, val) do	{	\
+	type	*addr;					\
+	addr = (type *)(WAKECODE_VADDR(sc) + offset);	\
+	*addr = val;					\
+} while (0)
+
+/* Turn off bits 1&2 of the PIT, stopping the beep. */
+static void
+acpi_stop_beep(void *arg)
+{
+	outb(0x61, inb(0x61) & ~0x3);
+}
+
+#ifdef SMP
+static int
+acpi_wakeup_ap(struct acpi_softc *sc, int cpu)
+{
+	int		vector = (WAKECODE_PADDR(sc) >> 12) & 0xff;
+	int		apic_id = cpu_apic_ids[cpu];
+	int		ms;
+
+	WAKECODE_FIXUP(wakeup_xpcb, struct xpcb *, &stopxpcbs[cpu]);
+	WAKECODE_FIXUP(wakeup_gdt, uint16_t, stopxpcbs[cpu].xpcb_gdt.rd_limit);
+	WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t,
+	    stopxpcbs[cpu].xpcb_gdt.rd_base);
+	WAKECODE_FIXUP(wakeup_cpu, int, cpu);
+
+	acpi_reset_tss(cpu);
+
+	/* do an INIT IPI: assert RESET */
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+	    APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id);
+
+	/* wait for pending status end */
+	lapic_ipi_wait(-1);
+
+	/* do an INIT IPI: deassert RESET */
+	lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL |
+	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0);
+
+	/* wait for pending status end */
+	DELAY(10000);			/* wait ~10 ms */
+	lapic_ipi_wait(-1);
+
+	/*
+	 * Next we do a STARTUP IPI: the previous INIT IPI might still be
+	 * latched (P5 bug), in which case this first STARTUP terminates
+	 * immediately and the previously started INIT IPI continues.
+	 * Otherwise the previous INIT IPI has already run, or was ignored,
+	 * and this STARTUP IPI will run.
+	 */
+
+	/* do a STARTUP IPI */
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+	    vector, apic_id);
+	lapic_ipi_wait(-1);
+	DELAY(200);			/* wait ~200 us */
+
+	/*
+	 * Finally we do a second STARTUP IPI: this one runs if the first
+	 * STARTUP IPI was cancelled by a latched INIT IPI.  Otherwise it is
+	 * ignored, as only one STARTUP IPI is recognized after a hardware
+	 * RESET or INIT IPI.
+	 */
+	lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
+	    APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP |
+	    vector, apic_id);
+	lapic_ipi_wait(-1);
+	DELAY(200);			/* wait ~200 us */
+
+	/* Wait up to 5 seconds for it to start. */
+	for (ms = 0; ms < 5000; ms++) {
+		if (*(int *)(WAKECODE_VADDR(sc) + wakeup_cpu) == 0)
+			return (1);	/* return SUCCESS */
+		DELAY(1000);
+	}
+	return (0);			/* return FAILURE */
+}
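
The vector computation above encodes a hardware constraint: a STARTUP IPI carries only an 8-bit page number, so the wake code must start on a 4KB boundary below 1MB. The contigmalloc() call in acpi_alloc_wakeup_handler() below guarantees both; hypothetical assertions (not in the patch) would read:

	/* Hypothetical sanity checks for the STARTUP IPI target. */
	KASSERT((WAKECODE_PADDR(sc) & PAGE_MASK) == 0,
	    ("wake code is not page-aligned"));
	KASSERT(WAKECODE_PADDR(sc) < 1024 * 1024,
	    ("wake code is not below 1MB"));
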
+
+#define	WARMBOOT_TARGET		0
+#define	WARMBOOT_OFF		(KERNBASE + 0x0467)
+#define	WARMBOOT_SEG		(KERNBASE + 0x0469)
+
+#define	CMOS_REG		(0x70)
+#define	CMOS_DATA		(0x71)
+#define	BIOS_RESET		(0x0f)
+#define	BIOS_WARM		(0x0a)
+
+static void
+acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus)
+{
+	uint32_t	mpbioswarmvec;
+	cpumask_t	map;
+	int		cpu;
+	u_char		mpbiosreason;
+
+	/* save the current value of the warm-start vector */
+	mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF);
+	outb(CMOS_REG, BIOS_RESET);
+	mpbiosreason = inb(CMOS_DATA);
+
+	/* set up a vector to our boot code */
+	*((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET;
+	*((volatile u_short *)WARMBOOT_SEG) = WAKECODE_PADDR(sc) >> 4;
+	outb(CMOS_REG, BIOS_RESET);
+	outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
+
+	/* Wake up each AP. */
+	for (cpu = 1; cpu < mp_ncpus; cpu++) {
+		map = 1ul << cpu;
+		if ((wakeup_cpus & map) != map)
+			continue;
+		if (acpi_wakeup_ap(sc, cpu) == 0) {
+			/* restore the warmstart vector */
+			*(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
+			panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)",
+			    cpu, cpu_apic_ids[cpu]);
+		}
+	}
+
+	/* restore the warmstart vector */
+	*(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
+
+	outb(CMOS_REG, BIOS_RESET);
+	outb(CMOS_DATA, mpbiosreason);
+}
+#endif
+
+static void
+acpi_reset_tss(int cpu)
+{
+	uint32_t	*tss;
+
+	/*
+	 * We have to clear the "task busy" bit in the TSS descriptor to
+	 * restore the task register later.  Otherwise, ltr causes a
+	 * general protection fault.
+	 */
+	tss = (uint32_t *)&gdt[NGDT * cpu + GPROC0_SEL] + 1;
+	*tss &= ~((SDT_SYSBSY ^ SDT_SYSTSS) << 8);
+}
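
The XOR in acpi_reset_tss() is compact but precise: SDT_SYSTSS (9, available TSS) and SDT_SYSBSY (11, busy TSS) differ in exactly one bit of the descriptor's 4-bit type field, which sits at bits 8..11 of the dword being edited. Expanded for clarity (illustrative, same effect):

	/* (SDT_SYSBSY ^ SDT_SYSTSS) == 2, so the mask below is 0x200. */
	uint32_t busy = (SDT_SYSBSY ^ SDT_SYSTSS) << 8;

	*tss &= ~busy;		/* descriptor type: busy -> available */
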
+ */ + cr3 = rcr3(); + load_cr3(KPML4phys); + + stopfpu = &stopxpcbs[0].xpcb_pcb.pcb_save; + if (acpi_savecpu(&stopxpcbs[0])) { + fpugetregs(curthread, stopfpu); + +#ifdef SMP + if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) { + device_printf(sc->acpi_dev, + "Failed to suspend APs: CPU mask = 0x%jx\n", + (uintmax_t)(wakeup_cpus & ~stopped_cpus)); + goto out; + } +#endif + + WAKECODE_FIXUP(resume_beep, uint32_t, acpi_resume_beep); + WAKECODE_FIXUP(reset_video, uint32_t, acpi_reset_video); + + WAKECODE_FIXUP(wakeup_xpcb, struct xpcb *, &stopxpcbs[0]); + WAKECODE_FIXUP(wakeup_gdt, uint16_t, + stopxpcbs[0].xpcb_gdt.rd_limit); + WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, + stopxpcbs[0].xpcb_gdt.rd_base); + WAKECODE_FIXUP(wakeup_cpu, int, 0); + + acpi_reset_tss(0); + + /* Call ACPICA to enter the desired sleep state */ + if (state == ACPI_STATE_S4 && sc->acpi_s4bios) + status = AcpiEnterSleepStateS4bios(); + else + status = AcpiEnterSleepState(state); + + if (status != AE_OK) { + device_printf(sc->acpi_dev, + "AcpiEnterSleepState failed - %s\n", + AcpiFormatException(status)); + goto out; + } + + for (;;) + ia32_pause(); + } else { + fpusetregs(curthread, stopfpu); + + WAKECODE_FIXUP(resume_beep, uint32_t, 0); + WAKECODE_FIXUP(reset_video, uint32_t, 0); +#ifdef SMP + if (wakeup_cpus != 0) + acpi_wakeup_cpus(sc, wakeup_cpus); +#endif + ret = 0; + } + +out: +#ifdef SMP + if (wakeup_cpus != 0) + restart_cpus(wakeup_cpus); +#endif + + load_cr3(cr3); + intr_resume(); + intr_restore(rf); + + AcpiSetFirmwareWakingVector(0); + + if (ret == 0 && mem_range_softc.mr_op != NULL && + mem_range_softc.mr_op->reinit != NULL) + mem_range_softc.mr_op->reinit(&mem_range_softc); + + /* If we beeped, turn it off after a delay. */ + if (acpi_resume_beep) + timeout(acpi_stop_beep, NULL, 3 * hz); + + return (ret); } +static vm_offset_t acpi_wakeaddr; + +static void +acpi_alloc_wakeup_handler(void) +{ + void *wakeaddr; + + if (!cold) + return; + + /* + * Specify the region for our wakeup code. We want it in the low 1 MB + * region, excluding video memory and above (0xa0000). We ask for + * it to be page-aligned, just to be safe. + */ + wakeaddr = contigmalloc(4 * PAGE_SIZE, M_DEVBUF, M_NOWAIT, 0, 0x9ffff, + PAGE_SIZE, 0ul); + if (wakeaddr == NULL) { + printf("%s: can't alloc wake memory\n", __func__); + return; + } + stopxpcbs = malloc(mp_ncpus * sizeof(*stopxpcbs), M_DEVBUF, M_NOWAIT); + if (stopxpcbs == NULL) { + contigfree(wakeaddr, 4 * PAGE_SIZE, M_DEVBUF); + printf("%s: can't alloc CPU state memory\n", __func__); + return; + } + acpi_wakeaddr = (vm_offset_t)wakeaddr; +} + +SYSINIT(acpiwakeup, SI_SUB_KMEM, SI_ORDER_ANY, acpi_alloc_wakeup_handler, 0); + void acpi_install_wakeup_handler(struct acpi_softc *sc) { + uint64_t *pt4, *pt3, *pt2; + int i; + + if (acpi_wakeaddr == 0ul) + return; + + sc->acpi_wakeaddr = acpi_wakeaddr; + sc->acpi_wakephys = vtophys(acpi_wakeaddr); + + bcopy(wakecode, (void *)WAKECODE_VADDR(sc), sizeof(wakecode)); + + /* Patch GDT base address, ljmp targets and page table base address. */ + WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t, + WAKECODE_PADDR(sc) + bootgdt); + WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t, + WAKECODE_PADDR(sc) + wakeup_32); + WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t, + WAKECODE_PADDR(sc) + wakeup_64); + WAKECODE_FIXUP(wakeup_pagetables, uint32_t, sc->acpi_wakephys); + + /* Save pointers to some global data. 
 
 void
 acpi_install_wakeup_handler(struct acpi_softc *sc)
 {
+	uint64_t	*pt4, *pt3, *pt2;
+	int		i;
+
+	if (acpi_wakeaddr == 0ul)
+		return;
+
+	sc->acpi_wakeaddr = acpi_wakeaddr;
+	sc->acpi_wakephys = vtophys(acpi_wakeaddr);
+
+	bcopy(wakecode, (void *)WAKECODE_VADDR(sc), sizeof(wakecode));
+
+	/* Patch GDT base address, ljmp targets and page table base address. */
+	WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t,
+	    WAKECODE_PADDR(sc) + bootgdt);
+	WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t,
+	    WAKECODE_PADDR(sc) + wakeup_32);
+	WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t,
+	    WAKECODE_PADDR(sc) + wakeup_64);
+	WAKECODE_FIXUP(wakeup_pagetables, uint32_t, sc->acpi_wakephys);
+
+	/* Save pointers to some global data. */
+	WAKECODE_FIXUP(wakeup_retaddr, void *, acpi_restorecpu);
+	WAKECODE_FIXUP(wakeup_kpml4, uint64_t, KPML4phys);
+	WAKECODE_FIXUP(wakeup_ctx, vm_offset_t,
+	    WAKECODE_VADDR(sc) + wakeup_ctx);
+	WAKECODE_FIXUP(wakeup_efer, uint64_t, rdmsr(MSR_EFER));
+	WAKECODE_FIXUP(wakeup_pat, uint64_t, rdmsr(MSR_PAT));
+	WAKECODE_FIXUP(wakeup_star, uint64_t, rdmsr(MSR_STAR));
+	WAKECODE_FIXUP(wakeup_lstar, uint64_t, rdmsr(MSR_LSTAR));
+	WAKECODE_FIXUP(wakeup_cstar, uint64_t, rdmsr(MSR_CSTAR));
+	WAKECODE_FIXUP(wakeup_sfmask, uint64_t, rdmsr(MSR_SF_MASK));
+
+	/* Build temporary page tables below the real-mode code. */
+	pt4 = (uint64_t *)acpi_wakeaddr;
+	pt3 = pt4 + (PAGE_SIZE) / sizeof(uint64_t);
+	pt2 = pt3 + (PAGE_SIZE) / sizeof(uint64_t);
+
+	/* Create the initial 1GB replicated page tables */
+	for (i = 0; i < 512; i++) {
+		/*
+		 * Each slot of the level 4 pages points
+		 * to the same level 3 page
+		 */
+		pt4[i] = (uint64_t)(sc->acpi_wakephys + PAGE_SIZE);
+		pt4[i] |= PG_V | PG_RW | PG_U;
+
+		/*
+		 * Each slot of the level 3 pages points
+		 * to the same level 2 page
+		 */
+		pt3[i] = (uint64_t)(sc->acpi_wakephys + (2 * PAGE_SIZE));
+		pt3[i] |= PG_V | PG_RW | PG_U;
+
+		/* The level 2 page slots are mapped with 2MB pages for 1GB. */
+		pt2[i] = i * (2 * 1024 * 1024);
+		pt2[i] |= PG_V | PG_RW | PG_PS | PG_U;
+	}
+
+	if (bootverbose)
+		device_printf(sc->acpi_dev, "wakeup code va %p pa %p\n",
+		    (void *)sc->acpi_wakeaddr, (void *)sc->acpi_wakephys);
 }
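
Since every PML4 slot points at the same PDP, and every PDP slot at the same PD, the temporary tables simply ignore all virtual-address bits above bit 29. Any address, identity-mapped or KERNBASE-relative, resolves into the same 1GB window, which is what lets the trampoline run from its physical location while jumping to kernel-linked symbols. In effect (illustrative):

	/* Effective translation under the temporary wakeup page tables. */
	static vm_paddr_t
	wakeup_tmp_translate(vm_offset_t va)
	{
		return (va & ((1ul << 30) - 1));	/* va mod 1GB */
	}
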
--- sys/amd64/acpica/acpi_machdep.c	(revision 189902)
+++ sys/amd64/acpica/acpi_machdep.c	(working copy)
@@ -31,25 +31,50 @@
 #include
 #include
 #include
+#include <sys/sysctl.h>
 
 #include
 #include
 #include
 
+SYSCTL_DECL(_debug_acpi);
+
+uint32_t acpi_resume_beep;
+TUNABLE_INT("debug.acpi.resume_beep", &acpi_resume_beep);
+SYSCTL_UINT(_debug_acpi, OID_AUTO, resume_beep, CTLFLAG_RW, &acpi_resume_beep,
+    0, "Beep the PC speaker when resuming");
+uint32_t acpi_reset_video;
+TUNABLE_INT("hw.acpi.reset_video", &acpi_reset_video);
+
 static int intr_model = ACPI_INTR_PIC;
+static struct apm_clone_data acpi_clone;
 
 int
 acpi_machdep_init(device_t dev)
 {
-	struct acpi_softc *sc;
+	struct acpi_softc	*sc;
 
 	sc = devclass_get_softc(devclass_find("acpi"), 0);
+
+	/* Create a fake clone for /dev/acpi. */
+	STAILQ_INIT(&sc->apm_cdevs);
+	acpi_clone.cdev = sc->acpi_dev_t;
+	acpi_clone.acpi_sc = sc;
+	ACPI_LOCK(acpi);
+	STAILQ_INSERT_TAIL(&sc->apm_cdevs, &acpi_clone, entries);
+	ACPI_UNLOCK(acpi);
+	sc->acpi_clone = &acpi_clone;
 	acpi_install_wakeup_handler(sc);
 
 	if (intr_model != ACPI_INTR_PIC)
 		acpi_SetIntrModel(intr_model);
 
+	SYSCTL_ADD_UINT(&sc->acpi_sysctl_ctx,
+	    SYSCTL_CHILDREN(sc->acpi_sysctl_tree), OID_AUTO,
+	    "reset_video", CTLFLAG_RW, &acpi_reset_video, 0,
+	    "Call the VESA reset BIOS vector on the resume path");
+
 	return (0);
 }
--- sys/amd64/include/smp.h	(revision 189902)
+++ sys/amd64/include/smp.h	(working copy)
@@ -48,11 +48,13 @@
 	IDTVEC(invlcache),	/* Write back and invalidate cache */
 	IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */
 	IDTVEC(cpustop),	/* CPU stops & waits to be restarted */
+	IDTVEC(cpususpend),	/* CPU suspends & waits to be resumed */
 	IDTVEC(rendezvous);	/* handle CPU rendezvous */
 
 /* functions in mp_machdep.c */
 void	cpu_add(u_int apic_id, char boot_cpu);
 void	cpustop_handler(void);
+void	cpususpend_handler(void);
 void	init_secondary(void);
 void	ipi_selected(u_int cpus, u_int ipi);
 void	ipi_all_but_self(u_int ipi);
--- sys/amd64/include/pcb.h	(revision 189902)
+++ sys/amd64/include/pcb.h	(working copy)
@@ -82,11 +82,25 @@
 	struct	user_segment_descriptor pcb_gs32sd;
 };
 
+struct xpcb {
+	struct	pcb xpcb_pcb;
+	register_t xpcb_cr0;
+	register_t xpcb_cr2;
+	register_t xpcb_cr4;
+	register_t xpcb_kgsbase;
+	uint32_t xpcb_ss;
+	struct	region_descriptor xpcb_gdt;
+	struct	region_descriptor xpcb_idt;
+	struct	region_descriptor xpcb_ldt;
+	uint16_t xpcb_tr;
+};
+
 #ifdef _KERNEL
 struct trapframe;
 
 void	makectx(struct trapframe *, struct pcb *);
 void	savectx(struct pcb *);
+int	savectx2(struct xpcb *);
 #endif
 
 #endif /* _AMD64_PCB_H_ */
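
One subtlety of struct xpcb: acpi_switch.S and savectx2 apply PCB_* offsets directly to an xpcb pointer, which is only correct because xpcb_pcb is the first member (genassym.c's XPCB_PCB below evaluates to 0). The patch relies on that layout silently; a defensive compile-time check (not in the patch) would be:

	/* Hypothetical guard: the asm assumes the embedded pcb is at offset 0. */
	CTASSERT(offsetof(struct xpcb, xpcb_pcb) == 0);
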
--- sys/amd64/include/apicvar.h	(revision 189902)
+++ sys/amd64/include/apicvar.h	(working copy)
@@ -130,6 +130,7 @@
 #define	IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
 
 #define	IPI_STOP	(APIC_IPI_INTS + 7)	/* Stop CPU until restarted. */
+#define	IPI_SUSPEND	(APIC_IPI_INTS + 8)	/* Suspend CPU until restarted. */
 
 /*
  * The spurious interrupt can share the priority class with the IPIs since
--- sys/amd64/amd64/db_trace.c	(revision 189902)
+++ sys/amd64/amd64/db_trace.c	(working copy)
@@ -316,6 +316,7 @@
 	    strcmp(name, "Xtimerint") == 0 ||
 	    strcmp(name, "Xipi_intr_bitmap_handler") == 0 ||
 	    strcmp(name, "Xcpustop") == 0 ||
+	    strcmp(name, "Xcpususpend") == 0 ||
 	    strcmp(name, "Xrendezvous") == 0)
 		frame_type = INTERRUPT;
 	else if (strcmp(name, "Xfast_syscall") == 0)
@@ -327,6 +328,7 @@
 	/* XXX: These are interrupts with trap frames. */
 	else if (strcmp(name, "Xtimerint") == 0 ||
 	    strcmp(name, "Xcpustop") == 0 ||
+	    strcmp(name, "Xcpususpend") == 0 ||
 	    strcmp(name, "Xrendezvous") == 0 ||
 	    strcmp(name, "Xipi_intr_bitmap_handler") == 0)
 		frame_type = TRAP_INTERRUPT;
--- sys/amd64/amd64/genassym.c	(revision 189902)
+++ sys/amd64/amd64/genassym.c	(working copy)
@@ -155,6 +155,19 @@
 
 ASSYM(PCB_SIZE, sizeof(struct pcb));
 
+ASSYM(XPCB_PCB, offsetof(struct xpcb, xpcb_pcb));
+ASSYM(XPCB_CR0, offsetof(struct xpcb, xpcb_cr0));
+ASSYM(XPCB_CR2, offsetof(struct xpcb, xpcb_cr2));
+ASSYM(XPCB_CR4, offsetof(struct xpcb, xpcb_cr4));
+ASSYM(XPCB_KGSBASE, offsetof(struct xpcb, xpcb_kgsbase));
+ASSYM(XPCB_SS, offsetof(struct xpcb, xpcb_ss));
+ASSYM(XPCB_GDT, offsetof(struct xpcb, xpcb_gdt));
+ASSYM(XPCB_IDT, offsetof(struct xpcb, xpcb_idt));
+ASSYM(XPCB_LDT, offsetof(struct xpcb, xpcb_ldt));
+ASSYM(XPCB_TR, offsetof(struct xpcb, xpcb_tr));
+
+ASSYM(XPCB_SIZE, sizeof(struct xpcb));
+
 ASSYM(COMMON_TSS_RSP0, offsetof(struct amd64tss, tss_rsp0));
 
 ASSYM(TF_R15, offsetof(struct trapframe, tf_r15));
--- sys/amd64/amd64/cpu_switch.S	(revision 189902)
+++ sys/amd64/amd64/cpu_switch.S	(working copy)
@@ -325,9 +325,8 @@
 	movq	%r11,%dr6
 	movq	%rax,%dr7
 	jmp	done_load_dr
+END(cpu_switch)
 
-END(cpu_switch)
-
 /*
  * savectx(pcb)
  * Update pcb, saving current processor state.
@@ -386,3 +385,74 @@
 
 	ret
 END(savectx)
+
+/*
+ * savectx2(xpcb)
+ * Update xpcb, saving current processor state.
+ */
+ENTRY(savectx2)
+	/* Fetch XPCB. */
+	movq	%rdi,%r8
+
+	/* Save caller's return address. */
+	movq	(%rsp),%rax
+	movq	%rax,PCB_RIP(%r8)
+
+	mov	%ds,PCB_DS(%r8)
+	mov	%es,PCB_ES(%r8)
+	mov	%ss,XPCB_SS(%r8)
+	mov	%fs,PCB_FS(%r8)
+	mov	%gs,PCB_GS(%r8)
+
+	movq	%rbx,PCB_RBX(%r8)
+	movq	%rsp,PCB_RSP(%r8)
+	movq	%rbp,PCB_RBP(%r8)
+	movq	%r12,PCB_R12(%r8)
+	movq	%r13,PCB_R13(%r8)
+	movq	%r14,PCB_R14(%r8)
+	movq	%r15,PCB_R15(%r8)
+
+	movq	%cr0,%rax
+	movq	%rax,XPCB_CR0(%r8)
+	movq	%cr2,%rax
+	movq	%rax,XPCB_CR2(%r8)
+	movq	%cr4,%rax
+	movq	%rax,XPCB_CR4(%r8)
+
+	movq	%dr0,%rax
+	movq	%rax,PCB_DR0(%r8)
+	movq	%dr1,%rax
+	movq	%rax,PCB_DR1(%r8)
+	movq	%dr2,%rax
+	movq	%rax,PCB_DR2(%r8)
+	movq	%dr3,%rax
+	movq	%rax,PCB_DR3(%r8)
+	movq	%dr6,%rax
+	movq	%rax,PCB_DR6(%r8)
+	movq	%dr7,%rax
+	movq	%rax,PCB_DR7(%r8)
+
+	sgdt	XPCB_GDT(%r8)
+	sidt	XPCB_IDT(%r8)
+	sldt	XPCB_LDT(%r8)
+	str	XPCB_TR(%r8)
+
+	movl	$MSR_FSBASE,%ecx
+	rdmsr
+	shlq	$32,%rdx
+	leaq	(%rax,%rdx),%rax
+	movq	%rax,PCB_FSBASE(%r8)
+	movl	$MSR_GSBASE,%ecx
+	rdmsr
+	shlq	$32,%rdx
+	leaq	(%rax,%rdx),%rax
+	movq	%rax,PCB_GSBASE(%r8)
+	movl	$MSR_KGSBASE,%ecx
+	rdmsr
+	shlq	$32,%rdx
+	leaq	(%rax,%rdx),%rax
+	movq	%rax,XPCB_KGSBASE(%r8)
+
+	movl	$1, %eax
+	ret
+END(savectx2)
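
savectx2 reassembles each 64-bit base MSR from the %edx:%eax halves that rdmsr returns, via shlq $32 followed by leaq. In C the same combination is a one-liner; the asm only spells it out because it works on the raw register pair:

	/* C equivalent of the shlq/leaq pairing used for FSBASE and friends. */
	static inline uint64_t
	msr_join(uint32_t lo, uint32_t hi)
	{
		return (((uint64_t)hi << 32) | lo);
	}
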
--- sys/amd64/amd64/mp_machdep.c	(revision 189902)
+++ sys/amd64/amd64/mp_machdep.c	(working copy)
@@ -58,6 +58,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -103,6 +104,7 @@
 extern int _udatasel;
 
 struct pcb stoppcbs[MAXCPU];
+struct xpcb *stopxpcbs = NULL;
 
 /* Variables needed for SMP tlb shootdown. */
 vm_offset_t smp_tlb_addr1;
@@ -344,6 +346,9 @@
 	/* Install an inter-CPU IPI for CPU stop/restart */
 	setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0);
 
+	/* Install an inter-CPU IPI for CPU suspend/resume */
+	setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0);
+
 	/* Set boot_cpu_id if needed. */
 	if (boot_cpu_id == -1) {
 		boot_cpu_id = PCPU_GET(apic_id);
@@ -1145,6 +1150,41 @@
 }
 
 /*
+ * Handle an IPI_SUSPEND by saving our current context and spinning until we
+ * are resumed.
+ */
+void
+cpususpend_handler(void)
+{
+	struct savefpu *stopfpu;
+	register_t cr3, rf;
+	int cpu = PCPU_GET(cpuid);
+	int cpumask = PCPU_GET(cpumask);
+
+	rf = intr_disable();
+	cr3 = rcr3();
+	stopfpu = &stopxpcbs[cpu].xpcb_pcb.pcb_save;
+	if (savectx2(&stopxpcbs[cpu])) {
+		fpugetregs(curthread, stopfpu);
+		wbinvd();
+		atomic_set_int(&stopped_cpus, cpumask);
+	} else
+		fpusetregs(curthread, stopfpu);
+
+	/* Wait for resume */
+	while (!(started_cpus & cpumask))
+		ia32_pause();
+
+	atomic_clear_int(&started_cpus, cpumask);
+	atomic_clear_int(&stopped_cpus, cpumask);
+
+	/* Restore CR3 and enable interrupts */
+	load_cr3(cr3);
+	lapic_setup(0);
+	intr_restore(rf);
+}
+
+/*
  * This is called once the rest of the system is up and running and we're
  * ready to let the AP's out of the pen.
  */
--- sys/amd64/amd64/amd64_mem.c	(revision 189902)
+++ sys/amd64/amd64/amd64_mem.c	(working copy)
@@ -73,11 +73,13 @@
 static int	amd64_mrset(struct mem_range_softc *sc,
 		    struct mem_range_desc *mrd, int *arg);
 static void	amd64_mrAPinit(struct mem_range_softc *sc);
+static void	amd64_mrreinit(struct mem_range_softc *sc);
 
 static struct mem_range_ops amd64_mrops = {
 	amd64_mrinit,
 	amd64_mrset,
-	amd64_mrAPinit
+	amd64_mrAPinit,
+	amd64_mrreinit
 };
 
 /* XXX for AP startup hook */
@@ -668,7 +670,31 @@
 	wrmsr(MSR_MTRRdefType, mtrrdef);
 }
 
+/*
+ * Re-initialise the running CPU(s)' MTRRs to match the ranges in the
+ * descriptor list.
+ *
+ * XXX Must be called with interrupts enabled.
+ */
 static void
+amd64_mrreinit(struct mem_range_softc *sc)
+{
+#ifdef SMP
+	/*
+	 * We should use ipi_all_but_self() to call other CPUs into a
+	 * locking gate, then call a target function to do this work.
+	 * The "proper" solution involves a generalised locking gate
+	 * implementation, not ready yet.
+	 */
+	smp_rendezvous(NULL, (void *)amd64_mrAPinit, NULL, sc);
+#else
+	disable_intr();				/* disable interrupts */
+	amd64_mrAPinit(sc);
+	enable_intr();
+#endif
+}
+
+static void
 amd64_mem_drvinit(void *unused)
 {
--- sys/amd64/amd64/apic_vector.S	(revision 189902)
+++ sys/amd64/amd64/apic_vector.S	(working copy)
@@ -224,6 +224,22 @@
 	iretq
 
 /*
+ * Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
+ */
+	.text
+	SUPERALIGN_TEXT
+IDTVEC(cpususpend)
+	PUSH_FRAME
+
+	movq	lapic, %rax
+	movl	$0, LA_EOI(%rax)	/* End Of Interrupt to APIC */
+
+	call	cpususpend_handler
+
+	POP_FRAME
+	iretq
+
+/*
 * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
  *
  * - Calls the generic rendezvous action function.
--- sys/sys/smp.h	(revision 189902)
+++ sys/sys/smp.h	(working copy)
@@ -122,6 +122,9 @@
 void	forward_roundrobin(void);
 int	restart_cpus(cpumask_t);
 int	stop_cpus(cpumask_t);
+#if defined(__amd64__)
+int	suspend_cpus(cpumask_t);
+#endif
 void	smp_rendezvous_action(void);
 extern	struct mtx smp_ipi_mtx;
--- sys/sys/memrange.h	(revision 189902)
+++ sys/sys/memrange.h	(working copy)
@@ -52,6 +52,7 @@
 	void	(*init)(struct mem_range_softc *sc);
 	int	(*set)(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg);
 	void	(*initAP)(struct mem_range_softc *sc);
+	void	(*reinit)(struct mem_range_softc *sc);
 };
 
 struct mem_range_softc
@@ -68,4 +69,3 @@
 extern int mem_range_attr_set(struct mem_range_desc *mrd, int *arg);
 
 #endif
-
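
A loose end for readers: sys/sys/smp.h now declares suspend_cpus(), and acpi_sleep_machdep() calls it, but its definition is not part of this excerpt. Judging from the stop_cpus()/IPI_STOP pattern it mirrors and from cpususpend_handler() above (which marks itself in stopped_cpus), a plausible sketch, offered as an assumption rather than the committed code, is:

	/*
	 * Hypothetical implementation sketch of suspend_cpus(): send
	 * IPI_SUSPEND instead of IPI_STOP, then wait for the targets to
	 * check in via stopped_cpus, as cpususpend_handler() does.
	 */
	int
	suspend_cpus(cpumask_t map)
	{
		int i;

		ipi_selected(map, IPI_SUSPEND);
		for (i = 0; i < 100000; i++) {	/* spin up to ~100 ms */
			if ((stopped_cpus & map) == map)
				return (1);
			DELAY(1);
		}
		return (0);
	}
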