WARNING: DO NOT EVER USE THIS IN PRODUCTION! This is WIP, very intrusive, and most importantly, may not work at all. It is working fine on several machines that I have, though. Index: sys/amd64/acpica/Makefile =================================================================== RCS file: sys/amd64/acpica/Makefile diff -N sys/amd64/acpica/Makefile --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/amd64/acpica/Makefile 11 Dec 2008 00:41:40 -0000 @@ -0,0 +1,33 @@ +# $FreeBSD$ + +# Correct path for kernel builds +# Don't rely on the kernel's .depend file +.ifdef MAKESRCPATH +.PATH: ${MAKESRCPATH} +DEPENDFILE= +.else +MAKESRCPATH= ${.CURDIR} +CLEANFILES= acpi_wakecode.h acpi_wakedata.h acpi_wakecode.bin acpi_wakecode.o +.endif +.if ${CC} == "icc" +CFLAGS+= -restrict +NOSTDINC= -X +.else +NOSTDINC= -nostdinc +.endif +CFLAGS+= ${NOSTDINC} -include opt_global.h -I. -I${MAKESRCPATH}/../.. + +all: acpi_wakecode.h acpi_wakedata.h + +acpi_wakecode.o: acpi_wakecode.S assym.s + +acpi_wakecode.bin: acpi_wakecode.o + objcopy -S -O binary acpi_wakecode.o acpi_wakecode.bin + +acpi_wakecode.h: acpi_wakecode.bin + sh ${MAKESRCPATH}/genwakecode.sh > acpi_wakecode.h + +acpi_wakedata.h: acpi_wakecode.bin + sh ${MAKESRCPATH}/genwakedata.sh > acpi_wakedata.h + +.include Index: sys/amd64/acpica/acpi_machdep.c =================================================================== RCS file: /home/ncvs/src/sys/amd64/acpica/acpi_machdep.c,v retrieving revision 1.18 diff -p -u -r1.18 acpi_machdep.c --- sys/amd64/acpica/acpi_machdep.c 13 Mar 2008 20:39:02 -0000 1.18 +++ sys/amd64/acpica/acpi_machdep.c 11 Dec 2008 00:41:41 -0000 @@ -31,25 +31,50 @@ __FBSDID("$FreeBSD: src/sys/amd64/acpica #include #include #include +#include #include #include #include +SYSCTL_DECL(_debug_acpi); + +uint32_t acpi_resume_beep; +TUNABLE_INT("debug.acpi.resume_beep", &acpi_resume_beep); +SYSCTL_UINT(_debug_acpi, OID_AUTO, resume_beep, CTLFLAG_RW, &acpi_resume_beep, + 0, "Beep the PC speaker when resuming"); +uint32_t 
acpi_reset_video; +TUNABLE_INT("hw.acpi.reset_video", &acpi_reset_video); + static int intr_model = ACPI_INTR_PIC; +static struct apm_clone_data acpi_clone; int acpi_machdep_init(device_t dev) { - struct acpi_softc *sc; + struct acpi_softc *sc; sc = devclass_get_softc(devclass_find("acpi"), 0); + + /* Create a fake clone for /dev/acpi. */ + STAILQ_INIT(&sc->apm_cdevs); + acpi_clone.cdev = sc->acpi_dev_t; + acpi_clone.acpi_sc = sc; + ACPI_LOCK(acpi); + STAILQ_INSERT_TAIL(&sc->apm_cdevs, &acpi_clone, entries); + ACPI_UNLOCK(acpi); + sc->acpi_clone = &acpi_clone; acpi_install_wakeup_handler(sc); if (intr_model != ACPI_INTR_PIC) acpi_SetIntrModel(intr_model); + SYSCTL_ADD_UINT(&sc->acpi_sysctl_ctx, + SYSCTL_CHILDREN(sc->acpi_sysctl_tree), OID_AUTO, + "reset_video", CTLFLAG_RW, &acpi_reset_video, 0, + "Call the VESA reset BIOS vector on the resume path"); + return (0); } Index: sys/amd64/acpica/acpi_switch.S =================================================================== RCS file: sys/amd64/acpica/acpi_switch.S diff -N sys/amd64/acpica/acpi_switch.S --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/amd64/acpica/acpi_switch.S 11 Dec 2008 00:41:42 -0000 @@ -0,0 +1,191 @@ +/*- + * Copyright (c) 2001 Takanori Watanabe + * Copyright (c) 2001 Mitsuru IWASAKI + * Copyright (c) 2008 Jung-uk Kim + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#include <machine/asmacros.h>
+#include <machine/specialreg.h>
+
+#include "acpi_wakedata.h"
+#include "assym.s"
+
+#define	WAKEUP_DECL(member)	\
+    .set WAKEUP_ ## member, wakeup_ ## member - wakeup_ctx
+
+	WAKEUP_DECL(xpcb)
+	WAKEUP_DECL(gdt)
+	WAKEUP_DECL(efer)
+	WAKEUP_DECL(pat)
+	WAKEUP_DECL(star)
+	WAKEUP_DECL(lstar)
+	WAKEUP_DECL(cstar)
+	WAKEUP_DECL(sfmask)
+	WAKEUP_DECL(cpu)
+
+#define	WAKEUP_CTX(member)	WAKEUP_ ## member (%rdi)
+#define	WAKEUP_PCB(member)	PCB_ ## member(%r11)
+#define	WAKEUP_XPCB(member)	XPCB_ ## member(%r11)
+
+ENTRY(acpi_restorecpu)
+	/* Switch to KPML4phys; the wakeup code passes it in %rsi. */
+	movq	%rsi, %cr3
+
+	/* Restore GDT. */
+	lgdt	WAKEUP_CTX(gdt)
+	jmp	1f
+1:
+
+	/* Fetch PCB. */
+	movq	WAKEUP_CTX(xpcb), %r11
+
+	/* Restore segment registers. */
+	movl	WAKEUP_PCB(DS), %ds
+	movl	WAKEUP_PCB(ES), %es
+	movl	WAKEUP_XPCB(SS), %ss
+	movl	WAKEUP_PCB(FS), %fs
+	movl	WAKEUP_PCB(GS), %gs
+
+	movl	$MSR_FSBASE, %ecx
+	movl	WAKEUP_PCB(FSBASE), %eax
+	movl	4 + WAKEUP_PCB(FSBASE), %edx
+	wrmsr
+	movl	$MSR_GSBASE, %ecx
+	movl	WAKEUP_PCB(GSBASE), %eax
+	movl	4 + WAKEUP_PCB(GSBASE), %edx
+	wrmsr
+	movl	$MSR_KGSBASE, %ecx
+	movl	WAKEUP_XPCB(KGSBASE), %eax
+	movl	4 + WAKEUP_XPCB(KGSBASE), %edx
+	wrmsr
+
+	/* Restore EFER; wrmsr writes %edx:%eax, so load both halves. */
+	movl	$MSR_EFER, %ecx
+	movl	WAKEUP_CTX(efer), %eax
+	movl	4 + WAKEUP_CTX(efer), %edx
+	wrmsr
+
+	/* Restore PAT. */
+	movl	$MSR_PAT, %ecx
+	movl	WAKEUP_CTX(pat), %eax
+	movl	4 + WAKEUP_CTX(pat), %edx
+	wrmsr
+
+	/* Restore fast syscall stuff. */
+	movl	$MSR_STAR, %ecx
+	movl	WAKEUP_CTX(star), %eax
+	movl	4 + WAKEUP_CTX(star), %edx
+	wrmsr
+	movl	$MSR_LSTAR, %ecx
+	movl	WAKEUP_CTX(lstar), %eax
+	movl	4 + WAKEUP_CTX(lstar), %edx
+	wrmsr
+	movl	$MSR_CSTAR, %ecx
+	movl	WAKEUP_CTX(cstar), %eax
+	movl	4 + WAKEUP_CTX(cstar), %edx
+	wrmsr
+	movl	$MSR_SF_MASK, %ecx
+	movl	WAKEUP_CTX(sfmask), %eax
+	movl	4 + WAKEUP_CTX(sfmask), %edx
+	wrmsr
+
+	/* Restore CR0, CR2 and CR4. */
+	movq	WAKEUP_XPCB(CR0), %rax
+	movq	%rax, %cr0
+	movq	WAKEUP_XPCB(CR2), %rax
+	movq	%rax, %cr2
+	movq	WAKEUP_XPCB(CR4), %rax
+	movq	%rax, %cr4
+
+	/* Restore descriptor tables. */
+	lidt	WAKEUP_XPCB(IDT)
+	lldt	WAKEUP_XPCB(LDT)
+	movw	WAKEUP_XPCB(TR), %ax
+	ltr	%ax
+
+	/* Restore other callee saved registers. */
+	movq	WAKEUP_PCB(R15), %r15
+	movq	WAKEUP_PCB(R14), %r14
+	movq	WAKEUP_PCB(R13), %r13
+	movq	WAKEUP_PCB(R12), %r12
+	movq	WAKEUP_PCB(RBP), %rbp
+	movq	WAKEUP_PCB(RSP), %rsp
+	movq	WAKEUP_PCB(RBX), %rbx
+
+	/* Restore debug registers. */
+	movq	WAKEUP_PCB(DR0), %rax
+	movq	%rax, %dr0
+	movq	WAKEUP_PCB(DR1), %rax
+	movq	%rax, %dr1
+	movq	WAKEUP_PCB(DR2), %rax
+	movq	%rax, %dr2
+	movq	WAKEUP_PCB(DR3), %rax
+	movq	%rax, %dr3
+	movq	WAKEUP_PCB(DR6), %rax
+	movq	%rax, %dr6
+	movq	WAKEUP_PCB(DR7), %rax
+	movq	%rax, %dr7
+
+	/* Restore return address. */
+	movq	WAKEUP_PCB(RIP), %rax
+	movq	%rax, (%rsp)
+
+	/* Check if it is BSP or AP. */
+	movq	WAKEUP_CTX(cpu), %rax
+	testq	%rax, %rax
+	jz	1f
+
+	/* Restore CR3 on AP. */
+	movq	%rax, %rcx
+	movq	WAKEUP_PCB(CR3), %rax
+	movq	%rax, %cr3
+
+	/* Restore RFLAGS on AP. */
+	pushq	WAKEUP_PCB(FLAGS)
+	popfq
+
+	/* Indicate that AP is resumed. */
+	xorl	%eax, %eax
+	movl	%eax, (%rcx)
+1:
+	ret
+END(acpi_restorecpu)
+
+ENTRY(acpi_savecpu)
+	/* Fetch PCB and save CPU context. */
+	movq	%rdi, %r10
+	call	savectx2
+	movq	%r10, %r11
+
+	/* Patch caller's return address and stack pointer. */
+	movq	(%rsp), %rax
+	movq	%rax, WAKEUP_PCB(RIP)
+	movq	%rsp, WAKEUP_PCB(RSP)
+
+	movl	$1, %eax
+	ret
+END(acpi_savecpu)
Index: sys/amd64/acpica/acpi_wakecode.S
===================================================================
RCS file: sys/amd64/acpica/acpi_wakecode.S
diff -N sys/amd64/acpica/acpi_wakecode.S
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ sys/amd64/acpica/acpi_wakecode.S	11 Dec 2008 00:41:43 -0000
@@ -0,0 +1,279 @@
+/*-
+ * Copyright (c) 2001 Takanori Watanabe
+ * Copyright (c) 2001 Mitsuru IWASAKI
+ * Copyright (c) 2003 Peter Wemm
+ * Copyright (c) 2008 Jung-uk Kim
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#define LOCORE + +#include +#include +#include + +#include "assym.s" + +/* + * Resume entry point for real mode. + * + * If XFirmwareWakingVector is zero and FirmwareWakingVector is non-zero + * in FACS, the BIOS enters here in real mode after POST with CS set to + * (FirmwareWakingVector >> 4) and IP set to (FirmwareWakingVector & 0xf). + * Depending on the previous sleep state, we may need to initialize more + * of the system (i.e., S3 suspend-to-RAM vs. S4 suspend-to-disk). + * + * Note: If XFirmwareWakingVector is non-zero, it should disable address + * translation/paging and interrupts, load all segment registers with + * a flat 4 GB address space, and set EFLAGS.IF to zero. Currently + * this mode is not supported by this code. + */ + + .data /* So we can modify it */ + + ALIGN_TEXT +wakeup_start: + .code16 + /* + * Set up segment registers for real mode, a small stack for + * any calls we make, and clear any flags. + */ + cli /* make sure no interrupts */ + cld + mov %cs, %ax /* copy %cs to %ds. Remember these */ + mov %ax, %ds /* are offsets rather than selectors */ + mov %ax, %ss + movw $PAGE_SIZE - 8, %sp + pushw $0 + popfw + + /* To debug resume hangs, beep the speaker if the user requested. 
*/ + cmpw $0, resume_beep - wakeup_start + je 1f + movb $0xc0, %al + outb %al, $0x42 + movb $0x04, %al + outb %al, $0x42 + inb $0x61, %al + orb $0x3, %al + outb %al, $0x61 + movw $0, resume_beep - wakeup_start +1: + + /* Re-initialize video BIOS if the reset_video tunable is set. */ + cmpw $0, reset_video - wakeup_start + je 1f + lcall $0xc000, $3 + movw $0, reset_video - wakeup_start + + /* + * Set up segment registers for real mode again in case the + * previous BIOS call clobbers them. + */ + mov %cs, %ax + mov %ax, %ds + mov %ax, %ss +1: + + /* + * Find relocation base and patch the gdt descriptor and ljmp targets + */ + xorl %ebx, %ebx + mov %cs, %bx + sall $4, %ebx /* %ebx is now our relocation base */ + + /* + * Load the descriptor table pointer. We'll need it when running + * in 16-bit protected mode. + */ + lgdtl bootgdtdesc - wakeup_start + + /* Enable protected mode */ + movl $CR0_PE, %eax + mov %eax, %cr0 + + /* + * Now execute a far jump to turn on protected mode. This + * causes the segment registers to turn into selectors and causes + * %cs to be loaded from the gdt. + * + * The following instruction is: + * ljmpl $bootcode32 - bootgdt, $wakeup_32 - wakeup_start + * but gas cannot assemble that. And besides, we patch the targets + * in early startup and it's a little clearer what we are patching. + */ +wakeup_sw32: + .byte 0x66 /* size override to 32 bits */ + .byte 0xea /* opcode for far jump */ + .long wakeup_32 - wakeup_start /* offset in segment */ + .word bootcode32 - bootgdt /* index in gdt for 32 bit code */ + + /* + * At this point, we are running in 32 bit legacy protected mode. + */ + .code32 +wakeup_32: + + mov $bootdata32 - bootgdt, %eax + mov %ax, %ds + + /* Turn on the PAE and PSE bits for when paging is enabled */ + mov %cr4, %eax + orl $(CR4_PAE | CR4_PSE), %eax + mov %eax, %cr4 + + /* + * Enable EFER.LME so that we get long mode when all the prereqs are + * in place. In this case, it turns on when CR0_PG is finally enabled.
+ * Pick up a few other EFER bits that we'll need while we're here. + */ + movl $MSR_EFER, %ecx + rdmsr + orl $EFER_LME | EFER_SCE, %eax + wrmsr + + /* + * Point to the embedded page tables for startup. Note that this + * only gets accessed after we're actually in 64 bit mode, however + * we can only set the bottom 32 bits of %cr3 in this state. This + * means we are required to use a temporary page table that is below + * the 4GB limit. %ebx is still our relocation base. We could just + * subtract 3 * PAGE_SIZE, but that would be too easy. + */ + leal wakeup_pagetables - wakeup_start(%ebx), %eax + movl (%eax), %eax + mov %eax, %cr3 + + /* + * Finally, switch to long mode by enabling paging. We have + * to be very careful here because all the segmentation disappears + * out from underneath us. The spec says we can depend on the + * subsequent pipelined branch to execute, but *only if* everything + * is still identity mapped. If any mappings change, the pipeline + * will flush. + */ + mov %cr0, %eax + orl $CR0_PG, %eax + mov %eax, %cr0 + + /* + * At this point paging is enabled, and we are in "compatibility" mode. + * We do another far jump to reload %cs with the 64 bit selector. + * %cr3 points to a 4-level page table page. + * We cannot yet jump all the way to the kernel because we can only + * specify a 32 bit linear address. So, yet another trampoline. + * + * The following instruction is: + * ljmp $bootcode64 - bootgdt, $wakeup_64 - wakeup_start + * but gas cannot assemble that. And besides, we patch the targets + * in early startup and it's a little clearer what we are patching. + */ +wakeup_sw64: + .byte 0xea /* opcode for far jump */ + .long wakeup_64 - wakeup_start /* offset in segment */ + .word bootcode64 - bootgdt /* index in gdt for 64 bit code */ + + /* + * Yeehar! We're running in 64-bit mode! We can mostly ignore our + * segment registers, and get on with it.
+ * Note that we are running at the correct virtual address, but with + * a 1:1 1GB mirrored mapping over entire address space. We had better + * switch to a real %cr3 promptly so that we can get to the direct map + * space. Remember that jmp is relative and that we've been relocated, + * so use an indirect jump. + */ + .code64 +wakeup_64: + mov $bootdata64 - bootgdt, %eax + mov %ax, %ds + + /* Restore arguments and return. */ + movq wakeup_ctx - wakeup_start(%rbx), %rdi + movq wakeup_kpml4 - wakeup_start(%rbx), %rsi + movq wakeup_retaddr - wakeup_start(%rbx), %rax + jmp *%rax + + ALIGN_DATA +bootgdt: + .long 0x00000000 + .long 0x00000000 + +bootcode64: + .long 0x0000ffff + .long 0x00af9b00 + +bootdata64: + .long 0x0000ffff + .long 0x00af9300 + +bootcode32: + .long 0x0000ffff + .long 0x00cf9b00 + +bootdata32: + .long 0x0000ffff + .long 0x00cf9300 +bootgdtend: + +wakeup_pagetables: + .long 0 + +bootgdtdesc: + .word bootgdtend - bootgdt /* Length */ + .long bootgdt - wakeup_start /* Offset plus %ds << 4 */ + + ALIGN_DATA +resume_beep: + .long 0 +reset_video: + .long 0 +wakeup_retaddr: + .quad 0 +wakeup_kpml4: + .quad 0 + +wakeup_ctx: + .quad 0 +wakeup_xpcb: + .quad 0 +wakeup_gdt: + .word 0 + .quad 0 +wakeup_efer: + .quad 0 +wakeup_pat: + .quad 0 +wakeup_star: + .quad 0 +wakeup_lstar: + .quad 0 +wakeup_cstar: + .quad 0 +wakeup_sfmask: + .quad 0 +wakeup_cpu: + .quad 0 +dummy: Index: sys/amd64/acpica/acpi_wakeup.c =================================================================== RCS file: /home/ncvs/src/sys/amd64/acpica/acpi_wakeup.c,v retrieving revision 1.22 diff -p -u -r1.22 acpi_wakeup.c --- sys/amd64/acpica/acpi_wakeup.c 11 Sep 2005 18:39:00 -0000 1.22 +++ sys/amd64/acpica/acpi_wakeup.c 11 Dec 2008 00:41:44 -0000 @@ -1,6 +1,8 @@ /*- * Copyright (c) 2001 Takanori Watanabe * Copyright (c) 2001 Mitsuru IWASAKI + * Copyright (c) 2003 Peter Wemm + * Copyright (c) 2008 Jung-uk Kim * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -29,18 +31,435 @@ __FBSDID("$FreeBSD: src/sys/amd64/acpica/acpi_wakeup.c,v 1.22 2005/09/11 18:39:00 obrien Exp $"); #include +#include +#include #include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef SMP +#include +#include +#include +#include +#endif #include #include +#include "acpi_wakecode.h" +#include "acpi_wakedata.h" + +/* Make sure the code is less than a page and leave room for the stack. */ +CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024); + +#ifndef _SYS_CDEFS_H_ +#error this file needs sys/cdefs.h as a prerequisite +#endif + +extern uint32_t acpi_resume_beep; +extern uint32_t acpi_reset_video; + +static struct savefpu stopfpu; +static struct xpcb stopxpcb; + +int acpi_restorecpu(struct xpcb *, vm_offset_t); +int acpi_savecpu(struct xpcb *); + +static void acpi_reset_tss(int cpu); +static void acpi_realmodeinst(void *, bus_dma_segment_t *, + int, int); +static void acpi_alloc_wakeup_handler(void); +static void acpi_stop_beep(void *); + +#ifdef SMP +static int acpi_wakeup_ap(struct acpi_softc *, int); +static void acpi_wakeup_cpus(struct acpi_softc *, cpumask_t); +#endif + +#define WAKECODE_VADDR(sc) ((sc)->acpi_wakeaddr + (3 * PAGE_SIZE)) +#define WAKECODE_PADDR(sc) ((sc)->acpi_wakephys + (3 * PAGE_SIZE)) +#define WAKECODE_FIXUP(offset, type, val) do { \ + type *addr; \ + addr = (type *)(WAKECODE_VADDR(sc) + offset); \ + *addr = val; \ +} while (0) + +/* Turn off bits 1&2 of the PIT, stopping the beep. 
*/ +static void +acpi_stop_beep(void *arg) +{ + outb(0x61, inb(0x61) & ~0x3); +} + +#ifdef SMP +static int +acpi_wakeup_ap(struct acpi_softc *sc, int cpu) +{ + int vector = (WAKECODE_PADDR(sc) >> 12) & 0xff; + int apic_id = cpu_apic_ids[cpu]; + int cpu_id = cpu; + int ms; + + WAKECODE_FIXUP(wakeup_xpcb, struct xpcb *, &stopxpcbs[cpu]); + WAKECODE_FIXUP(wakeup_gdt, uint16_t, stopxpcbs[cpu].xpcb_gdt.rd_limit); + WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, + stopxpcbs[cpu].xpcb_gdt.rd_base); + WAKECODE_FIXUP(wakeup_cpu, int *, &cpu_id); + + acpi_reset_tss(cpu); + + /* do an INIT IPI: assert RESET */ + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | + APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); + + /* wait for pending status end */ + lapic_ipi_wait(-1); + + /* do an INIT IPI: deassert RESET */ + lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL | + APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0); + + /* wait for pending status end */ + DELAY(10000); /* wait ~10mS */ + lapic_ipi_wait(-1); + + /* + * next we do a STARTUP IPI: the previous INIT IPI might still be + * latched, (P5 bug) this 1st STARTUP would then terminate + * immediately, and the previously started INIT IPI would continue. OR + * the previous INIT IPI has already run. and this STARTUP IPI will + * run. OR the previous INIT IPI was ignored. and this STARTUP IPI + * will run. + */ + + /* do a STARTUP IPI */ + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | + APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | + vector, apic_id); + lapic_ipi_wait(-1); + DELAY(200); /* wait ~200uS */ + + /* + * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF + * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR + * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is + * recognized after hardware RESET or INIT IPI. 
+ */ + + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | + APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | + vector, apic_id); + lapic_ipi_wait(-1); + DELAY(200); /* wait ~200uS */ + + /* Wait up to 5 seconds for it to start. */ + for (ms = 0; ms < 5000; ms++) { + if (cpu_id == 0) + return (1); /* return SUCCESS */ + DELAY(1000); + } + return (0); /* return FAILURE */ +} + +#define WARMBOOT_TARGET 0 +#define WARMBOOT_OFF (KERNBASE + 0x0467) +#define WARMBOOT_SEG (KERNBASE + 0x0469) + +#define CMOS_REG (0x70) +#define CMOS_DATA (0x71) +#define BIOS_RESET (0x0f) +#define BIOS_WARM (0x0a) + +static void +acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus) +{ + uint32_t mpbioswarmvec; + cpumask_t map; + int cpu; + u_char mpbiosreason; + + /* save the current value of the warm-start vector */ + mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF); + outb(CMOS_REG, BIOS_RESET); + mpbiosreason = inb(CMOS_DATA); + + /* setup a vector to our boot code */ + *((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET; + *((volatile u_short *)WARMBOOT_SEG) = WAKECODE_PADDR(sc) >> 4; + outb(CMOS_REG, BIOS_RESET); + outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ + + /* Wake up each AP. */ + for (cpu = 1; cpu < mp_ncpus; cpu++) { + map = 1ul << cpu; + if ((wakeup_cpus & map) != map) + continue; + if (acpi_wakeup_ap(sc, cpu) == 0) { + /* restore the warmstart vector */ + *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec; + panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)", + cpu, cpu_apic_ids[cpu]); + } + } + + /* restore the warmstart vector */ + *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec; + + outb(CMOS_REG, BIOS_RESET); + outb(CMOS_DATA, mpbiosreason); + + restart_cpus(wakeup_cpus); +} +#endif + +static void +acpi_reset_tss(int cpu) +{ + uint32_t *tss; + + /* + * We have to clear "task busy" bit in TSS to restore + * task register later. Otherwise, ltr causes GPF. 
+ */ + tss = (uint32_t *)&gdt[NGDT * cpu + GPROC0_SEL] + 1; + *tss &= ~((SDT_SYSBSY ^ SDT_SYSTSS) << 8); +} + int acpi_sleep_machdep(struct acpi_softc *sc, int state) { - return (0); +#ifdef SMP + cpumask_t wakeup_cpus; +#endif + register_t cr3, rf; + ACPI_STATUS status; + int ret; + + ret = -1; + if (sc->acpi_wakeaddr == 0) + return (0); + +#ifdef SMP + wakeup_cpus = PCPU_GET(other_cpus); +#endif + + AcpiSetFirmwareWakingVector(WAKECODE_PADDR(sc)); + + rf = read_rflags(); + + /* + * Temporarily switch to the kernel pmap because it provides + * an identity mapping (setup at boot) for the low physical + * memory region containing the wakeup code. + */ + cr3 = rcr3(); + load_cr3(KPML4phys); + + ACPI_DISABLE_IRQS(); + if (acpi_savecpu(&stopxpcb)) { + fpugetregs(curthread, &stopfpu); + intr_suspend(); + +#ifdef SMP + if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) { + device_printf(sc->acpi_dev, + "Failed to suspend APs: CPU mask = 0x%jx\n", + (uintmax_t)(wakeup_cpus & ~stopped_cpus)); + restart_cpus(stopped_cpus); + goto out; + } +#endif + + WAKECODE_FIXUP(resume_beep, uint32_t, acpi_resume_beep); + WAKECODE_FIXUP(reset_video, uint32_t, acpi_reset_video); + + WAKECODE_FIXUP(wakeup_xpcb, struct xpcb *, &stopxpcb); + WAKECODE_FIXUP(wakeup_gdt, uint16_t, + stopxpcb.xpcb_gdt.rd_limit); + WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, + stopxpcb.xpcb_gdt.rd_base); + WAKECODE_FIXUP(wakeup_cpu, int *, NULL); + + acpi_reset_tss(0); + + /* Call ACPICA to enter the desired sleep state */ + if (state == ACPI_STATE_S4 && sc->acpi_s4bios) + status = AcpiEnterSleepStateS4bios(); + else + status = AcpiEnterSleepState(state); + + if (status != AE_OK) { + device_printf(sc->acpi_dev, + "AcpiEnterSleepState failed - %s\n", + AcpiFormatException(status)); +#ifdef SMP + if (wakeup_cpus != 0) + restart_cpus(wakeup_cpus); +#endif + goto out; + } + + for (;;) + ia32_pause(); + } else { + fpusetregs(curthread, &stopfpu); + + WAKECODE_FIXUP(resume_beep, uint32_t, 0); + 
WAKECODE_FIXUP(reset_video, uint32_t, 0); +#ifdef SMP + if (wakeup_cpus != 0) + acpi_wakeup_cpus(sc, wakeup_cpus); +#endif + ret = 0; + } + +out: + intr_resume(); + load_cr3(cr3); + write_rflags(rf); + + AcpiSetFirmwareWakingVector(0); + + if (ret == 0 && mem_range_softc.mr_op != NULL && + mem_range_softc.mr_op->reinit != NULL) + mem_range_softc.mr_op->reinit(&mem_range_softc); + + /* If we beeped, turn it off after a delay. */ + if (acpi_resume_beep) + timeout(acpi_stop_beep, NULL, 3 * hz); + + return (ret); +} + +static bus_dma_tag_t acpi_waketag; +static bus_dmamap_t acpi_wakemap; +static vm_offset_t acpi_wakeaddr; + +static void +acpi_alloc_wakeup_handler(void) +{ + void *wakeaddr; + + if (!cold) + return; + + /* + * Specify the region for our wakeup code. We want it in the low 1 MB + * region, excluding video memory and above (0xa0000). We ask for + * it to be page-aligned, just to be safe. + */ + if (bus_dma_tag_create(/*parent*/ NULL, + /*alignment*/ PAGE_SIZE, /*no boundary*/ 0, + /*lowaddr*/ 0x9ffff, /*highaddr*/ BUS_SPACE_MAXADDR, NULL, NULL, + /*maxsize*/ (4 * PAGE_SIZE), /*segments*/ 1, + /*maxsegsize*/ (4 * PAGE_SIZE), + 0, busdma_lock_mutex, &Giant, &acpi_waketag) != 0) { + printf("acpi_alloc_wakeup_handler: can't create wake tag\n"); + return; + } + if (bus_dmamem_alloc(acpi_waketag, &wakeaddr, BUS_DMA_NOWAIT, + &acpi_wakemap) != 0) { + printf("acpi_alloc_wakeup_handler: can't alloc wake memory\n"); + return; + } + acpi_wakeaddr = (vm_offset_t)wakeaddr; +} + +SYSINIT(acpiwakeup, SI_SUB_KMEM, SI_ORDER_ANY, acpi_alloc_wakeup_handler, 0); + +static void +acpi_realmodeinst(void *arg, bus_dma_segment_t *segs, int nsegs, int error) +{ + struct acpi_softc *sc; + uint64_t *pt4, *pt3, *pt2; + int i; + + sc = arg; + sc->acpi_wakephys = segs[0].ds_addr; + + bcopy(wakecode, (void *)WAKECODE_VADDR(sc), sizeof(wakecode)); + + /* Patch GDT base address, ljmp targets and page table base address. 
*/ + WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t, + WAKECODE_PADDR(sc) + bootgdt); + WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t, + WAKECODE_PADDR(sc) + wakeup_32); + WAKECODE_FIXUP((wakeup_sw64 + 1), uint32_t, + WAKECODE_PADDR(sc) + wakeup_64); + WAKECODE_FIXUP(wakeup_pagetables, uint32_t, sc->acpi_wakephys); + + /* Save pointers to some global data. */ + WAKECODE_FIXUP(wakeup_retaddr, void *, acpi_restorecpu); + WAKECODE_FIXUP(wakeup_kpml4, uint64_t, KPML4phys); + WAKECODE_FIXUP(wakeup_ctx, vm_offset_t, + WAKECODE_VADDR(sc) + wakeup_ctx); + WAKECODE_FIXUP(wakeup_efer, uint64_t, rdmsr(MSR_EFER)); + WAKECODE_FIXUP(wakeup_pat, uint64_t, rdmsr(MSR_PAT)); + WAKECODE_FIXUP(wakeup_star, uint64_t, rdmsr(MSR_STAR)); + WAKECODE_FIXUP(wakeup_lstar, uint64_t, rdmsr(MSR_LSTAR)); + WAKECODE_FIXUP(wakeup_cstar, uint64_t, rdmsr(MSR_CSTAR)); + WAKECODE_FIXUP(wakeup_sfmask, uint64_t, rdmsr(MSR_SF_MASK)); + + /* Build temporary page tables below realmode code. */ + pt4 = (uint64_t *)((uintptr_t)sc->acpi_wakeaddr); + pt3 = pt4 + (PAGE_SIZE) / sizeof(uint64_t); + pt2 = pt3 + (PAGE_SIZE) / sizeof(uint64_t); + + /* Create the initial 1GB replicated page tables */ + for (i = 0; i < 512; i++) { + /* + * Each slot of the level 4 pages points + * to the same level 3 page + */ + pt4[i] = (uint64_t)(uintptr_t)(sc->acpi_wakephys + PAGE_SIZE); + pt4[i] |= PG_V | PG_RW | PG_U; + + /* + * Each slot of the level 3 pages points + * to the same level 2 page + */ + pt3[i] = (uint64_t)(uintptr_t) + (sc->acpi_wakephys + (2 * PAGE_SIZE)); + pt3[i] |= PG_V | PG_RW | PG_U; + + /* The level 2 page slots are mapped with 2MB pages for 1GB. 
*/ + pt2[i] = i * (2 * 1024 * 1024); + pt2[i] |= PG_V | PG_RW | PG_PS | PG_U; + } + + if (bootverbose) + device_printf(sc->acpi_dev, "wakeup code va %#lx pa %#jx\n", + acpi_wakeaddr, (uintmax_t)sc->acpi_wakephys); } void acpi_install_wakeup_handler(struct acpi_softc *sc) { + + if (acpi_wakeaddr == 0) + return; + + sc->acpi_waketag = acpi_waketag; + sc->acpi_wakeaddr = acpi_wakeaddr; + sc->acpi_wakemap = acpi_wakemap; + + bus_dmamap_load(sc->acpi_waketag, sc->acpi_wakemap, + (void *)sc->acpi_wakeaddr, PAGE_SIZE * 4, acpi_realmodeinst, sc, 0); } Index: sys/amd64/acpica/genwakecode.sh =================================================================== RCS file: sys/amd64/acpica/genwakecode.sh diff -N sys/amd64/acpica/genwakecode.sh --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/amd64/acpica/genwakecode.sh 11 Dec 2008 00:41:44 -0000 @@ -0,0 +1,6 @@ +#!/bin/sh +# $FreeBSD$ +# +file2c 'static char wakecode[] = {' '};' #include +#include #include #include #include @@ -102,6 +103,8 @@ extern pt_entry_t *SMPpt; extern int _udatasel; struct pcb stoppcbs[MAXCPU]; +struct xpcb stopxpcbs[MAXCPU]; +struct savefpu stopfpus[MAXCPU]; /* Variables needed for SMP tlb shootdown. */ vm_offset_t smp_tlb_addr1; @@ -341,6 +344,9 @@ cpu_mp_start(void) /* Install an inter-CPU IPI for CPU stop/restart */ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYSIGT, SEL_KPL, 0); + /* Install an inter-CPU IPI for CPU suspend/resume */ + setidt(IPI_SUSPEND, IDTVEC(cpususpend), SDT_SYSIGT, SEL_KPL, 0); + /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { boot_cpu_id = PCPU_GET(apic_id); @@ -1106,6 +1112,35 @@ cpustop_handler(void) } /* + * Handle an IPI_SUSPEND by saving our current context and spinning until we + * are resumed. + */ +void +cpususpend_handler(void) +{ + int cpu = PCPU_GET(cpuid); + int cpumask = PCPU_GET(cpumask); + + if (savectx2(&stopxpcbs[cpu])) { + fpugetregs(curthread, &stopfpus[cpu]); + wbinvd(); + + /* Indicate that we are suspended. 
*/ + atomic_set_int(&stopped_cpus, cpumask); + } else { + fpusetregs(curthread, &stopfpus[cpu]); + lapic_setup(0); + } + + /* Wait for resume */ + while (!(started_cpus & cpumask)) + ia32_pause(); + + atomic_clear_int(&started_cpus, cpumask); + atomic_clear_int(&stopped_cpus, cpumask); +} + +/* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. */ Index: sys/amd64/include/apicvar.h =================================================================== RCS file: /home/ncvs/src/sys/amd64/include/apicvar.h,v retrieving revision 1.25 diff -p -u -r1.25 apicvar.h --- sys/amd64/include/apicvar.h 8 May 2007 22:01:03 -0000 1.25 +++ sys/amd64/include/apicvar.h 11 Dec 2008 00:41:50 -0000 @@ -130,6 +130,7 @@ #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ +#define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */ /* * The spurious interrupt can share the priority class with the IPIs since Index: sys/amd64/include/pcb.h =================================================================== RCS file: /home/ncvs/src/sys/amd64/include/pcb.h,v retrieving revision 1.66 diff -p -u -r1.66 pcb.h --- sys/amd64/include/pcb.h 8 Sep 2008 09:59:05 -0000 1.66 +++ sys/amd64/include/pcb.h 11 Dec 2008 00:41:50 -0000 @@ -80,11 +80,25 @@ struct pcb { struct user_segment_descriptor pcb_gs32sd; }; +struct xpcb { + struct pcb xpcb_pcb; + register_t xpcb_cr0; + register_t xpcb_cr2; + register_t xpcb_cr4; + register_t xpcb_kgsbase; + uint32_t xpcb_ss; + struct region_descriptor xpcb_gdt; + struct region_descriptor xpcb_idt; + struct region_descriptor xpcb_ldt; + uint16_t xpcb_tr; +}; + #ifdef _KERNEL struct trapframe; void makectx(struct trapframe *, struct pcb *); void savectx(struct pcb *); +int savectx2(struct xpcb *); #endif #endif /* _AMD64_PCB_H_ */ Index: sys/amd64/include/smp.h =================================================================== RCS 
file: /home/ncvs/src/sys/amd64/include/smp.h,v retrieving revision 1.93 diff -p -u -r1.93 smp.h --- sys/amd64/include/smp.h 28 Sep 2008 18:34:14 -0000 1.93 +++ sys/amd64/include/smp.h 11 Dec 2008 00:41:50 -0000 @@ -34,6 +34,7 @@ extern u_int32_t mptramp_pagetables; extern int mp_naps; extern int boot_cpu_id; extern struct pcb stoppcbs[]; +extern struct xpcb stopxpcbs[]; extern int cpu_apic_ids[]; /* global data in identcpu.c */ @@ -48,11 +49,13 @@ inthand_t IDTVEC(invlcache), /* Write back and invalidate cache */ IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */ IDTVEC(cpustop), /* CPU stops & waits to be restarted */ + IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */ IDTVEC(rendezvous); /* handle CPU rendezvous */ /* functions in mp_machdep.c */ void cpu_add(u_int apic_id, char boot_cpu); void cpustop_handler(void); +void cpususpend_handler(void); void init_secondary(void); void ipi_selected(u_int cpus, u_int ipi); void ipi_all_but_self(u_int ipi); Index: sys/conf/files.amd64 =================================================================== RCS file: /home/ncvs/src/sys/conf/files.amd64,v retrieving revision 1.126 diff -p -u -r1.126 files.amd64 --- sys/conf/files.amd64 1 Dec 2008 16:53:01 -0000 1.126 +++ sys/conf/files.amd64 11 Dec 2008 00:41:51 -0000 @@ -69,6 +69,19 @@ hptrr_lib.o optional hptrr \ # amd64/acpica/OsdEnvironment.c optional acpi amd64/acpica/acpi_machdep.c optional acpi +amd64/acpica/acpi_switch.S optional acpi +acpi_wakecode.h optional acpi \ + dependency "$S/amd64/acpica/acpi_wakecode.S assym.s" \ + compile-with "${MAKE} -f $S/amd64/acpica/Makefile ${.TARGET} MAKESRCPATH=$S/amd64/acpica" \ + no-obj no-implicit-rule before-depend \ + clean "acpi_wakecode.h acpi_wakecode.o acpi_wakecode.bin" +# +acpi_wakedata.h optional acpi \ + dependency "$S/amd64/acpica/acpi_wakecode.S assym.s" \ + compile-with "${MAKE} -f $S/amd64/acpica/Makefile ${.TARGET} MAKESRCPATH=$S/amd64/acpica" \ + no-obj no-implicit-rule before-depend \ + clean 
"acpi_wakedata.h acpi_wakecode.o acpi_wakecode.bin" +# amd64/acpica/acpi_wakeup.c optional acpi amd64/acpica/madt.c optional acpi amd64/amd64/amd64_mem.c optional mem Index: sys/dev/acpica/acpi.c =================================================================== RCS file: /home/ncvs/src/sys/dev/acpica/acpi.c,v retrieving revision 1.254 diff -p -u -r1.254 acpi.c --- sys/dev/acpica/acpi.c 18 Nov 2008 21:01:54 -0000 1.254 +++ sys/dev/acpica/acpi.c 11 Dec 2008 00:41:55 -0000 @@ -46,6 +46,9 @@ __FBSDID("$FreeBSD: src/sys/dev/acpica/a #include #include #include +#ifdef SMP +#include +#endif #include #if defined(__i386__) || defined(__amd64__) @@ -2274,6 +2277,7 @@ acpi_SetSleepState(struct acpi_softc *sc return (acpi_EnterSleepState(sc, state)); } +#if defined(__amd64__) || defined(__i386__) static void acpi_sleep_force(void *arg) { @@ -2284,6 +2288,7 @@ acpi_sleep_force(void *arg) if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate))) printf("acpi: force sleep state S%d failed\n", sc->acpi_next_sstate); } +#endif /* * Request that the system enter the given suspend state. All /dev/apm @@ -2294,7 +2299,9 @@ acpi_sleep_force(void *arg) int acpi_ReqSleepState(struct acpi_softc *sc, int state) { +#if defined(__i386__) struct apm_clone_data *clone; +#endif if (state < ACPI_STATE_S1 || state > ACPI_STATE_S5) return (EINVAL); @@ -2307,11 +2314,7 @@ acpi_ReqSleepState(struct acpi_softc *sc return (ENXIO); } -#if !defined(__i386__) - /* This platform does not support acpi suspend/resume. */ - return (EOPNOTSUPP); -#endif - +#if defined(__amd64__) || defined(__i386__) /* If a suspend request is already in progress, just return. */ ACPI_LOCK(acpi); if (sc->acpi_next_sstate != 0) { @@ -2321,6 +2324,7 @@ acpi_ReqSleepState(struct acpi_softc *sc /* Record the pending state and notify all apm devices. 
*/ sc->acpi_next_sstate = state; +#if defined(__i386__) STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) { clone->notify_status = APM_EV_NONE; if ((clone->flags & ACPI_EVF_DEVD) == 0) { @@ -2328,6 +2332,7 @@ acpi_ReqSleepState(struct acpi_softc *sc KNOTE_UNLOCKED(&clone->sel_read.si_note, 0); } } +#endif /* If devd(8) is not running, immediately enter the sleep state. */ if (devctl_process_running() == FALSE) { @@ -2352,6 +2357,10 @@ acpi_ReqSleepState(struct acpi_softc *sc callout_reset(&sc->susp_force_to, 10 * hz, acpi_sleep_force, sc); ACPI_UNLOCK(acpi); return (0); +#else + /* This platform does not support acpi suspend/resume. */ + return (EOPNOTSUPP); +#endif } /* @@ -2364,14 +2373,10 @@ acpi_ReqSleepState(struct acpi_softc *sc int acpi_AckSleepState(struct apm_clone_data *clone, int error) { +#if defined(__amd64__) || defined(__i386__) struct acpi_softc *sc; int ret, sleeping; -#if !defined(__i386__) - /* This platform does not support acpi suspend/resume. */ - return (EOPNOTSUPP); -#endif - /* If no pending sleep state, return an error. */ ACPI_LOCK(acpi); sc = clone->acpi_sc; @@ -2395,8 +2400,9 @@ acpi_AckSleepState(struct apm_clone_data * all devices, seeing if they agree yet. We only count devices that * are writable since read-only devices couldn't ack the request. */ - clone->notify_status = APM_EV_ACKED; sleeping = TRUE; +#if defined(__i386__) + clone->notify_status = APM_EV_ACKED; STAILQ_FOREACH(clone, &sc->apm_cdevs, entries) { if ((clone->flags & ACPI_EVF_WRITE) != 0 && clone->notify_status != APM_EV_ACKED) { @@ -2404,6 +2410,7 @@ acpi_AckSleepState(struct apm_clone_data break; } } +#endif /* If all devices have voted "yes", we will suspend now. */ if (sleeping) @@ -2414,8 +2421,11 @@ acpi_AckSleepState(struct apm_clone_data if (ACPI_FAILURE(acpi_EnterSleepState(sc, sc->acpi_next_sstate))) ret = ENODEV; } - return (ret); +#else + /* This platform does not support acpi suspend/resume. 
*/ + return (EOPNOTSUPP); +#endif } static void @@ -2459,11 +2469,18 @@ acpi_EnterSleepState(struct acpi_softc * sc->acpi_sleep_disabled = 1; ACPI_UNLOCK(acpi); +#ifdef SMP + thread_lock(curthread); + sched_bind(curthread, 0); + thread_unlock(curthread); +#endif + /* * Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE * drivers need this. */ mtx_lock(&Giant); + slp_state = ACPI_SS_NONE; switch (state) { case ACPI_STATE_S1: @@ -2570,6 +2587,13 @@ acpi_EnterSleepState(struct acpi_softc * acpi_UserNotify("Resume", ACPI_ROOT_OBJECT, state); mtx_unlock(&Giant); + +#ifdef SMP + thread_lock(curthread); + sched_unbind(curthread); + thread_unlock(curthread); +#endif + return_ACPI_STATUS (status); } Index: sys/dev/acpica/acpi_ec.c =================================================================== RCS file: /home/ncvs/src/sys/dev/acpica/acpi_ec.c,v retrieving revision 1.80 diff -p -u -r1.80 acpi_ec.c --- sys/dev/acpica/acpi_ec.c 8 Nov 2007 21:20:34 -0000 1.80 +++ sys/dev/acpica/acpi_ec.c 11 Dec 2008 00:41:57 -0000 @@ -747,7 +747,7 @@ EcSpaceHandler(UINT32 Function, ACPI_PHY * If booting, check if we need to run the query handler. If so, we * we call it directly here since our thread taskq is not active yet. 
*/ - if (cold || rebooting) { + if (cold || rebooting || sc->ec_suspending) { if ((EC_GET_CSR(sc) & EC_EVENT_SCI)) { CTR0(KTR_ACPI, "ec running gpe handler directly"); EcGpeQueryHandler(sc); Index: sys/i386/i386/i686_mem.c =================================================================== RCS file: /home/ncvs/src/sys/i386/i386/i686_mem.c,v retrieving revision 1.29 diff -p -u -r1.29 i686_mem.c --- sys/i386/i386/i686_mem.c 26 Nov 2008 19:25:13 -0000 1.29 +++ sys/i386/i386/i686_mem.c 11 Dec 2008 00:41:57 -0000 @@ -73,11 +73,13 @@ static void i686_mrinit(struct mem_range static int i686_mrset(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg); static void i686_mrAPinit(struct mem_range_softc *sc); +static void i686_mrreinit(struct mem_range_softc *sc); static struct mem_range_ops i686_mrops = { i686_mrinit, i686_mrset, - i686_mrAPinit + i686_mrAPinit, + i686_mrreinit }; /* XXX for AP startup hook */ @@ -668,6 +670,44 @@ i686_mrAPinit(struct mem_range_softc *sc wrmsr(MSR_MTRRdefType, mtrrdef); } +/* + * Per-CPU worker for i686_mrreinit(), used by both its SMP and UP paths: + * calls i686_mrstoreone() and rewrites MSR_MTRRdefType from mtrrdef. Takes + * a void * so it can be passed to smp_rendezvous() without a cast. + */ +static void +i686_mrreinitone(void *arg) +{ + struct mem_range_softc *sc = arg; + + i686_mrstoreone(sc); + wrmsr(MSR_MTRRdefType, mtrrdef); +} + +/* + * Re-initialise running CPU(s) MTRRs to match the ranges in the descriptor + * list. + * + * XXX Must be called with interrupts enabled. + */ +static void +i686_mrreinit(struct mem_range_softc *sc) +{ +#ifdef SMP + /* + * We should use ipi_all_but_self() to call other CPUs into a + * locking gate, then call a target function to do this work. + * The "proper" solution involves a generalised locking gate + * implementation, not ready yet.
+ */ + smp_rendezvous(NULL, i686_mrreinitone, NULL, sc); +#else + disable_intr(); /* disable interrupts */ + i686_mrreinitone(sc); + enable_intr(); +#endif +} + static void i686_mem_drvinit(void *unused) { Index: sys/i386/i386/k6_mem.c =================================================================== RCS file: /home/ncvs/src/sys/i386/i386/k6_mem.c,v retrieving revision 1.15 diff -p -u -r1.15 k6_mem.c --- sys/i386/i386/k6_mem.c 26 Nov 2008 19:25:13 -0000 1.15 +++ sys/i386/i386/k6_mem.c 11 Dec 2008 00:41:57 -0000 @@ -70,6 +70,7 @@ static struct mem_range_ops k6_mrops = { k6_mrinit, k6_mrset, + NULL, NULL }; Index: sys/kern/subr_smp.c =================================================================== RCS file: /home/ncvs/src/sys/kern/subr_smp.c,v retrieving revision 1.209 diff -p -u -r1.209 subr_smp.c --- sys/kern/subr_smp.c 3 Nov 2008 21:17:02 -0000 1.209 +++ sys/kern/subr_smp.c 11 Dec 2008 00:41:58 -0000 @@ -262,6 +262,54 @@ stop_cpus(cpumask_t map) return 1; } +#if defined(__amd64__) +/* + * When called the executing CPU will send an IPI to all other CPUs + * requesting that they halt execution. + * + * Usually (but not necessarily) called with 'other_cpus' as its arg. + * + * - Signals all CPUs in map to suspend. + * - Waits for each to suspend. + * + * Returns: + * -1: error + * 0: NA + * 1: ok + * + * XXX FIXME: this is not MP-safe, needs a lock to prevent multiple CPUs + * from executing at same time.
+ */ +int +suspend_cpus(cpumask_t map) +{ + int i; + + if (!smp_started) + return (0); /* SMP not started: no IPI is sent */ + + CTR1(KTR_SMP, "suspend_cpus(%x)", map); + + /* send the suspend IPI to all CPUs in map */ + ipi_selected(map, IPI_SUSPEND); + + i = 0; + while ((stopped_cpus & map) != map) { + /* spin until every CPU in map has its bit set in stopped_cpus */ + cpu_spinwait(); + i++; /* only consulted under DIAGNOSTIC below */ +#ifdef DIAGNOSTIC + if (i == 100000) { + printf("timeout suspending cpus\n"); + break; /* give up waiting; falls through to return (1) */ + } +#endif + } + + return (1); /* also on DIAGNOSTIC timeout; never -1 */ +} +#endif + /* * Called by a CPU to restart stopped CPUs. * Index: sys/sys/memrange.h =================================================================== RCS file: /home/ncvs/src/sys/sys/memrange.h,v retrieving revision 1.7 diff -p -u -r1.7 memrange.h --- sys/sys/memrange.h 4 Aug 2004 18:30:31 -0000 1.7 +++ sys/sys/memrange.h 11 Dec 2008 00:41:58 -0000 @@ -52,6 +52,7 @@ struct mem_range_ops void (*init)(struct mem_range_softc *sc); int (*set)(struct mem_range_softc *sc, struct mem_range_desc *mrd, int *arg); void (*initAP)(struct mem_range_softc *sc); + void (*reinit)(struct mem_range_softc *sc); }; struct mem_range_softc @@ -68,4 +69,3 @@ extern int mem_range_attr_get(struct mem extern int mem_range_attr_set(struct mem_range_desc *mrd, int *arg); #endif - Index: sys/sys/smp.h =================================================================== RCS file: /home/ncvs/src/sys/sys/smp.h,v retrieving revision 1.88 diff -p -u -r1.88 smp.h --- sys/sys/smp.h 23 May 2008 04:05:26 -0000 1.88 +++ sys/sys/smp.h 11 Dec 2008 00:41:58 -0000 @@ -122,6 +122,9 @@ void forward_signal(struct thread *); void forward_roundrobin(void); int restart_cpus(cpumask_t); int stop_cpus(cpumask_t); +#if defined(__amd64__) +int suspend_cpus(cpumask_t); +#endif void smp_rendezvous_action(void); extern struct mtx smp_ipi_mtx;