Index: i386/acpica/acpi_wakecode.S =================================================================== RCS file: /home/ncvs/src/sys/i386/acpica/acpi_wakecode.S,v retrieving revision 1.13.10.1 diff -u -r1.13.10.1 acpi_wakecode.S --- i386/acpica/acpi_wakecode.S 3 Aug 2009 08:13:06 -0000 1.13.10.1 +++ i386/acpica/acpi_wakecode.S 6 May 2012 12:22:27 -0000 @@ -1,6 +1,7 @@ /*- * Copyright (c) 2001 Takanori Watanabe - * Copyright (c) 2001 Mitsuru IWASAKI + * Copyright (c) 2001-2012 Mitsuru IWASAKI + * Copyright (c) 2008-2012 Jung-uk Kim * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,8 +28,6 @@ * $FreeBSD: src/sys/i386/acpica/acpi_wakecode.S,v 1.13.10.1 2009/08/03 08:13:06 kensmith Exp $ */ -#define LOCORE - #include #include @@ -41,221 +40,211 @@ * Depending on the previous sleep state, we may need to initialize more * of the system (i.e., S3 suspend-to-RAM vs. S4 suspend-to-disk). */ - .align 4 - .code16 -wakeup_16: - nop - cli - cld + .data /* So we can modify it */ + + ALIGN_TEXT + .code16 +wakeup_start: /* * Set up segment registers for real mode, a small stack for * any calls we make, and clear any flags. */ - movw %cs,%ax - movw %ax,%ds - movw %ax,%ss - movw $PAGE_SIZE,%sp - pushl $0 - popfl + cli /* make sure no interrupts */ + mov %cs, %ax /* copy %cs to %ds. Remember these */ + mov %ax, %ds /* are offsets rather than selectors */ + mov %ax, %ss + movw $PAGE_SIZE, %sp + xorw %ax, %ax + pushw %ax + popfw /* To debug resume hangs, beep the speaker if the user requested. */ - cmpl $1,resume_beep - jne nobeep - movb $0xc0,%al - outb %al,$0x42 - movb $0x04,%al - outb %al,$0x42 - inb $0x61,%al - orb $0x3,%al - outb %al,$0x61 -nobeep: + testb $~0, resume_beep - wakeup_start + jz 1f + movb $0, resume_beep - wakeup_start + movb $0xc0, %al + outb %al, $0x42 + movb $0x04, %al + outb %al, $0x42 + inb $0x61, %al + orb $0x3, %al + outb %al, $0x61 +1: /* Re-initialize video BIOS if the reset_video tunable is set. 
*/ - cmpl $1,reset_video - jne nobiosreset - lcall $0xc000,$3 + testb $~0, reset_video - wakeup_start + jz 1f + movb $0, reset_video - wakeup_start + lcall $0xc000, $3 + + /* When we reach here, int 0x10 should be ready. Hide cursor. */ + movb $0x01, %ah + movb $0x20, %ch + int $0x10 + + /* Re-start in case the BIOS call clobbered the segment registers. */ + jmp wakeup_start +1: /* - * Set up segment registers for real mode again in case the - * previous BIOS call clobbers them. + * Find relocation base (gdt descriptor and ljmp target are patched from C) */ - movw %cs,%ax - movw %ax,%ds - movw %ax,%ss -nobiosreset: - - /* Load GDT for real mode. Use 32 bit prefix for addresses >16 MB. */ - lgdtl physical_gdt - - /* Restore CR2, CR3 and CR4 */ - movl previous_cr2,%eax - movl %eax,%cr2 - movl previous_cr3,%eax - movl %eax,%cr3 - movl previous_cr4,%eax - movl %eax,%cr4 - - /* Transfer some values to protected mode with an inline stack */ -#define NVALUES 9 -#define TRANSFER_STACK32(val, idx) \ - movl val,%eax; \ - movl %eax,wakeup_32stack+(idx+1)+(idx*4) - - TRANSFER_STACK32(previous_ss, (NVALUES - 9)) - TRANSFER_STACK32(previous_fs, (NVALUES - 8)) - TRANSFER_STACK32(previous_ds, (NVALUES - 7)) - TRANSFER_STACK32(physical_gdt+2, (NVALUES - 6)) - TRANSFER_STACK32(where_to_recover, (NVALUES - 5)) - TRANSFER_STACK32(previous_idt+2, (NVALUES - 4)) - TRANSFER_STACK32(previous_ldt, (NVALUES - 3)) - TRANSFER_STACK32(previous_gdt+2, (NVALUES - 2)) - TRANSFER_STACK32(previous_tr, (NVALUES - 1)) - TRANSFER_STACK32(previous_cr0, (NVALUES - 0)) + xorl %ebx, %ebx + mov %cs, %bx + sall $4, %ebx /* %ebx is now our relocation base */ - mov physical_esp,%esi /* to be used in 32bit code */ + /* + * Load the descriptor table pointer. We'll need it when running + * in 16-bit protected mode.
+ */ + lgdtl bootgdtdesc - wakeup_start /* Enable protected mode */ - movl %cr0,%eax - orl $(CR0_PE),%eax - movl %eax,%cr0 + movl $CR0_PE, %eax + mov %eax, %cr0 + /* + * Now execute a far jump to turn on protected mode. This + * causes the segment registers to turn into selectors and causes + * %cs to be loaded from the gdt. + * + * The following instruction is: + * ljmpl $bootcode32 - bootgdt, $wakeup_32 - wakeup_start + * but gas cannot assemble that. And besides, we patch the targets + * in early startup and it's a little clearer what we are patching. + */ wakeup_sw32: - /* Switch to protected mode by intersegmental jump */ - ljmpl $KCSEL,$0x12345678 /* Code location, to be replaced */ + .byte 0x66 /* size override to 32 bits */ + .byte 0xea /* opcode for far jump */ + .long wakeup_32 - wakeup_start /* offset in segment */ + .word bootcode32 - bootgdt /* index in gdt for 32 bit code */ /* - * Now switched to protected mode without paging enabled. - * %esi: KERNEL stack pointer (physical address) + * At this point, we are running in 32 bit legacy protected mode.
*/ + ALIGN_TEXT .code32 wakeup_32: - nop - /* Set up segment registers for protected mode */ - movw $KDSEL,%ax /* KDSEL to segment registers */ - movw %ax,%ds - movw %ax,%es - movw %ax,%gs - movw %ax,%ss - movw $KPSEL,%ax /* KPSEL to %fs */ - movw %ax,%fs - movl %esi,%esp /* physical address stack pointer */ - -wakeup_32stack: - /* Operands are overwritten in 16 bit code by TRANSFER_STACK32 macro */ - pushl $0xabcdef09 /* ss + dummy */ - pushl $0xabcdef08 /* fs + gs */ - pushl $0xabcdef07 /* ds + es */ - pushl $0xabcdef06 /* gdt:base (physical address) */ - pushl $0xabcdef05 /* recover address */ - pushl $0xabcdef04 /* idt:base */ - pushl $0xabcdef03 /* ldt + idt:limit */ - pushl $0xabcdef02 /* gdt:base */ - pushl $0xabcdef01 /* TR + gdt:limit */ - pushl $0xabcdef00 /* CR0 */ - - movl %esp,%ebp -#define CR0_REGISTER 0(%ebp) -#define TASK_REGISTER 4(%ebp) -#define PREVIOUS_GDT 6(%ebp) -#define PREVIOUS_LDT 12(%ebp) -#define PREVIOUS_IDT 14(%ebp) -#define RECOVER_ADDR 20(%ebp) -#define PHYSICAL_GDT_BASE 24(%ebp) -#define PREVIOUS_DS 28(%ebp) -#define PREVIOUS_ES 30(%ebp) -#define PREVIOUS_FS 32(%ebp) -#define PREVIOUS_GS 34(%ebp) -#define PREVIOUS_SS 36(%ebp) - - /* Fixup TSS type field */ -#define TSS_TYPEFIX_MASK 0xf9 - xorl %esi,%esi - movl PHYSICAL_GDT_BASE,%ebx - movw TASK_REGISTER,%si - leal (%ebx,%esi),%eax /* get TSS segment descriptor */ - andb $TSS_TYPEFIX_MASK,5(%eax) - - /* Prepare to return to sleep/wakeup code point */ - lgdtl PREVIOUS_GDT - lidtl PREVIOUS_IDT - - /* Pack values from the GDT to be loaded into segment registers. */ - movl PREVIOUS_DS,%ebx - movl PREVIOUS_FS,%ecx - movl PREVIOUS_SS,%edx - movw TASK_REGISTER,%si - shll $16,%esi - movw PREVIOUS_LDT,%si - movl RECOVER_ADDR,%edi - - /* Enable paging and etc. */ - movl CR0_REGISTER,%eax - movl %eax,%cr0 + mov $bootdata32 - bootgdt, %eax + mov %ax, %ds + + /* Restore CR2, CR4 and CR3. 
*/ + movl wakeup_cr2 - wakeup_start(%ebx), %eax + mov %eax, %cr2 + movl wakeup_cr4 - wakeup_start(%ebx), %eax + mov %eax, %cr4 + + movl wakeup_ctx - wakeup_start(%ebx), %esi + + movl wakeup_cr3 - wakeup_start(%ebx), %eax + mov %eax, %cr3 + + /* + * Finally, turn on paging (we stay in 32 bit mode). We have + * to be very careful here because all the segmentation disappears + * out from underneath us. The spec says we can depend on the + * subsequent pipelined branch to execute, but *only if* everything + * is still identity mapped. If any mappings change, the pipeline + * will flush. + */ + mov %cr0, %eax + orl $CR0_PG, %eax + mov %eax, %cr0 - /* Flush the prefetch queue */ jmp 1f 1: jmp 1f 1: + movl %esi, %ebx +#define WAKEUP_CTX(member) wakeup_ ## member - wakeup_ctx(%ebx) - /* - * Now we are in kernel virtual memory addressing with the following - * original register values: - * %ebx: ds + es - * %ecx: fs + gs - * %edx: ss + dummy - * %esi: LDTR + TR - * %edi: recover address - * We'll load these back into the segment registers now. - */ - nop + /* Restore GDT. */ + lgdt WAKEUP_CTX(gdt) + jmp 1f +1: - movl %esi,%eax /* LDTR + TR */ - lldt %ax /* load LDT register */ - shrl $16,%eax - ltr %ax /* load task register */ - - /* Restore segment registers */ - movl %ebx,%eax /* ds + es */ - movw %ax,%ds - shrl $16,%eax - movw %ax,%es - movl %ecx,%eax /* fs + gs */ - movw %ax,%fs - shrl $16,%eax - movw %ax,%gs - movl %edx,%eax /* ss */ - movw %ax,%ss + /* Restore segment registers. */ + xorl %eax, %eax + movw WAKEUP_CTX(ds), %ax + movw %ax, %ds + movw WAKEUP_CTX(es), %ax + movw %ax, %es + movw WAKEUP_CTX(fs), %ax + movw %ax, %fs + movw WAKEUP_CTX(gs), %ax + movw %ax, %gs + movw WAKEUP_CTX(ss), %ax + movw %ax, %ss + + /* Restore CR0. */ + movl WAKEUP_CTX(cr0), %eax + mov %eax, %cr0 + + /* Indicate the CPU is resumed.
*/ + xorl %eax, %eax + movl %eax, WAKEUP_CTX(cpu) /* Jump to acpi_restorecpu() */ + movl WAKEUP_CTX(retaddr), %edi jmp *%edi -/* used in real mode */ -physical_gdt: .word 0 - .long 0 -physical_esp: .long 0 -previous_cr2: .long 0 -previous_cr3: .long 0 -previous_cr4: .long 0 -resume_beep: .long 0 -reset_video: .long 0 + .data -/* - * Transfer from real mode to protected mode. The order of these variables - * is very important, DO NOT INSERT OR CHANGE unless you know why. - */ -previous_cr0: .long 0 -previous_tr: .word 0 -previous_gdt: .word 0 - .long 0 -previous_ldt: .word 0 -previous_idt: .word 0 - .long 0 -where_to_recover: .long 0 -previous_ds: .word 0 -previous_es: .word 0 -previous_fs: .word 0 -previous_gs: .word 0 -previous_ss: .word 0 -dummy: .word 0 +resume_beep: + .byte 0 +reset_video: + .byte 0 + + ALIGN_DATA +bootgdt: + .long 0x00000000 + .long 0x00000000 + +bootcode32: + .long 0x0000ffff + .long 0x00cf9b00 + +bootdata32: + .long 0x0000ffff + .long 0x00cf9300 +bootgdtend: + +bootgdtdesc: + .word bootgdtend - bootgdt /* Length */ + .long bootgdt - wakeup_start /* Offset plus %ds << 4 */ + + ALIGN_DATA +wakeup_ctx: + .long 0 +wakeup_cr2: + .long 0 +wakeup_cr3: + .long 0 +wakeup_cr4: + .long 0 + +wakeup_gdt: + .word 0 + .long 0 + + ALIGN_DATA +wakeup_ds: + .word 0 +wakeup_es: + .word 0 +wakeup_fs: + .word 0 +wakeup_gs: + .word 0 +wakeup_ss: + .word 0 + + ALIGN_DATA +wakeup_cr0: + .long 0 +wakeup_retaddr: + .long 0 +wakeup_cpu: + .long 0 +dummy: Index: i386/acpica/acpi_wakeup.c =================================================================== RCS file: /home/ncvs/src/sys/i386/acpica/acpi_wakeup.c,v retrieving revision 1.50.2.3 diff -u -r1.50.2.3 acpi_wakeup.c --- i386/acpica/acpi_wakeup.c 11 Apr 2012 19:37:36 -0000 1.50.2.3 +++ i386/acpica/acpi_wakeup.c 6 May 2012 14:06:42 -0000 @@ -1,6 +1,7 @@ /*- * Copyright (c) 2001 Takanori Watanabe - * Copyright (c) 2001 Mitsuru IWASAKI + * Copyright (c) 2001-2012 Mitsuru IWASAKI + * Copyright (c) 2008-2012 Jung-uk Kim 
* All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -36,6 +37,7 @@ #include #include #include +#include #include #include @@ -48,15 +50,22 @@ #include #include #include +#include #include +#ifdef SMP +#include +#include +#include +#endif + #include #include #include "acpi_wakecode.h" -/* Make sure the code is less than one page and leave room for the stack. */ +/* Make sure the code is less than a page and leave room for the stack. */ CTASSERT(sizeof(wakecode) < PAGE_SIZE - 1024); #ifndef _SYS_CDEFS_H_ @@ -67,8 +76,13 @@ extern uint32_t acpi_reset_video; extern void initializecpu(void); +#ifdef SMP +extern struct pcb **susppcbs; +#else +static struct pcb **susppcbs; +#endif + static struct region_descriptor __used saved_idt, saved_gdt; -static struct region_descriptor *p_gdt; static uint16_t __used saved_ldt; static uint32_t __used r_eax, r_ebx, r_ecx, r_edx, r_ebp, r_esi, r_edi, @@ -78,9 +92,21 @@ static uint32_t __used r_esp; static void acpi_printcpu(void); -static void acpi_realmodeinst(void *arg, bus_dma_segment_t *segs, - int nsegs, int error); -static void acpi_alloc_wakeup_handler(void); +static void *acpi_alloc_wakeup_handler(void); + +#ifdef SMP +static int acpi_wakeup_ap(struct acpi_softc *, int); +static void acpi_wakeup_cpus(struct acpi_softc *, cpumask_t); +#endif + +#define ACPI_PAGETABLES 0 +#define WAKECODE_VADDR(sc) ((sc)->acpi_wakeaddr + (ACPI_PAGETABLES * PAGE_SIZE)) +#define WAKECODE_PADDR(sc) ((sc)->acpi_wakephys + (ACPI_PAGETABLES * PAGE_SIZE)) +#define WAKECODE_FIXUP(offset, type, val) do { \ + type *addr; \ + addr = (type *)(WAKECODE_VADDR(sc) + offset); \ + *addr = val; \ +} while (0) /* XXX shut gcc up */ extern int acpi_savecpu(void); @@ -173,18 +199,6 @@ r_cs, r_ds, r_es, r_fs, r_gs, r_ss); } -#define WAKECODE_FIXUP(offset, type, val) do { \ - type *addr; \ - addr = (type *)(sc->acpi_wakeaddr + offset); \ - *addr = val; \ -} while (0) - -#define WAKECODE_BCOPY(offset, type, val) do { \ - void 
*addr; \ - addr = (void *)(sc->acpi_wakeaddr + offset); \ - bcopy(&(val), addr, sizeof(type)); \ -} while (0) - /* Turn off bits 1&2 of the PIT, stopping the beep. */ static void acpi_stop_beep(void *arg) @@ -192,28 +206,163 @@ outb(0x61, inb(0x61) & ~0x3); } +#ifdef SMP +static int +acpi_wakeup_ap(struct acpi_softc *sc, int cpu) +{ + int vector = (WAKECODE_PADDR(sc) >> 12) & 0xff; + int apic_id = cpu_apic_ids[cpu]; + int ms; + + r_cr3 = susppcbs[cpu]->pcb_cr3; + r_edi = susppcbs[cpu]->pcb_edi; + r_esi = susppcbs[cpu]->pcb_esi; + r_ebp = susppcbs[cpu]->pcb_ebp; + r_esp = susppcbs[cpu]->pcb_esp; + r_ebx = susppcbs[cpu]->pcb_ebx; + ret_addr = susppcbs[cpu]->pcb_eip; + + r_gs = susppcbs[cpu]->pcb_gs; + r_efl = susppcbs[cpu]->pcb_psl; + + WAKECODE_FIXUP(resume_beep, uint8_t, 0); + WAKECODE_FIXUP(reset_video, uint8_t, 0); + + WAKECODE_FIXUP(wakeup_cpu, int, cpu); + + /* do an INIT IPI: assert RESET */ + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | + APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); + + /* wait for pending status end */ + lapic_ipi_wait(-1); + + /* do an INIT IPI: deassert RESET */ + lapic_ipi_raw(APIC_DEST_ALLESELF | APIC_TRIGMOD_LEVEL | + APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, 0); + + /* wait for pending status end */ + DELAY(10000); /* wait ~10mS */ + lapic_ipi_wait(-1); + + /* + * next we do a STARTUP IPI: the previous INIT IPI might still be + * latched, (P5 bug) this 1st STARTUP would then terminate + * immediately, and the previously started INIT IPI would continue. OR + * the previous INIT IPI has already run. and this STARTUP IPI will + * run. OR the previous INIT IPI was ignored. and this STARTUP IPI + * will run. 
+ */ + + /* do a STARTUP IPI */ + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | + APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | + vector, apic_id); + lapic_ipi_wait(-1); + DELAY(200); /* wait ~200uS */ + + /* + * finally we do a 2nd STARTUP IPI: this 2nd STARTUP IPI should run IF + * the previous STARTUP IPI was cancelled by a latched INIT IPI. OR + * this STARTUP IPI will be ignored, as only ONE STARTUP IPI is + * recognized after hardware RESET or INIT IPI. + */ + + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | + APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | + vector, apic_id); + lapic_ipi_wait(-1); + DELAY(200); /* wait ~200uS */ + + /* Wait up to 5 seconds for it to start. */ + for (ms = 0; ms < 5000; ms++) { + if (*(int *)(WAKECODE_VADDR(sc) + wakeup_cpu) == 0) + return (1); /* return SUCCESS */ + DELAY(1000); + } + return (0); /* return FAILURE */ +} + +#define WARMBOOT_TARGET 0 +#define WARMBOOT_OFF (KERNBASE + 0x0467) +#define WARMBOOT_SEG (KERNBASE + 0x0469) + +#define CMOS_REG (0x70) +#define CMOS_DATA (0x71) +#define BIOS_RESET (0x0f) +#define BIOS_WARM (0x0a) + +static void +acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus) +{ + uint32_t mpbioswarmvec; + int cpu; + u_char mpbiosreason; + + /* save the current value of the warm-start vector */ + mpbioswarmvec = *((uint32_t *)WARMBOOT_OFF); + outb(CMOS_REG, BIOS_RESET); + mpbiosreason = inb(CMOS_DATA); + + /* setup a vector to our boot code */ + *((volatile u_short *)WARMBOOT_OFF) = WARMBOOT_TARGET; + *((volatile u_short *)WARMBOOT_SEG) = WAKECODE_PADDR(sc) >> 4; + outb(CMOS_REG, BIOS_RESET); + outb(CMOS_DATA, BIOS_WARM); /* 'warm-start' */ + + /* Wake up each AP. 
*/ + for (cpu = 1; cpu < mp_ncpus; cpu++) { + if ((wakeup_cpus & (1 << cpu)) == 0) + continue; + if (acpi_wakeup_ap(sc, cpu) == 0) { + /* restore the warmstart vector */ + *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec; + panic("acpi_wakeup: failed to resume AP #%d (PHY #%d)", + cpu, cpu_apic_ids[cpu]); + } + } + + /* restore the warmstart vector */ + *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec; + + outb(CMOS_REG, BIOS_RESET); + outb(CMOS_DATA, mpbiosreason); +} +#endif + int acpi_sleep_machdep(struct acpi_softc *sc, int state) { - ACPI_STATUS status; - struct pmap *pm; - int ret; - uint32_t cr3; - u_long ef; +#ifdef SMP + cpumask_t wakeup_cpus; +#endif + register_t cr3, rf; + ACPI_STATUS status; + struct pmap *pm; + int ret; ret = -1; - if (sc->acpi_wakeaddr == 0) + + if (sc->acpi_wakeaddr == 0ul) return (ret); - AcpiSetFirmwareWakingVector(sc->acpi_wakephys); +#ifdef SMP + while (PCPU_GET(cpuid) != 0) { + device_printf(sc->acpi_dev, "Waiting for BSP...\n"); + cpu_idle(0); + } + wakeup_cpus = PCPU_GET(other_cpus); +#endif + + AcpiSetFirmwareWakingVector(WAKECODE_PADDR(sc)); - ef = intr_disable(); + rf = intr_disable(); intr_suspend(); /* - * Temporarily switch to the kernel pmap because it provides an - * identity mapping (setup at boot) for the low physical memory - * region containing the wakeup code. + * Temporarily switch to the kernel pmap because it provides + * an identity mapping (setup at boot) for the low physical + * memory region containing the wakeup code. 
*/ pm = kernel_pmap; cr3 = rcr3(); @@ -225,34 +374,35 @@ ret_addr = 0; if (acpi_savecpu()) { - /* Execute Sleep */ +#ifdef SMP + if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) { + device_printf(sc->acpi_dev, + "Failed to suspend APs: CPU mask = 0x%jx\n", + (uintmax_t)(wakeup_cpus & ~stopped_cpus)); + goto out; + } +#endif - p_gdt = (struct region_descriptor *) - (sc->acpi_wakeaddr + physical_gdt); - p_gdt->rd_limit = saved_gdt.rd_limit; - p_gdt->rd_base = vtophys(saved_gdt.rd_base); - - WAKECODE_FIXUP(physical_esp, uint32_t, vtophys(r_esp)); - WAKECODE_FIXUP(previous_cr0, uint32_t, r_cr0); - WAKECODE_FIXUP(previous_cr2, uint32_t, r_cr2); - WAKECODE_FIXUP(previous_cr3, uint32_t, r_cr3); - WAKECODE_FIXUP(previous_cr4, uint32_t, r_cr4); - - WAKECODE_FIXUP(resume_beep, uint32_t, acpi_resume_beep); - WAKECODE_FIXUP(reset_video, uint32_t, acpi_reset_video); - - WAKECODE_FIXUP(previous_tr, uint16_t, r_tr); - WAKECODE_BCOPY(previous_gdt, struct region_descriptor, saved_gdt); - WAKECODE_FIXUP(previous_ldt, uint16_t, saved_ldt); - WAKECODE_BCOPY(previous_idt, struct region_descriptor, saved_idt); - - WAKECODE_FIXUP(where_to_recover, void *, acpi_restorecpu); - - WAKECODE_FIXUP(previous_ds, uint16_t, r_ds); - WAKECODE_FIXUP(previous_es, uint16_t, r_es); - WAKECODE_FIXUP(previous_fs, uint16_t, r_fs); - WAKECODE_FIXUP(previous_gs, uint16_t, r_gs); - WAKECODE_FIXUP(previous_ss, uint16_t, r_ss); + WAKECODE_FIXUP(resume_beep, uint8_t, (acpi_resume_beep != 0)); + WAKECODE_FIXUP(reset_video, uint8_t, (acpi_reset_video != 0)); + + WAKECODE_FIXUP(wakeup_cr2, uint32_t, r_cr2); + WAKECODE_FIXUP(wakeup_cr4, uint32_t, r_cr4); + WAKECODE_FIXUP(wakeup_cr3, uint32_t, r_cr3); + + WAKECODE_FIXUP(wakeup_gdt, uint16_t, saved_gdt.rd_limit); + WAKECODE_FIXUP(wakeup_gdt + 2, uint32_t, saved_gdt.rd_base); + + WAKECODE_FIXUP(wakeup_ds, uint16_t, r_ds); + WAKECODE_FIXUP(wakeup_es, uint16_t, r_es); + WAKECODE_FIXUP(wakeup_fs, uint16_t, r_fs); + WAKECODE_FIXUP(wakeup_gs, uint16_t, r_gs); + 
WAKECODE_FIXUP(wakeup_ss, uint16_t, r_ss); + + WAKECODE_FIXUP(wakeup_cr0, uint32_t, r_cr0); + + WAKECODE_FIXUP(wakeup_retaddr, void *, acpi_restorecpu); + WAKECODE_FIXUP(wakeup_cpu, int, 0); if (bootverbose) acpi_printcpu(); @@ -265,14 +415,23 @@ if (status != AE_OK) { device_printf(sc->acpi_dev, - "AcpiEnterSleepState failed - %s\n", - AcpiFormatException(status)); + "AcpiEnterSleepState failed - %s\n", + AcpiFormatException(status)); goto out; } for (;;) ia32_pause(); } else { + /* Restore descriptor tables. */ + lgdt(&saved_gdt); + lidt(&saved_idt); + lldt(saved_ldt); + + /* Clear "task busy" bit and reload TR. */ + PCPU_GET(tss_gdt)->sd_type &= (~SDT_SYS386BSY | SDT_SYS386TSS); + ltr(r_tr); + pmap_init_pat(); PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); @@ -280,14 +439,25 @@ acpi_savecpu(); acpi_printcpu(); } +#ifdef SMP + if (wakeup_cpus != 0) + acpi_wakeup_cpus(sc, wakeup_cpus); +#endif ret = 0; } out: +#ifdef SMP + if (wakeup_cpus != 0) + restart_cpus(wakeup_cpus); +#endif + load_cr3(cr3); mca_resume(); intr_resume(); - intr_restore(ef); + intr_restore(rf); + + AcpiSetFirmwareWakingVector(0); if (ret == 0 && mem_range_softc.mr_op != NULL && mem_range_softc.mr_op->reinit != NULL) @@ -300,71 +470,60 @@ return (ret); } -static bus_dma_tag_t acpi_waketag; -static bus_dmamap_t acpi_wakemap; -static vm_offset_t acpi_wakeaddr; - -static void +static void * acpi_alloc_wakeup_handler(void) { - void *wakeaddr; - - if (!cold) - return; + void *wakeaddr; + int i; /* * Specify the region for our wakeup code. We want it in the low 1 MB - * region, excluding video memory and above (0xa0000). We ask for - * it to be page-aligned, just to be safe. + * region, excluding real mode IVT (0-0x3ff), BDA (0x400-0x4ff), EBDA + * (less than 128KB, below 0xa0000, must be excluded by SMAP and DSDT), + * and ROM area (0xa0000 and above). The temporary page tables must be + * page-aligned. 
*/ - if (bus_dma_tag_create(/*parent*/ NULL, - /*alignment*/ PAGE_SIZE, /*no boundary*/ 0, - /*lowaddr*/ 0x9ffff, /*highaddr*/ BUS_SPACE_MAXADDR, NULL, NULL, - /*maxsize*/ PAGE_SIZE, /*segments*/ 1, /*maxsegsize*/ PAGE_SIZE, - 0, busdma_lock_mutex, &Giant, &acpi_waketag) != 0) { - printf("acpi_alloc_wakeup_handler: can't create wake tag\n"); - return; + wakeaddr = contigmalloc((ACPI_PAGETABLES + 1) * PAGE_SIZE, M_DEVBUF, + M_NOWAIT, 0x500, 0xa0000, PAGE_SIZE, 0ul); + if (wakeaddr == NULL) { + printf("%s: can't alloc wake memory\n", __func__); + return (NULL); } - if (bus_dmamem_alloc(acpi_waketag, &wakeaddr, BUS_DMA_NOWAIT, - &acpi_wakemap) != 0) { - printf("acpi_alloc_wakeup_handler: can't alloc wake memory\n"); - return; - } - acpi_wakeaddr = (vm_offset_t)wakeaddr; -} - -SYSINIT(acpiwakeup, SI_SUB_KMEM, SI_ORDER_ANY, acpi_alloc_wakeup_handler, 0); + susppcbs = malloc(mp_ncpus * sizeof(*susppcbs), M_DEVBUF, M_WAITOK); + for (i = 0; i < mp_ncpus; i++) + susppcbs[i] = malloc(sizeof(**susppcbs), M_DEVBUF, M_WAITOK); -static void -acpi_realmodeinst(void *arg, bus_dma_segment_t *segs, int nsegs, int error) -{ - struct acpi_softc *sc; - uint32_t *addr; - - /* Overwrite the ljmp target with the real address */ - sc = arg; - sc->acpi_wakephys = segs[0].ds_addr; - addr = (uint32_t *)&wakecode[wakeup_sw32 + 2]; - *addr = sc->acpi_wakephys + wakeup_32; - - /* Copy the wake code into our low page and save its physical addr. 
*/ - bcopy(wakecode, (void *)sc->acpi_wakeaddr, sizeof(wakecode)); - if (bootverbose) { - device_printf(sc->acpi_dev, "wakeup code va %#x pa %#jx\n", - acpi_wakeaddr, (uintmax_t)sc->acpi_wakephys); - } + return (wakeaddr); } void acpi_install_wakeup_handler(struct acpi_softc *sc) { - if (acpi_wakeaddr == 0) + static void *wakeaddr = NULL; + + if (wakeaddr != NULL) + return; + + wakeaddr = acpi_alloc_wakeup_handler(); + if (wakeaddr == NULL) return; - sc->acpi_waketag = acpi_waketag; - sc->acpi_wakeaddr = acpi_wakeaddr; - sc->acpi_wakemap = acpi_wakemap; + sc->acpi_wakeaddr = (vm_offset_t)wakeaddr; + sc->acpi_wakephys = vtophys(wakeaddr); + + bcopy(wakecode, (void *)WAKECODE_VADDR(sc), sizeof(wakecode)); - bus_dmamap_load(sc->acpi_waketag, sc->acpi_wakemap, - (void *)sc->acpi_wakeaddr, PAGE_SIZE, acpi_realmodeinst, sc, 0); + /* Patch GDT base address, ljmp target. */ + WAKECODE_FIXUP((bootgdtdesc + 2), uint32_t, + WAKECODE_PADDR(sc) + bootgdt); + WAKECODE_FIXUP((wakeup_sw32 + 2), uint32_t, + WAKECODE_PADDR(sc) + wakeup_32); + + /* Save pointers to some global data. */ + WAKECODE_FIXUP(wakeup_ctx, vm_offset_t, + WAKECODE_VADDR(sc) + wakeup_ctx); + + if (bootverbose) + device_printf(sc->acpi_dev, "wakeup code va %#jx pa %#jx\n", + (uintmax_t)sc->acpi_wakeaddr, (uintmax_t)sc->acpi_wakephys); } Index: i386/i386/apic_vector.s =================================================================== RCS file: /home/ncvs/src/sys/i386/i386/apic_vector.s,v retrieving revision 1.114.2.5 diff -u -r1.114.2.5 apic_vector.s --- i386/i386/apic_vector.s 15 Jul 2010 12:17:17 -0000 1.114.2.5 +++ i386/i386/apic_vector.s 1 May 2012 16:24:16 -0000 @@ -336,6 +336,24 @@ iret /* + * Executed by a CPU when it receives an IPI_SUSPEND from another CPU.
+ */ + .text + SUPERALIGN_TEXT +IDTVEC(cpususpend) + PUSH_FRAME + SET_KERNEL_SREGS + cld + + movl lapic, %eax + movl $0, LA_EOI(%eax) /* End Of Interrupt to APIC */ + + call cpususpend_handler + + POP_FRAME + jmp doreti_iret + +/* * Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU. * * - Calls the generic rendezvous action function. Index: i386/i386/mp_machdep.c =================================================================== RCS file: /home/ncvs/src/sys/i386/i386/mp_machdep.c,v retrieving revision 1.302.2.12 diff -u -r1.302.2.12 mp_machdep.c --- i386/i386/mp_machdep.c 25 Apr 2012 07:10:17 -0000 1.302.2.12 +++ i386/i386/mp_machdep.c 6 May 2012 12:24:37 -0000 @@ -150,6 +150,7 @@ extern pt_entry_t *KPTphys; struct pcb stoppcbs[MAXCPU]; +struct pcb **susppcbs = NULL; /* Variables needed for SMP tlb shootdown. */ vm_offset_t smp_tlb_addr1; @@ -578,6 +579,9 @@ setidt(IPI_STOP, IDTVEC(cpustop), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + /* Install an inter-CPU IPI for CPU suspend/resume */ + setidt(IPI_SUSPEND, IDTVEC(cpususpend), + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); /* Set boot_cpu_id if needed. */ if (boot_cpu_id == -1) { @@ -1494,6 +1498,85 @@ } /* + * Handle an IPI_SUSPEND by saving our current context and spinning until we + * are resumed. 
+ */ +void +cpususpend_handler(void) +{ + uint64_t dtr; + cpumask_t cpumask; + register_t cr0, cr2, cr3, cr4, rf; + u_int cpu; + struct region_descriptor idt, gdt; + uint16_t ldt, tr, gs, fs; + + cpu = PCPU_GET(cpuid); + cpumask = PCPU_GET(cpumask); + + rf = intr_disable(); + cr3 = rcr3(); + + /* Save descriptor tables */ + dtr = rgdt(); + gdt.rd_limit = dtr & 0xffff; + gdt.rd_base = dtr >> 16; + dtr = ridt(); + idt.rd_limit = dtr & 0xffff; + idt.rd_base = dtr >> 16; + ldt = rldt(); + + /* Save other registers */ + tr = rtr(); + fs = rfs(); + gs = rgs(); + cr0 = rcr0(); + cr2 = rcr2(); + cr4 = rcr4(); + + if (savectx(susppcbs[cpu])) { + wbinvd(); + atomic_set_int(&stopped_cpus, cpumask); + } else { + pmap_init_pat(); + PCPU_SET(switchtime, 0); + PCPU_SET(switchticks, ticks); + } + + /* Wait for resume */ + while (!(started_cpus & cpumask)) + ia32_pause(); + + /* Restore GDT. */ + lgdt(&gdt); + + /* Restore segment registers. */ + load_fs(fs); + load_gs(gs); + + /* Restore CR4, CR2, CR0. */ + load_cr4(cr4); + load_cr2(cr2); + load_cr0(cr0); + + /* Restore descriptor tables. */ + lidt(&idt); + lldt(ldt); + + /* Clear "task busy" bit and reload TR. */ + PCPU_GET(tss_gdt)->sd_type &= (~SDT_SYS386BSY | SDT_SYS386TSS); + ltr(tr); + + atomic_clear_int(&started_cpus, cpumask); + atomic_clear_int(&stopped_cpus, cpumask); + + /* Restore CR3 and enable interrupts */ + load_cr3(cr3); + mca_resume(); + lapic_setup(0); + intr_restore(rf); +} +/* * This is called once the rest of the system is up and running and we're * ready to let the AP's out of the pen. 
*/ Index: i386/i386/swtch.s =================================================================== RCS file: /home/ncvs/src/sys/i386/i386/swtch.s,v retrieving revision 1.158.2.2 diff -u -r1.158.2.2 swtch.s --- i386/i386/swtch.s 19 Nov 2010 09:49:14 -0000 1.158.2.2 +++ i386/i386/swtch.s 6 May 2012 13:58:52 -0000 @@ -425,5 +425,6 @@ popfl #endif /* DEV_NPX */ + movl $1,%eax ret END(savectx) Index: i386/include/apicvar.h =================================================================== RCS file: /home/ncvs/src/sys/i386/include/apicvar.h,v retrieving revision 1.32.2.7 diff -u -r1.32.2.7 apicvar.h --- i386/include/apicvar.h 14 Jul 2010 21:10:14 -0000 1.32.2.7 +++ i386/include/apicvar.h 1 May 2012 16:14:30 -0000 @@ -130,7 +130,8 @@ #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ -#define IPI_STOP_HARD (APIC_IPI_INTS + 8) /* Stop CPU with a NMI. */ +#define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */ +#define IPI_STOP_HARD (APIC_IPI_INTS + 9) /* Stop CPU with a NMI. */ #else /* XEN */ /* These are the normal i386 APIC definitions */ @@ -159,7 +160,8 @@ #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */ -#define IPI_STOP_HARD (APIC_IPI_INTS + 8) /* Stop CPU with a NMI. */ +#define IPI_SUSPEND (APIC_IPI_INTS + 8) /* Suspend CPU until restarted. */ +#define IPI_STOP_HARD (APIC_IPI_INTS + 9) /* Stop CPU with a NMI. 
*/ #endif /* XEN */ /* Index: i386/include/cpufunc.h =================================================================== RCS file: /home/ncvs/src/sys/i386/include/cpufunc.h,v retrieving revision 1.155.2.3 diff -u -r1.155.2.3 cpufunc.h --- i386/include/cpufunc.h 25 Nov 2009 01:52:36 -0000 1.155.2.3 +++ i386/include/cpufunc.h 6 May 2012 11:10:42 -0000 @@ -370,6 +370,13 @@ return (data); } +static __inline void +load_cr2(u_int data) +{ + + __asm __volatile("movl %0,%%cr2" : : "r" (data)); +} + static __inline u_int rcr2(void) { Index: i386/include/pcb.h =================================================================== RCS file: /home/ncvs/src/sys/i386/include/pcb.h,v retrieving revision 1.57.2.2 diff -u -r1.57.2.2 pcb.h --- i386/include/pcb.h 19 Nov 2010 09:49:14 -0000 1.57.2.2 +++ i386/include/pcb.h 28 Apr 2012 05:23:15 -0000 @@ -84,7 +84,7 @@ struct trapframe; void makectx(struct trapframe *, struct pcb *); -void savectx(struct pcb *); +int savectx(struct pcb *); #endif #endif /* _I386_PCB_H_ */ Index: i386/include/smp.h =================================================================== RCS file: /home/ncvs/src/sys/i386/include/smp.h,v retrieving revision 1.99.2.3 diff -u -r1.99.2.3 smp.h --- i386/include/smp.h 9 Nov 2010 20:00:23 -0000 1.99.2.3 +++ i386/include/smp.h 1 May 2012 16:03:54 -0000 @@ -53,12 +53,14 @@ IDTVEC(invlcache), /* Write back and invalidate cache */ IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */ IDTVEC(cpustop), /* CPU stops & waits to be restarted */ + IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */ IDTVEC(rendezvous), /* handle CPU rendezvous */ IDTVEC(lazypmap); /* handle lazy pmap release */ /* functions in mp_machdep.c */ void cpu_add(u_int apic_id, char boot_cpu); void cpustop_handler(void); +void cpususpend_handler(void); void init_secondary(void); void ipi_all_but_self(u_int ipi); #ifndef XEN Index: kern/subr_smp.c =================================================================== RCS file: 
/home/ncvs/src/sys/kern/subr_smp.c,v retrieving revision 1.214.2.13 diff -u -r1.214.2.13 subr_smp.c --- kern/subr_smp.c 23 Nov 2011 16:02:36 -0000 1.214.2.13 +++ kern/subr_smp.c 1 May 2012 16:18:57 -0000 @@ -206,7 +206,7 @@ int i; KASSERT( -#if defined(__amd64__) +#if defined(__amd64__) || defined(__i386__) type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND, #else type == IPI_STOP || type == IPI_STOP_HARD, @@ -256,7 +256,7 @@ return (generic_stop_cpus(map, IPI_STOP_HARD)); } -#if defined(__amd64__) +#if defined(__amd64__) || defined(__i386__) int suspend_cpus(cpumask_t map) { Index: sys/smp.h =================================================================== RCS file: /home/ncvs/src/sys/sys/smp.h,v retrieving revision 1.90.2.4 diff -u -r1.90.2.4 smp.h --- sys/smp.h 6 Nov 2010 09:23:49 -0000 1.90.2.4 +++ sys/smp.h 1 May 2012 16:25:25 -0000 @@ -161,7 +161,7 @@ int restart_cpus(cpumask_t); int stop_cpus(cpumask_t); int stop_cpus_hard(cpumask_t); -#if defined(__amd64__) +#if defined(__amd64__) || defined(__i386__) int suspend_cpus(cpumask_t); #endif void smp_rendezvous_action(void);