/*-
 * Copyright (c) 2007, by David Xu
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. The name of the developer may NOT be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* To do: dig locality information out of ACPI SRAT and SLIT tables. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_sched.h"
#include "opt_smp.h"

#if !defined(lint)
#if !defined(SMP)
#error How did you get here?
#endif
#endif

#ifndef DEV_APIC
#error The apic device is required for SMP, add "device apic" to your config file.
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/smp.h>

#include <machine/apicvar.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/specialreg.h>

struct cpu_topology_info cpu_top;

static int package_id_bucket[MAXCPU];
static int core_id_bucket[MAXCPU];

static int update_package_map(struct cpu_topology_info *, int, u_int);
static int update_core_map(struct cpu_topology_info *, int, u_int, u_int);

static int
id_mask_width(int max_count)
{

	if (--max_count == 0)
		return (0);
	return (bsrl(max_count) + 1);
}

/*
 * Extract package id, core id, and smt id out of apic id.
 */
static u_int
get_sub_id(u_int apic_id, u_int max_sub_id, int shift_count)
{
	int mask_width = id_mask_width(max_sub_id);
	u_int mask_bits, sub_id;

	mask_bits = (0xffffffff << shift_count) ^
	    (0xffffffff << (shift_count + mask_width));
	sub_id = apic_id & mask_bits;
	return (sub_id);
}

/*
 * Get maximum logical processors in current package.
 */
static int
generic_max_lp_per_package(void)
{
	u_int regs[4];

	if (!(cpu_feature & CPUID_HTT))
		return (1);
	do_cpuid(1, regs);
	return ((regs[1] & CPUID_HTT_CORES) >> 16);
}
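
/*
 * Worked example of the id extraction above (illustrative numbers only): if
 * generic_max_lp_per_package() reports 4 logical processors arranged as two
 * cores with two threads each, then id_mask_width(2) == 1 and
 * id_mask_width(4) == 2, and for apic_id 0x7:
 *
 *	smt_id     = get_sub_id(0x7, 2, 0) = 0x7 & 0x00000001 = 0x1
 *	core_id    = get_sub_id(0x7, 2, 1) = 0x7 & 0x00000002 = 0x2
 *	package_id = 0x7 & (0xffffffff << 2) = 0x4
 *
 * The sub-ids keep their original bit positions; the map update code below
 * compares and ORs them as-is instead of shifting them down to zero.
 */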
/*
 * Get maximum processor cores in current package.
 */
static int
generic_max_cores_per_package(void)
{
	u_int regs[4];

	if (!(cpu_feature & CPUID_HTT) || cpu_high < 4)
		return (1);
	cpuid_count(4, 0, regs);
	return (((regs[0] >> 26) & 0x3f) + 1);
}

static void
generic_topology_update(struct cpu_topology_info *p)
{
	int cpu;
	int lps_per_package, cores_per_package, lps_per_core;
	int package_index, core_index;
	u_int apic_id, smt_id, core_id, package_id, package_id_mask;

	cpu = PCPU_GET(cpuid);
	apic_id = lapic_id();
	lps_per_package = generic_max_lp_per_package();
	cores_per_package = generic_max_cores_per_package();
	lps_per_core = lps_per_package / cores_per_package;
	printf("lps_per_core=%d\n", lps_per_core);
	smt_id = get_sub_id(apic_id, lps_per_core, 0);
	core_id = get_sub_id(apic_id, cores_per_package,
	    id_mask_width(lps_per_core));
	package_id_mask = 0xffffffff << id_mask_width(lps_per_package);
	package_id = apic_id & package_id_mask;
	package_index = update_package_map(p, cpu, package_id);
	core_index = update_core_map(p, cpu, package_id, core_id);

	if (cpu_high >= 4) {
		u_int regs[4];
		int threads_per_cache, cache_level;

		cache_level = 1;
		do {
			cpuid_count(4, cache_level, regs);
			if ((regs[0] & 0x1f) == 0)
				break;
			threads_per_cache = ((regs[0] & 0x3ffc000) >> 14) + 1;
			if (threads_per_cache <= 1)
				continue;
			if (threads_per_cache == lps_per_core)
				p->core_flag[core_index] =
				    CPU_TOP_SHARED_CACHE;
			if (threads_per_cache == lps_per_package)
				p->package_flag[package_index] =
				    CPU_TOP_SHARED_CACHE;
		} while (++cache_level <= 3);
	}

	printf("%s: cpu:%d pkg:%d pkgflag:%d core:%d coreflag:%d smt:%d\n",
	    __func__, cpu, package_index, p->package_flag[package_index],
	    core_index, p->core_flag[core_index], smt_id);
}
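
/*
 * The cache loop above walks CPUID leaf 4 sub-leaves 1 through 3 (the
 * deterministic cache parameters).  For each sub-leaf, EAX[4:0] == 0 means
 * there are no more caches, and EAX[25:14] + 1 is the number of logical
 * processor ids sharing that cache.  Illustrative example: a cache shared
 * by the two threads of a core reports EAX[25:14] == 1, so
 * threads_per_cache == 2 == lps_per_core and the core is marked
 * CPU_TOP_SHARED_CACHE; a cache shared package-wide makes
 * threads_per_cache == lps_per_package and marks the package instead.
 */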
/*
 * Get maximum logical processors in current package.
 */
static int
amd_max_lp_per_package(void)
{
	u_int regs[4];

	if (!(cpu_feature & CPUID_HTT) || cpu_exthigh < 0x80000001)
		return (1);
	do_cpuid(0x80000001, regs);

	/*
	 * When HTT=1 and CmpLegacy=1, LogicalProcessorCount
	 * represents the number of CPU cores per package,
	 * where each CPU core is single-threaded.
	 */
	if (regs[2] & 0x2)	/* CmpLegacy */
		return (generic_max_lp_per_package());

	/*
	 * When HTT=1 and CmpLegacy=0, LogicalProcessorCount
	 * represents the number of total threads for the processor.
	 */
	return (generic_max_lp_per_package());
}

/*
 * Get maximum processor cores in current package.
 */
static int
amd_max_cores_per_package(void)
{
	u_int regs[4];
	int apic_id_core_id_size;

	if (!(cpu_feature & CPUID_HTT) || cpu_exthigh < 0x80000001)
		return (1);
	do_cpuid(0x80000001, regs);

	/*
	 * When HTT=1 and CmpLegacy=1, LogicalProcessorCount
	 * represents the number of CPU cores per package,
	 * where each CPU core is single-threaded.
	 */
	if (regs[2] & 0x2)	/* CmpLegacy */
		return (generic_max_lp_per_package());

	/*
	 * When HTT=1 and CmpLegacy=0, LogicalProcessorCount
	 * represents the number of total threads for the processor.
	 */
	if (cpu_exthigh < 0x80000008)
		return (generic_max_lp_per_package());

	do_cpuid(0x80000008, regs);
	apic_id_core_id_size = (regs[2] >> 12) & 0xf;
	if (apic_id_core_id_size == 0)
		return ((regs[2] & 0xff) + 1);
	return (1 << apic_id_core_id_size);
}

static void
amd_topology_update(struct cpu_topology_info *p)
{
	int cpu;
	int lps_per_package, cores_per_package, lps_per_core;
	int package_index, core_index;
	u_int apic_id, smt_id, core_id, package_id, package_id_mask;

	cpu = PCPU_GET(cpuid);
	apic_id = lapic_id();
	lps_per_package = amd_max_lp_per_package();
	cores_per_package = amd_max_cores_per_package();
	lps_per_core = lps_per_package / cores_per_package;
	smt_id = get_sub_id(apic_id, lps_per_core, 0);
	core_id = get_sub_id(apic_id, cores_per_package,
	    id_mask_width(lps_per_core));
	package_id_mask = 0xffffffff << id_mask_width(lps_per_package);
	package_id = apic_id & package_id_mask;
	package_index = update_package_map(p, cpu, package_id);
	core_index = update_core_map(p, cpu, package_id, core_id);

	if (cpu_exthigh >= 0x80000006) {
		u_int regs[4];

		/*
		 * A non-zero EDX for leaf 0x80000006 is taken to mean an
		 * L3 cache is present and shared by the whole package.
		 */
		do_cpuid(0x80000006, regs);
		if (regs[3] != 0)
			p->package_flag[package_index] = CPU_TOP_SHARED_CACHE;
	}

	printf("%s: cpu:%d pkg:%d pkgflag:%d core:%d coreflag:%d smt:%d\n",
	    __func__, cpu, package_index, p->package_flag[package_index],
	    core_index, p->core_flag[core_index], smt_id);
}

/*
 * Called by each cpu to update topology structures.
 */
void
cpu_topology_update(void)
{

	if (strcmp(cpu_vendor, "GenuineIntel") == 0)
		generic_topology_update(&cpu_top);
	else if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
		amd_topology_update(&cpu_top);
	else
		cpu_top.package_num = 0;
}

/*
 * Update the processor package map for cpu.  Before calling this function,
 * the cpu's package id, core id and SMT id must already have been
 * determined.
 */
static int
update_package_map(struct cpu_topology_info *p, int cpu, u_int package_id)
{
	cpumask_t cpu_mask;
	int i;

	cpu_mask = 1 << cpu;
	for (i = 0; i < p->package_num; i++) {
		/*
		 * We may be comparing bit-fields of logical processors
		 * residing in different packages; the code below assumes
		 * package symmetry.
		 */
		if (package_id_bucket[i] == package_id) {
			p->package_cpu_mask[i] |= cpu_mask;
			return (i);
		}
	}
	p->cpu_to_package[cpu] = i;
	package_id_bucket[i] = package_id;
	p->package_cpu_mask[i] = cpu_mask;
	p->package_num++;
	return (i);
}

/*
 * Update the processor core map for cpu.  Before calling this function,
 * the cpu's package id, core id and SMT id must already have been
 * determined.
 */
static int
update_core_map(struct cpu_topology_info *p, int cpu, u_int package_id,
    u_int core_id)
{
	cpumask_t cpu_mask;
	int i;

	cpu_mask = 1 << cpu;
	for (i = 0; i < p->core_num; i++) {
		/*
		 * We may be comparing bit-fields of logical processors
		 * residing in different packages; the code below assumes
		 * package symmetry.
		 */
		if ((package_id | core_id) == core_id_bucket[i]) {
			p->core_cpu_mask[i] |= cpu_mask;
			return (i);
		}
	}
	p->cpu_to_core[cpu] = i;
	core_id_bucket[i] = package_id | core_id;
	p->core_cpu_mask[i] = cpu_mask;
	p->core_num++;
	return (i);
}
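
/*
 * Illustrative use of the maps built above (reference only, not compiled):
 * given a cpu number, the sets of logical processors sharing its core and
 * its package can be looked up as
 *
 *	cpumask_t core_siblings =
 *	    cpu_top.core_cpu_mask[cpu_top.cpu_to_core[cpu]];
 *	cpumask_t pkg_siblings =
 *	    cpu_top.package_cpu_mask[cpu_top.cpu_to_package[cpu]];
 *
 * The corresponding core_flag/package_flag entries record whether those
 * siblings share a cache (CPU_TOP_SHARED_CACHE).
 */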