From 6bca4a2e4854ea3fc275946a023db65c483cb9dd Mon Sep 17 00:00:00 2001
From: Andriy Gapon
Date: Wed, 10 Aug 2011 00:06:16 +0300
Subject: [PATCH] [test][wip] new x86 smp topology detection code

- based on APIC ID derivation rules for Intel and AMD CPUs
- can handle non-uniform topologies
- requires homogeneous APIC ID assignment (same bit widths for ID components)
- amd64-only at the moment
- doesn't yet handle dual-node AMD CPUs
- supports only package/core/cache nodes

Todo:
- i386 counterparts
- AMD dual-node processors
- NUMA nodes
- AMD Bulldozer module nodes (?)
- checking for homogeneity of the APIC ID assignment across packages
- more flexible cache placement within topology

Long term todo:
- sharing of other resources like FPU
---
 sys/amd64/amd64/mp_machdep.c | 1038 +++++++++++++++++++++++++++++++-----------
 1 files changed, 772 insertions(+), 266 deletions(-)

diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 44bd1b2..77028eb 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -132,6 +133,9 @@ static struct mtx ap_boot_mtx;
 /* Set to 1 once we're ready to let the APs out of the pen. */
 static volatile int aps_ready = 0;
+/* Some topology constants. */
+#define	MAX_CACHE_LEVELS	3
+
 /*
  * Store data from cpu_add() until later in the boot when we actually setup
  * the APs.
 */
@@ -142,15 +146,62 @@ struct cpu_info {
 	int	cpu_disabled:1;
 	int	cpu_hyperthread:1;
 } static cpu_info[MAX_APIC_ID + 1];
-int cpu_apic_ids[MAXCPU];
 int apic_cpuids[MAX_APIC_ID + 1];
+int cpu_apic_ids[MAXCPU];
+
+typedef enum {
+	/* No node has this type; can be used in topo API calls. */
+	TOPO_TYPE_DUMMY,
+	/* Processing unit aka computing unit aka logical CPU. */
+	TOPO_TYPE_PU,
+	/* Physical subdivision of a package. */
+	TOPO_TYPE_CORE,
+	/* CPU L1/L2/L3 cache. */
+	TOPO_TYPE_CACHE,
+	/* Package aka chip, equivalent to socket. */
+	TOPO_TYPE_PKG,
+	/* NUMA node. */
+	TOPO_TYPE_NODE,
+	/* Other logical or physical grouping of PUs. */
+	/* E.g. PUs on the same die, or PUs sharing an FPU. */
+	TOPO_TYPE_GROUP,
+	/* The whole system. */
+	TOPO_TYPE_SYSTEM
+} topo_node_type;
+
+typedef unsigned int hwid_t;
+typedef int cpuid_t;
+
+struct topo_node {
+	struct topo_node *parent;
+	TAILQ_HEAD(topo_children, topo_node) children;
+	TAILQ_ENTRY(topo_node) siblings;
+	cpuset_t cpuset;
+	topo_node_type type;
+	uintptr_t subtype;
+	hwid_t hwid;
+	cpuid_t id;
+	int nchildren;
+	int cpu_count;
+};
+
+MALLOC_DEFINE(M_TOPO, "toponodes", "SMP topology data");
 
 /* Holds pending bitmap based IPIs per CPU */
 static volatile u_int cpu_ipi_pending[MAXCPU];
 
 static u_int boot_address;
-static int cpu_logical;			/* logical cpus per core */
-static int cpu_cores;			/* cores per package */
+
+static struct topo_node topo_root;
+
+static int pkg_id_shift;
+static int core_id_shift;
+static int topo_has_disabled;
+
+struct cache_info {
+	int	id_shift;
+	int	present;
+} static caches[MAX_CACHE_LEVELS];
 
 static void	assign_cpu_ids(void);
 static void	set_interrupt_apic_ids(void);
@@ -158,10 +209,257 @@ static int	start_all_aps(void);
 static int	start_ap(int apic_id);
 static void	release_aps(void *dummy);
 
-static u_int	hyperthreading_cpus;	/* logical cpus sharing L1 cache */
-static int	hyperthreading_allowed = 1;
 static u_int	bootMP_size;
+static int hyperthreading_allowed = 1;
+TUNABLE_INT("machdep.hyperthreading_allowed", &hyperthreading_allowed);
+SYSCTL_INT(_machdep, OID_AUTO, hyperthreading_allowed, CTLFLAG_RDTUN,
+    &hyperthreading_allowed, 0, "Use Intel HTT logical CPUs");
+
+
+////////////////////////////////////////////
+void topo_init_node(struct topo_node *node);
+void topo_init_root(struct topo_node *root);
+struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid, topo_node_type type, uintptr_t subtype);
+struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid, topo_node_type type, uintptr_t subtype);
+void topo_make_1st_child(struct topo_node *child);
+struct topo_node * topo_next_node(struct topo_node *top, struct topo_node *node);
+struct topo_node * topo_next_nonchild_node(struct topo_node *top, struct topo_node *node);
+void topo_set_pu_id(struct topo_node *node, cpuid_t id);
+int topo_get_uniparams(struct topo_node *topo_root, int all, int *pkg_count, int *cores_per_pkg, int *thrs_per_core);
+
+
+void
+topo_init_node(struct topo_node *node)
+{
+
+	bzero(node, sizeof(*node));
+	TAILQ_INIT(&node->children);
+}
+
+void
+topo_init_root(struct topo_node *root)
+{
+
+	topo_init_node(root);
+	root->type = TOPO_TYPE_SYSTEM;
+}
+
+struct topo_node *
+topo_add_node_by_hwid(struct topo_node *parent, int hwid,
+    topo_node_type type, uintptr_t subtype)
+{
+	struct topo_node *node;
+
+	TAILQ_FOREACH_REVERSE(node, &parent->children,
+	    topo_children, siblings) {
+		if (node->hwid == hwid
+		    && node->type == type && node->subtype == subtype) {
+			return (node);
+		}
+	}
+
+	node = malloc(sizeof(*node), M_TOPO, M_WAITOK);
+	topo_init_node(node);
+	node->parent = parent;
+	node->hwid = hwid;
+	node->type = type;
+	node->subtype = subtype;
+	TAILQ_INSERT_TAIL(&parent->children, node, siblings);
+	parent->nchildren++;
+
+	return (node);
+}
+
+struct topo_node *
+topo_find_node_by_hwid(struct topo_node *parent, int hwid,
+    topo_node_type type, uintptr_t subtype)
+{
+
+	struct topo_node *node;
+
+	TAILQ_FOREACH(node, &parent->children, siblings) {
+		if (node->hwid == hwid
+		    && node->type == type && node->subtype == subtype) {
+			return (node);
+		}
+	}
+
+	return (NULL);
+}
+
+void
+topo_make_1st_child(struct topo_node *child)
+{
+	struct topo_node *next;
+	struct topo_node *node;
+	struct topo_node *parent;
+
+	parent = child->parent;
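+	/*
+	 * The code below makes 'child' the first element on its parent's
+	 * list of children and re-inserts the nodes that originally
+	 * followed it right after it, i.e. the sibling list is rotated
+	 * while the relative order of the remaining nodes is preserved.
+	 */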
+	next = TAILQ_NEXT(child, siblings);
+	TAILQ_REMOVE(&parent->children, child, siblings);
+	TAILQ_INSERT_HEAD(&parent->children, child, siblings);
+
+	while (next != NULL) {
+		node = next;
+		next = TAILQ_NEXT(node, siblings);
+		TAILQ_REMOVE(&parent->children, node, siblings);
+		TAILQ_INSERT_AFTER(&parent->children, child, node, siblings);
+		child = node;
+	}
+}
+
+struct topo_node *
+topo_next_node(struct topo_node *top, struct topo_node *node)
+{
+	struct topo_node *next;
+
+	if ((next = TAILQ_FIRST(&node->children)) != NULL)
+		return (next);
+
+	if ((next = TAILQ_NEXT(node, siblings)) != NULL)
+		return (next);
+
+	while ((node = node->parent) != top)
+		if ((next = TAILQ_NEXT(node, siblings)) != NULL)
+			return (next);
+
+	return (NULL);
+}
+
+struct topo_node *
+topo_next_nonchild_node(struct topo_node *top, struct topo_node *node)
+{
+	struct topo_node *next;
+
+	if ((next = TAILQ_NEXT(node, siblings)) != NULL)
+		return (next);
+
+	while ((node = node->parent) != top)
+		if ((next = TAILQ_NEXT(node, siblings)) != NULL)
+			return (next);
+
+	return (NULL);
+}
+
+void
+topo_set_pu_id(struct topo_node *node, cpuid_t id)
+{
+
+	KASSERT(node->type == TOPO_TYPE_PU,
+	    ("topo_set_pu_id: wrong node type: %u", node->type));
+	KASSERT(CPU_EMPTY(&node->cpuset) && node->cpu_count == 0,
+	    ("topo_set_pu_id: cpuset already not empty"));
+	node->id = id;
+	CPU_SET(id, &node->cpuset);
+	node->cpu_count = 1;
+	node->subtype = 1;
+
+	while ((node = node->parent) != NULL) {
+		if (CPU_ISSET(id, &node->cpuset))
+			break;
+		CPU_SET(id, &node->cpuset);
+		node->cpu_count++;
+	}
+}
+
+int
+topo_get_uniparams(struct topo_node *topo_root, int all,
+    int *pkg_count, int *cores_per_pkg, int *thrs_per_core)
+{
+	struct topo_node *pkg_node;
+	struct topo_node *core_node;
+	struct topo_node *pu_node;
+	int thrs_per_pkg;
+	int cpp_counter;
+	int tpc_counter;
+	int tpp_counter;
+
+	*pkg_count = 0;
+	*cores_per_pkg = -1;
+	*thrs_per_core = -1;
+	thrs_per_pkg = -1;
+	pkg_node = topo_root;
+	while (pkg_node != NULL) {
+		if (pkg_node->type != TOPO_TYPE_PKG) {
+			pkg_node = topo_next_node(topo_root, pkg_node);
+			continue;
+		}
+		if (!all && CPU_EMPTY(&pkg_node->cpuset)) {
+			pkg_node = topo_next_nonchild_node(topo_root, pkg_node);
+			continue;
+		}
+
+		(*pkg_count)++;
+
+		cpp_counter = 0;
+		tpp_counter = 0;
+		core_node = pkg_node;
+		while (core_node != NULL) {
+			if (core_node->type == TOPO_TYPE_CORE) {
+				if (!all && CPU_EMPTY(&core_node->cpuset)) {
+					core_node =
+					    topo_next_nonchild_node(pkg_node,
+					    core_node);
+					continue;
+				}
+
+				cpp_counter++;
+
+				tpc_counter = 0;
+				pu_node = core_node;
+				while (pu_node != NULL) {
+					if (pu_node->type == TOPO_TYPE_PU &&
+					    (all || !CPU_EMPTY(&pu_node->cpuset)))
+						tpc_counter++;
+					pu_node = topo_next_node(core_node,
+					    pu_node);
+				}
+
+				if (*thrs_per_core == -1)
+					*thrs_per_core = tpc_counter;
+				else if (*thrs_per_core != tpc_counter)
+					return (0);
+
+				core_node = topo_next_nonchild_node(pkg_node,
+				    core_node);
+			} else {
+				/* PU node directly under PKG. */
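+				/*
+				 * E.g. when pkg_id_shift == core_id_shift,
+				 * topo_probe() creates no TOPO_TYPE_CORE
+				 * layer and the PUs are counted here.
+				 */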
+				if (core_node->type == TOPO_TYPE_PU &&
+				    (all || !CPU_EMPTY(&core_node->cpuset)))
+					tpp_counter++;
+				core_node = topo_next_node(pkg_node,
+				    core_node);
+			}
+		}
+
+		if (*cores_per_pkg == -1)
+			*cores_per_pkg = cpp_counter;
+		else if (*cores_per_pkg != cpp_counter)
+			return (0);
+		if (thrs_per_pkg == -1)
+			thrs_per_pkg = tpp_counter;
+		else if (thrs_per_pkg != tpp_counter)
+			return (0);
+
+		pkg_node = topo_next_nonchild_node(topo_root, pkg_node);
+	}
+
+	KASSERT(*pkg_count > 0,
+	    ("bug in topology or analysis"));
+	if (*cores_per_pkg == 0) {
+		KASSERT(*thrs_per_core == -1 && thrs_per_pkg > 0,
+		    ("bug in topology or analysis"));
+		*thrs_per_core = thrs_per_pkg;
+	}
+
+	return (1);
+}
+
+#define	TOPO_FOREACH(i, root)	\
+	for (i = root; i != NULL; i = topo_next_node(root, i))
+////////////////////////////////////////////
+
 static void
 mem_range_AP_init(void)
 {
@@ -169,36 +467,110 @@ mem_range_AP_init(void)
 	mem_range_softc.mr_op->initAP(&mem_range_softc);
 }
 
+static __inline int
+mask_width(u_int x)
+{
+
+	return (fls(x << (1 - powerof2(x))) - 1);
+}
+
+static int
+add_deterministic_cache(int type, int level, int share_count)
+{
+
+	if (type == 0)
+		return (0);
+	if (type > 3) {
+		printf("unexpected cache type %d\n", type);
+		return (1);
+	}
+	if (type == 2) /* ignore instruction cache */
+		return (1);
+	if (level == 0 || level > MAX_CACHE_LEVELS) {
+		printf("unexpected cache level %d\n", level);
+		return (1);
+	}
+
+	if (caches[level - 1].present) {
+		printf("WARNING: multiple entries for L%u data cache\n", level);
+		printf("%u => %u\n", caches[level - 1].id_shift,
+		    mask_width(share_count));
+	}
+	caches[level - 1].id_shift = mask_width(share_count);
+	caches[level - 1].present = 1;
+
+	if (caches[level - 1].id_shift > pkg_id_shift) {
+		printf("WARNING: L%u data cache covers more "
+		    "APIC IDs than a package\n", level);
+		printf("%u > %u\n", caches[level - 1].id_shift, pkg_id_shift);
+		caches[level - 1].id_shift = pkg_id_shift;
+	}
+	if (caches[level - 1].id_shift < core_id_shift) {
+		printf("WARNING: L%u data cache covers fewer "
+		    "APIC IDs than a core\n", level);
+		printf("%u < %u\n", caches[level - 1].id_shift, core_id_shift);
+		caches[level - 1].id_shift = core_id_shift;
+	}
+
+	return (1);
+}
+
 static void
 topo_probe_amd(void)
 {
-	int core_id_bits;
-	int id;
-
-	/* AMD processors do not support HTT. */
-	cpu_logical = 1;
+	u_int p[4];
+	int level;
+	int share_count;
+	int type;
+	int i;
 
-	if ((amd_feature2 & AMDID2_CMP) == 0) {
-		cpu_cores = 1;
+	/* No multi-core capability. */
+	if ((amd_feature2 & AMDID2_CMP) == 0)
 		return;
-	}
 
-	core_id_bits = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
+	/* For families 10h and newer. */
+	pkg_id_shift = (cpu_procinfo2 & AMDID_COREID_SIZE) >>
 	    AMDID_COREID_SIZE_SHIFT;
-	if (core_id_bits == 0) {
-		cpu_cores = (cpu_procinfo2 & AMDID_CMP_CORES) + 1;
-		return;
-	}
-	/* Fam 10h and newer should get here. */
-	for (id = 0; id <= MAX_APIC_ID; id++) {
-		/* Check logical CPU availability. */
-		if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
-			continue;
-		/* Check if logical CPU has the same package ID. */
-		if ((id >> core_id_bits) != (boot_cpu_id >> core_id_bits))
-			continue;
-		cpu_cores++;
+	/* For 0Fh family. */
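+	/*
+	 * Example: a dual-core family 0Fh CPU reports
+	 * (cpu_procinfo2 & AMDID_CMP_CORES) == 1, so the fallback below
+	 * computes pkg_id_shift = mask_width(2) = 1.
+	 */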
+	if (pkg_id_shift == 0)
+		pkg_id_shift =
+		    mask_width((cpu_procinfo2 & AMDID_CMP_CORES) + 1);
+
+	if ((amd_feature2 & AMDID2_TOPOLOGY) != 0) {
+		for (i = 0; ; i++) {
+			cpuid_count(0x8000001d, i, p);
+			type = p[0] & 0x1f;
+			level = (p[0] >> 5) & 0x7;
+			share_count = 1 + ((p[0] >> 14) & 0xfff);
+
+			if (!add_deterministic_cache(type, level, share_count))
+				break;
+		}
+	} else {
+		if (cpu_exthigh >= 0x80000005) {
+			cpuid_count(0x80000005, 0, p);
+			if (((p[2] >> 24) & 0xff) != 0) {
+				caches[0].id_shift = 0;
+				caches[0].present = 1;
+			}
+		}
+		if (cpu_exthigh >= 0x80000006) {
+			cpuid_count(0x80000006, 0, p);
+			if (((p[2] >> 16) & 0xffff) != 0) {
+				caches[1].id_shift = 0;
+				caches[1].present = 1;
+			}
+			if (((p[3] >> 18) & 0x3fff) != 0) {
+				/*
+				 * XXX TODO: Account for dual-node processors
+				 * where each node within a package has its own
+				 * L3 cache.
+				 */
+				caches[2].id_shift = pkg_id_shift;
+				caches[2].present = 1;
+			}
+		}
+	}
 }
@@ -207,22 +579,12 @@ topo_probe_amd(void)
  * take log2.
  * Returns -1 if argument is zero.
  */
-static __inline int
-mask_width(u_int x)
-{
-
-	return (fls(x << (1 - powerof2(x))) - 1);
-}
-
 static void
 topo_probe_0x4(void)
 {
 	u_int p[4];
-	int pkg_id_bits;
-	int core_id_bits;
 	int max_cores;
 	int max_logical;
-	int id;
 
 	/* Both zero and one here mean one logical processor per package. */
 	max_logical = (cpu_feature & CPUID_HTT) != 0 ?
@@ -230,41 +592,16 @@ topo_probe_0x4(void)
 	if (max_logical <= 1)
 		return;
 
-	/*
-	 * Because of uniformity assumption we examine only
-	 * those logical processors that belong to the same
-	 * package as BSP. Further, we count number of
-	 * logical processors that belong to the same core
-	 * as BSP thus deducing number of threads per core.
-	 */
 	if (cpu_high >= 0x4) {
 		cpuid_count(0x04, 0, p);
 		max_cores = ((p[0] >> 26) & 0x3f) + 1;
 	} else
 		max_cores = 1;
-	core_id_bits = mask_width(max_logical/max_cores);
-	if (core_id_bits < 0)
-		return;
-	pkg_id_bits = core_id_bits + mask_width(max_cores);
-
-	for (id = 0; id <= MAX_APIC_ID; id++) {
-		/* Check logical CPU availability. */
-		if (!cpu_info[id].cpu_present || cpu_info[id].cpu_disabled)
-			continue;
-		/* Check if logical CPU has the same package ID. */
-		if ((id >> pkg_id_bits) != (boot_cpu_id >> pkg_id_bits))
-			continue;
-		cpu_cores++;
-		/* Check if logical CPU has the same package and core IDs. */
-		if ((id >> core_id_bits) == (boot_cpu_id >> core_id_bits))
-			cpu_logical++;
-	}
-
-	KASSERT(cpu_cores >= 1 && cpu_logical >= 1,
-	    ("topo_probe_0x4 couldn't find BSP"));
-	cpu_cores /= cpu_logical;
-	hyperthreading_cpus = cpu_logical;
+	core_id_shift = mask_width(max_logical/max_cores);
+	KASSERT(core_id_shift >= 0,
+	    ("intel topo: max_cores > max_logical\n"));
+	pkg_id_shift = core_id_shift + mask_width(max_cores);
 }
 
 static void
@@ -272,47 +609,94 @@ topo_probe_0xb(void)
 {
 	u_int p[4];
 	int bits;
-	int cnt;
-	int i;
-	int logical;
 	int type;
-	int x;
+	int i;
+
+	/* Fall back if CPU leaf 11 doesn't really exist. */
+	cpuid_count(0x0b, 0, p);
+	if (p[1] == 0) {
+		topo_probe_0x4();
+		return;
+	}
 
 	/* We only support three levels for now. */
-	for (i = 0; i < 3; i++) {
+	for (i = 0; ; i++) {
 		cpuid_count(0x0b, i, p);
-		/* Fall back if CPU leaf 11 doesn't really exist. */
-		if (i == 0 && p[1] == 0) {
-			topo_probe_0x4();
-			return;
-		}
-
 		bits = p[0] & 0x1f;
-		logical = p[1] &= 0xffff;
 		type = (p[2] >> 8) & 0xff;
-		if (type == 0 || logical == 0)
+
+		if (type == 0)
 			break;
-		/*
-		 * Because of uniformity assumption we examine only
-		 * those logical processors that belong to the same
-		 * package as BSP.
- */ - for (cnt = 0, x = 0; x <= MAX_APIC_ID; x++) { - if (!cpu_info[x].cpu_present || - cpu_info[x].cpu_disabled) - continue; - if (x >> bits == boot_cpu_id >> bits) - cnt++; - } + + /* TODO: check for duplicate (re-)assignment */ if (type == CPUID_TYPE_SMT) - cpu_logical = cnt; + core_id_shift = bits; else if (type == CPUID_TYPE_CORE) - cpu_cores = cnt; + pkg_id_shift = bits; + else + printf("unknown CPU level type %d\n", type); + } + + if (pkg_id_shift < core_id_shift) { + printf("WARNING: core covers more APIC IDs than a package\n"); + core_id_shift = pkg_id_shift; + } +} + +static void +topo_probe_intel_caches(void) +{ + u_int p[4]; + int level; + int share_count; + int type; + int i; + + if (cpu_high < 0x4) { + /* + * Available cache level and sizes can be determined + * via CPUID leaf 2, but that requires a huge table of hardcoded + * values, so for now just assume L1 and L2 caches potentially + * shared only by HTT processing units, if HTT is present. + */ + caches[0].id_shift = pkg_id_shift; + caches[0].present = 1; + caches[1].id_shift = pkg_id_shift; + caches[1].present = 1; + return; + } + + for (i = 0; ; i++) { + cpuid_count(0x4, i, p); + type = p[0] & 0x1f; + level = (p[0] >> 5) & 0x7; + share_count = 1 + ((p[0] >> 14) & 0xfff); + + if (!add_deterministic_cache(type, level, share_count)) + break; } - if (cpu_logical == 0) - cpu_logical = 1; - cpu_cores /= cpu_logical; +} + +static void +topo_probe_intel(void) +{ + + /* + * See Intel(R) 64 Architecture Processor + * Topology Enumeration article for details. + * + * Note that 0x1 <= cpu_high < 4 case should be + * compatible with topo_probe_0x4() logic when + * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1) + * or it should trigger the fallback otherwise. + */ + if (cpu_high >= 0xb) + topo_probe_0xb(); + else if (cpu_high >= 0x1) + topo_probe_0x4(); + + topo_probe_intel_caches(); } /* @@ -329,81 +713,317 @@ static void topo_probe(void) { static int cpu_topo_probed = 0; + struct topo_layer { + int type; + int subtype; + int id_shift; + } topo_layers[MAX_CACHE_LEVELS + 3]; + struct topo_node *parent; + struct topo_node *node; + int layer; + int nlayers; + int node_id; + int i; if (cpu_topo_probed) return; CPU_ZERO(&logical_cpus_mask); + if (mp_ncpus <= 1) - cpu_cores = cpu_logical = 1; + ; /* nothing */ else if (cpu_vendor_id == CPU_VENDOR_AMD) topo_probe_amd(); - else if (cpu_vendor_id == CPU_VENDOR_INTEL) { - /* - * See Intel(R) 64 Architecture Processor - * Topology Enumeration article for details. - * - * Note that 0x1 <= cpu_high < 4 case should be - * compatible with topo_probe_0x4() logic when - * CPUID.1:EBX[23:16] > 0 (cpu_cores will be 1) - * or it should trigger the fallback otherwise. - */ - if (cpu_high >= 0xb) - topo_probe_0xb(); - else if (cpu_high >= 0x1) - topo_probe_0x4(); - } + else if (cpu_vendor_id == CPU_VENDOR_INTEL) + topo_probe_intel(); + + KASSERT(pkg_id_shift >= core_id_shift, + ("bug in APIC topology discovery")); + + nlayers = 0; + bzero(topo_layers, sizeof(topo_layers)); + + topo_layers[nlayers].type = TOPO_TYPE_PKG; + topo_layers[nlayers].id_shift = pkg_id_shift; + if (bootverbose) + printf("Package ID shift: %u\n", topo_layers[nlayers].id_shift); + nlayers++; /* - * Fallback: assume each logical CPU is in separate - * physical package. That is, no multi-core, no SMT. + * Consider all caches to be within a package/chip + * and "in front" of all sub-components like + * cores and hardware threads. 
 	 */
-	if (cpu_cores == 0 || cpu_logical == 0)
-		cpu_cores = cpu_logical = 1;
+	for (i = MAX_CACHE_LEVELS - 1; i >= 0; --i) {
+		if (caches[i].present) {
+			KASSERT(caches[i].id_shift <= pkg_id_shift,
+			    ("bug in APIC topology discovery"));
+			KASSERT(caches[i].id_shift >= core_id_shift,
+			    ("bug in APIC topology discovery"));
+
+			topo_layers[nlayers].type = TOPO_TYPE_CACHE;
+			topo_layers[nlayers].subtype = i + 1;
+			topo_layers[nlayers].id_shift = caches[i].id_shift;
+			if (bootverbose)
+				printf("L%u cache ID shift: %u\n",
+				    topo_layers[nlayers].subtype,
+				    topo_layers[nlayers].id_shift);
+			nlayers++;
+		}
+	}
+
+	if (pkg_id_shift > core_id_shift) {
+		topo_layers[nlayers].type = TOPO_TYPE_CORE;
+		topo_layers[nlayers].id_shift = core_id_shift;
+		if (bootverbose)
+			printf("Core ID shift: %u\n",
+			    topo_layers[nlayers].id_shift);
+		nlayers++;
+	}
+
+	topo_layers[nlayers].type = TOPO_TYPE_PU;
+	topo_layers[nlayers].id_shift = 0;
+	nlayers++;
+
+	topo_init_root(&topo_root);
+	for (i = 0; i <= MAX_APIC_ID; ++i) {
+		if (!cpu_info[i].cpu_present)
+			continue;
+
+		parent = &topo_root;
+		for (layer = 0; layer < nlayers; ++layer) {
+			node_id = i >> topo_layers[layer].id_shift;
+			parent = topo_add_node_by_hwid(parent, node_id,
+			    topo_layers[layer].type,
+			    topo_layers[layer].subtype);
+		}
+	}
+
+	parent = &topo_root;
+	for (layer = 0; layer < nlayers; ++layer) {
+		node_id = boot_cpu_id >> topo_layers[layer].id_shift;
+		node = topo_find_node_by_hwid(parent, node_id,
+		    topo_layers[layer].type,
+		    topo_layers[layer].subtype);
+		topo_make_1st_child(node);
+		parent = node;
+	}
+
 	cpu_topo_probed = 1;
 }
 
-struct cpu_group *
-cpu_topo(void)
+/*
+ * Assign logical CPU IDs to local APICs.
+ */
+static void
+assign_cpu_ids(void)
 {
-	int cg_flags;
+	struct topo_node *node;
+	u_int smt_mask;
+
+	smt_mask = (1u << core_id_shift) - 1;
 
 	/*
-	 * Determine whether any threading flags are
-	 * necessry.
+	 * Assign CPU IDs to local APIC IDs and disable any CPUs
+	 * beyond MAXCPU. CPU 0 is always assigned to the BSP.
 	 */
-	topo_probe();
-	if (cpu_logical > 1 && hyperthreading_cpus)
-		cg_flags = CG_FLAG_HTT;
-	else if (cpu_logical > 1)
-		cg_flags = CG_FLAG_SMT;
+	mp_ncpus = 0;
+	TOPO_FOREACH(node, &topo_root) {
+		if (node->type != TOPO_TYPE_PU)
+			continue;
+
+		if ((node->hwid & smt_mask) != (boot_cpu_id & smt_mask))
+			cpu_info[node->hwid].cpu_hyperthread = 1;
+
+		if (resource_disabled("lapic", node->hwid)) {
+			if (node->hwid != boot_cpu_id)
+				cpu_info[node->hwid].cpu_disabled = 1;
+			else
+				printf("Cannot disable BSP, APIC ID = %d\n",
+				    node->hwid);
+		}
+
+		if (!hyperthreading_allowed &&
+		    cpu_info[node->hwid].cpu_hyperthread)
+			cpu_info[node->hwid].cpu_disabled = 1;
+
+		if (mp_ncpus >= MAXCPU)
+			cpu_info[node->hwid].cpu_disabled = 1;
+
+		if (cpu_info[node->hwid].cpu_disabled) {
+			topo_has_disabled = 1;
+			continue;
+		}
+
+		cpu_apic_ids[mp_ncpus] = node->hwid;
+		apic_cpuids[node->hwid] = mp_ncpus;
+		topo_set_pu_id(node, mp_ncpus);
+		mp_ncpus++;
+	}
+
+	KASSERT(mp_maxid >= mp_ncpus - 1,
+	    ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
+	    mp_ncpus));
+}
+
+/*
+ * Print various information about the SMP system hardware and setup.
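+ *
+ * For example, on a hypothetical single-package, dual-core machine with
+ * two hardware threads per core the summary line reads:
+ *   FreeBSD/SMP: 1 package(s) x 2 core(s) x 2 hardware threads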
+ */ +void +cpu_mp_announce(void) +{ + struct topo_node *node; + const char *hyperthread; + int pkg_count; + int cores_per_pkg; + int thrs_per_core; + + printf("FreeBSD/SMP: "); + if (topo_get_uniparams(&topo_root, 1, &pkg_count, + &cores_per_pkg, &thrs_per_core)) { + printf("%d package(s)", pkg_count); + if (cores_per_pkg > 0) + printf(" x %d core(s)", cores_per_pkg); + if (thrs_per_core > 1) + printf(" x %d hardware threads", thrs_per_core); + } else { + printf("Non-uniform topology"); + } + printf("\n"); + + if (topo_has_disabled) { + printf("FreeBSD/SMP Online: "); + if (topo_get_uniparams(&topo_root, 0, &pkg_count, + &cores_per_pkg, &thrs_per_core)) { + printf("%d package(s)", pkg_count); + if (cores_per_pkg > 0) + printf(" x %d core(s)", cores_per_pkg); + if (thrs_per_core > 1) + printf(" x %d hardware threads", thrs_per_core); + } else { + printf("Non-uniform topology"); + } + printf("\n"); + } + + TOPO_FOREACH(node, &topo_root) { + switch (node->type) { + case TOPO_TYPE_PKG: + printf("Package HW ID = %u (%#x)\n", + node->hwid, node->hwid); + break; + case TOPO_TYPE_CORE: + printf("\tCore HW ID = %u (%#x)\n", + node->hwid, node->hwid); + break; + case TOPO_TYPE_PU: + if (cpu_info[node->hwid].cpu_hyperthread) + hyperthread = "/HT"; + else + hyperthread = ""; + + if (node->subtype == 0) + printf("\t\tCPU (AP%s): APIC ID: %u (%#x)" + "(disabled)\n", hyperthread, node->hwid, + node->hwid); + else if (node->id == 0) + printf("\t\tCPU0 (BSP): APIC ID: %u (%#x)\n", + node->hwid, node->hwid); + else + printf("\t\tCPU%u (AP%s): APIC ID: %u (%#x)\n", + node->id, hyperthread, node->hwid, + node->hwid); + break; + default: + /* ignored */ + break; + } + } +} + +static void +x86topo_add_sched_group(struct topo_node *root, struct cpu_group *cg_root) +{ + struct topo_node *node; + int nchildren; + int ncores; + int i; + + KASSERT(root->type == TOPO_TYPE_SYSTEM || root->type == TOPO_TYPE_CACHE, + ("x86topo_add_sched_group: bad type: %u", root->type)); + CPU_COPY(&root->cpuset, &cg_root->cg_mask); + cg_root->cg_count = root->cpu_count; + if (root->type == TOPO_TYPE_SYSTEM) + cg_root->cg_level = CG_SHARE_NONE; else - cg_flags = 0; - if (mp_ncpus % (cpu_cores * cpu_logical) != 0) { - printf("WARNING: Non-uniform processors.\n"); - printf("WARNING: Using suboptimal topology.\n"); - return (smp_topo_none()); + cg_root->cg_level = root->subtype; + + ncores = 0; + node = root; + while (node != NULL) { + if (node->type != TOPO_TYPE_CORE) { + node = topo_next_node(root, node); + continue; + } + + ncores++; + node = topo_next_nonchild_node(root, node); } - /* - * No multi-core or hyper-threaded. - */ - if (cpu_logical * cpu_cores == 1) + + if (cg_root->cg_level != CG_SHARE_NONE && + root->cpu_count > 1 && ncores < 2) + cg_root->cg_flags = CG_FLAG_SMT; + + nchildren = 0; + node = root; + while (node != NULL) { + if (node->type != TOPO_TYPE_CACHE || + CPU_CMP(&node->cpuset, &root->cpuset) == 0) { + node = topo_next_node(root, node); + continue; + } + + /* XXX What about cache used by a single PU? */ + if (node->cpu_count > 1) + nchildren++; + node = topo_next_nonchild_node(root, node); + } + + cg_root->cg_child = malloc(nchildren * sizeof(*cg_root->cg_child), + M_TOPO, M_WAITOK | M_ZERO); + cg_root->cg_children = nchildren; + + node = root; + i = 0; + while (node != NULL) { + if (node->type != TOPO_TYPE_CACHE || + CPU_CMP(&node->cpuset, &root->cpuset) == 0) { + node = topo_next_node(root, node); + continue; + } + + /* XXX What about cache used by a single PU? 
+		if (node->cpu_count > 1) {
+			cg_root->cg_child[i].cg_parent = cg_root;
+			x86topo_add_sched_group(node, &cg_root->cg_child[i]);
+			i++;
+		}
+
+		node = topo_next_nonchild_node(root, node);
+	}
+}
+
+struct cpu_group *
+cpu_topo(void)
+{
+	struct cpu_group *cg_root;
+
+	if (mp_ncpus <= 1)
 		return (smp_topo_none());
-	/*
-	 * Only HTT no multi-core.
-	 */
-	if (cpu_logical > 1 && cpu_cores == 1)
-		return (smp_topo_1level(CG_SHARE_L1, cpu_logical, cg_flags));
-	/*
-	 * Only multi-core no HTT.
-	 */
-	if (cpu_cores > 1 && cpu_logical == 1)
-		return (smp_topo_1level(CG_SHARE_L2, cpu_cores, cg_flags));
-	/*
-	 * Both HTT and multi-core.
-	 */
-	return (smp_topo_2level(CG_SHARE_L2, cpu_cores,
-	    CG_SHARE_L1, cpu_logical, cg_flags));
+
+	cg_root = malloc(sizeof(*cg_root), M_TOPO, M_WAITOK | M_ZERO);
+	x86topo_add_sched_group(&topo_root, cg_root);
+	return (cg_root);
 }
 
 /*
@@ -558,47 +1178,6 @@ cpu_mp_start(void)
 /*
- * Print various information about the SMP system hardware and setup.
- */
-void
-cpu_mp_announce(void)
-{
-	const char *hyperthread;
-	int i;
-
-	printf("FreeBSD/SMP: %d package(s) x %d core(s)",
-	    mp_ncpus / (cpu_cores * cpu_logical), cpu_cores);
-	if (hyperthreading_cpus > 1)
-	    printf(" x %d HTT threads", cpu_logical);
-	else if (cpu_logical > 1)
-	    printf(" x %d SMT threads", cpu_logical);
-	printf("\n");
-
-	/* List active CPUs first. */
-	printf(" cpu0 (BSP): APIC ID: %2d\n", boot_cpu_id);
-	for (i = 1; i < mp_ncpus; i++) {
-		if (cpu_info[cpu_apic_ids[i]].cpu_hyperthread)
-			hyperthread = "/HT";
-		else
-			hyperthread = "";
-		printf(" cpu%d (AP%s): APIC ID: %2d\n", i, hyperthread,
-		    cpu_apic_ids[i]);
-	}
-
-	/* List disabled CPUs last. */
-	for (i = 0; i <= MAX_APIC_ID; i++) {
-		if (!cpu_info[i].cpu_present || !cpu_info[i].cpu_disabled)
-			continue;
-		if (cpu_info[i].cpu_hyperthread)
-			hyperthread = "/HT";
-		else
-			hyperthread = "";
-		printf(" cpu (AP%s): APIC ID: %2d (disabled)\n", hyperthread,
-		    i);
-	}
-}
-
-/*
  * AP CPU's call this to initialize themselves.
  */
 void
@@ -739,8 +1318,7 @@ init_secondary(void)
 	printf("SMP: AP CPU #%d Launched!\n", cpuid);
 
 	/* Determine if we are a logical CPU. */
-	/* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
-	if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
+	if (cpu_info[PCPU_GET(apic_id)].cpu_hyperthread)
 		CPU_SET(cpuid, &logical_cpus_mask);
 
 	if (bootverbose)
@@ -802,85 +1380,13 @@ set_interrupt_apic_ids(void)
 			continue;
 
 		/* Don't let hyperthreads service interrupts. */
-		if (hyperthreading_cpus > 1 &&
-		    apic_id % hyperthreading_cpus != 0)
+		if (cpu_info[apic_id].cpu_hyperthread)
 			continue;
 
 		intr_add_cpu(i);
 	}
 }
 
-/*
- * Assign logical CPU IDs to local APICs.
- */
-static void
-assign_cpu_ids(void)
-{
-	u_int i;
-
-	TUNABLE_INT_FETCH("machdep.hyperthreading_allowed",
-	    &hyperthreading_allowed);
-
-	/* Check for explicitly disabled CPUs. */
-	for (i = 0; i <= MAX_APIC_ID; i++) {
-		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp)
-			continue;
-
-		if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
-			cpu_info[i].cpu_hyperthread = 1;
-
-			/*
-			 * Don't use HT CPU if it has been disabled by a
-			 * tunable.
-			 */
-			if (hyperthreading_allowed == 0) {
-				cpu_info[i].cpu_disabled = 1;
-				continue;
-			}
-		}
-
-		/* Don't use this CPU if it has been disabled by a tunable. */
-		if (resource_disabled("lapic", i)) {
-			cpu_info[i].cpu_disabled = 1;
-			continue;
-		}
-	}
-
-	if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
-		hyperthreading_cpus = 0;
-		cpu_logical = 1;
-	}
-
-	/*
-	 * Assign CPU IDs to local APIC IDs and disable any CPUs
-	 * beyond MAXCPU. CPU 0 is always assigned to the BSP.
-	 *
-	 * To minimize confusion for userland, we attempt to number
-	 * CPUs such that all threads and cores in a package are
-	 * grouped together. For now we assume that the BSP is always
-	 * the first thread in a package and just start adding APs
-	 * starting with the BSP's APIC ID.
-	 */
-	mp_ncpus = 1;
-	cpu_apic_ids[0] = boot_cpu_id;
-	apic_cpuids[boot_cpu_id] = 0;
-	for (i = boot_cpu_id + 1; i != boot_cpu_id;
-	     i == MAX_APIC_ID ? i = 0 : i++) {
-		if (!cpu_info[i].cpu_present || cpu_info[i].cpu_bsp ||
-		    cpu_info[i].cpu_disabled)
-			continue;
-
-		if (mp_ncpus < MAXCPU) {
-			cpu_apic_ids[mp_ncpus] = i;
-			apic_cpuids[i] = mp_ncpus;
-			mp_ncpus++;
-		} else
-			cpu_info[i].cpu_disabled = 1;
-	}
-	KASSERT(mp_maxid >= mp_ncpus - 1,
-	    ("%s: counters out of sync: max %d, count %d", __func__, mp_maxid,
-	    mp_ncpus));
-}
 
 /*
  * start each AP in our list
-- 
1.7.6