From f6cd9e912d48564859265eefa7791714318cd827 Mon Sep 17 00:00:00 2001 From: Conrad Meyer Date: Sun, 6 Jan 2019 13:52:21 -0800 Subject: [PATCH] amdtemp(4): Add support for Family 15h, Model >=60h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Family 15h is a bit of an oddball. Early models used the same temperature register and spec (mostly[1]) as earlier CPU families. Model 60h-6Fh and 70-7Fh use something more like Family 17h's Service Management Network, communicating with it in a similar fashion. To support them, add support for their version of SMU indirection to amdsmn(4) and use it in amdtemp(4) on these models. While here, clarify some of the deviceid macros in amdtemp(4) that were added with arbitrary, incorrect family numbers, and remove ones that were not used. Additionally, clarify intent and condition of heterogenous multi-socket system detection. [1]: 15h adds the "adjust range by -49°C if a certain condition is met," which previous families did not have. PR: 234657 Reported by: D. C. --- sys/dev/amdsmn/amdsmn.c | 70 +++++++++++---- sys/dev/amdtemp/amdtemp.c | 179 +++++++++++++++++++++++++++++--------- 2 files changed, 188 insertions(+), 61 deletions(-) diff --git a/sys/dev/amdsmn/amdsmn.c b/sys/dev/amdsmn/amdsmn.c index 245c375ac202..520034e1284a 100644 --- a/sys/dev/amdsmn/amdsmn.c +++ b/sys/dev/amdsmn/amdsmn.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2017 Conrad Meyer + * Copyright (c) 2017-2019 Conrad Meyer * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -25,7 +25,7 @@ */ /* - * Driver for the AMD Family 17h CPU System Management Network. + * Driver for the AMD Family 15h and 17h CPU System Management Network. */ #include @@ -51,24 +51,45 @@ __FBSDID("$FreeBSD$"); #include -#define SMN_ADDR_REG 0x60 -#define SMN_DATA_REG 0x64 +#define F15H_SMN_ADDR_REG 0xb8 +#define F15H_SMN_DATA_REG 0xbc +#define F17H_SMN_ADDR_REG 0x60 +#define F17H_SMN_DATA_REG 0x64 +#define PCI_DEVICE_ID_AMD_15H_M60H_ROOT 0x1576 #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 -#define PCI_DEVICE_ID_AMD_17H_ROOT_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 -#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT_DF_F3 0x15eb +struct pciid; struct amdsmn_softc { struct mtx smn_lock; + const struct pciid *smn_pciid; }; -static struct pciid { +static const struct pciid { uint16_t amdsmn_vendorid; uint16_t amdsmn_deviceid; + uint8_t amdsmn_addr_reg; + uint8_t amdsmn_data_reg; } amdsmn_ids[] = { - { CPU_VENDOR_AMD, PCI_DEVICE_ID_AMD_17H_ROOT }, - { CPU_VENDOR_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT }, + { + .amdsmn_vendorid = CPU_VENDOR_AMD, + .amdsmn_deviceid = PCI_DEVICE_ID_AMD_15H_M60H_ROOT, + .amdsmn_addr_reg = F15H_SMN_ADDR_REG, + .amdsmn_data_reg = F15H_SMN_DATA_REG, + }, + { + .amdsmn_vendorid = CPU_VENDOR_AMD, + .amdsmn_deviceid = PCI_DEVICE_ID_AMD_17H_ROOT, + .amdsmn_addr_reg = F17H_SMN_ADDR_REG, + .amdsmn_data_reg = F17H_SMN_DATA_REG, + }, + { + .amdsmn_vendorid = CPU_VENDOR_AMD, + .amdsmn_deviceid = PCI_DEVICE_ID_AMD_17H_M10H_ROOT, + .amdsmn_addr_reg = F17H_SMN_ADDR_REG, + .amdsmn_data_reg = F17H_SMN_DATA_REG, + }, }; /* @@ -101,7 +122,7 @@ MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdsmn, amdsmn_ids, nitems(amdsmn_ids)); static bool -amdsmn_match(device_t parent) +amdsmn_match(device_t parent, const struct pciid **pciid_out) { uint16_t vendor, device; size_t i; @@ -109,10 +130,14 @@ amdsmn_match(device_t parent) vendor = pci_get_vendor(parent); device = pci_get_device(parent); - for (i = 0; i < nitems(amdsmn_ids); i++) + for (i = 0; i < nitems(amdsmn_ids); i++) { if (vendor == amdsmn_ids[i].amdsmn_vendorid && - device == amdsmn_ids[i].amdsmn_deviceid) + device == amdsmn_ids[i].amdsmn_deviceid) { + if (pciid_out != NULL) + *pciid_out = &amdsmn_ids[i]; return (true); + } + } return (false); } @@ -124,7 +149,7 @@ amdsmn_identify(driver_t *driver, device_t parent) /* Make sure we're not being doubly invoked. */ if (device_find_child(parent, "amdsmn", -1) != NULL) return; - if (!amdsmn_match(parent)) + if (!amdsmn_match(parent, NULL)) return; child = device_add_child(parent, "amdsmn", -1); @@ -136,21 +161,25 @@ static int amdsmn_probe(device_t dev) { uint32_t family; + char buf[64]; if (resource_disabled("amdsmn", 0)) return (ENXIO); - if (!amdsmn_match(device_get_parent(dev))) + if (!amdsmn_match(device_get_parent(dev), NULL)) return (ENXIO); family = CPUID_TO_FAMILY(cpu_id); switch (family) { + case 0x15: case 0x17: break; default: return (ENXIO); } - device_set_desc(dev, "AMD Family 17h System Management Network"); + snprintf(buf, sizeof(buf), "AMD Family %xh System Management Network", + family); + device_set_desc_copy(dev, buf); return (BUS_PROBE_GENERIC); } @@ -160,6 +189,9 @@ amdsmn_attach(device_t dev) { struct amdsmn_softc *sc = device_get_softc(dev); + if (!amdsmn_match(device_get_parent(dev), &sc->smn_pciid)) + return (ENXIO); + mtx_init(&sc->smn_lock, "SMN mtx", "SMN", MTX_DEF); return (0); } @@ -182,8 +214,8 @@ amdsmn_read(device_t dev, uint32_t addr, uint32_t *value) parent = device_get_parent(dev); mtx_lock(&sc->smn_lock); - pci_write_config(parent, SMN_ADDR_REG, addr, 4); - *value = pci_read_config(parent, SMN_DATA_REG, 4); + pci_write_config(parent, sc->smn_pciid->amdsmn_addr_reg, addr, 4); + *value = pci_read_config(parent, sc->smn_pciid->amdsmn_data_reg, 4); mtx_unlock(&sc->smn_lock); return (0); @@ -198,8 +230,8 @@ amdsmn_write(device_t dev, uint32_t addr, uint32_t value) parent = device_get_parent(dev); mtx_lock(&sc->smn_lock); - pci_write_config(parent, SMN_ADDR_REG, addr, 4); - pci_write_config(parent, SMN_DATA_REG, value, 4); + pci_write_config(parent, sc->smn_pciid->amdsmn_addr_reg, addr, 4); + pci_write_config(parent, sc->smn_pciid->amdsmn_data_reg, value, 4); mtx_unlock(&sc->smn_lock); return (0); diff --git a/sys/dev/amdtemp/amdtemp.c b/sys/dev/amdtemp/amdtemp.c index 2c7afcd0c10b..5c5f7ebee291 100644 --- a/sys/dev/amdtemp/amdtemp.c +++ b/sys/dev/amdtemp/amdtemp.c @@ -5,6 +5,8 @@ * Copyright (c) 2009 Norikatsu Shigemura * Copyright (c) 2009-2012 Jung-uk Kim * All rights reserved. + * Copyright (c) 2017-2019 Conrad Meyer + * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -76,43 +78,66 @@ struct amdtemp_softc { device_t sc_smn; }; +/* + * N.B. The numbers in macro names below are significant and represent CPU + * family and model numbers. Do not make up fictitious family or model numbers + * when adding support for new devices. + */ #define VENDORID_AMD 0x1022 #define DEVICEID_AMD_MISC0F 0x1103 #define DEVICEID_AMD_MISC10 0x1203 #define DEVICEID_AMD_MISC11 0x1303 -#define DEVICEID_AMD_MISC12 0x1403 #define DEVICEID_AMD_MISC14 0x1703 #define DEVICEID_AMD_MISC15 0x1603 +#define DEVICEID_AMD_MISC15_M10H 0x1403 +#define DEVICEID_AMD_MISC15_M30H 0x141d +#define DEVICEID_AMD_MISC15_M60H_ROOT 0x1576 #define DEVICEID_AMD_MISC16 0x1533 #define DEVICEID_AMD_MISC16_M30H 0x1583 -#define DEVICEID_AMD_MISC17 0x141d #define DEVICEID_AMD_HOSTB17H_ROOT 0x1450 -#define DEVICEID_AMD_HOSTB17H_DF_F3 0x1463 #define DEVICEID_AMD_HOSTB17H_M10H_ROOT 0x15d0 -#define DEVICEID_AMD_HOSTB17H_M10H_DF_F3 0x15eb -static struct amdtemp_product { +static const struct amdtemp_product { uint16_t amdtemp_vendorid; uint16_t amdtemp_deviceid; + /* + * 0xFC register is only valid on the D18F3 PCI device; SMN temp + * drivers do not attach to that device. + */ + bool amdtemp_has_cpuid; } amdtemp_products[] = { - { VENDORID_AMD, DEVICEID_AMD_MISC0F }, - { VENDORID_AMD, DEVICEID_AMD_MISC10 }, - { VENDORID_AMD, DEVICEID_AMD_MISC11 }, - { VENDORID_AMD, DEVICEID_AMD_MISC12 }, - { VENDORID_AMD, DEVICEID_AMD_MISC14 }, - { VENDORID_AMD, DEVICEID_AMD_MISC15 }, - { VENDORID_AMD, DEVICEID_AMD_MISC16 }, - { VENDORID_AMD, DEVICEID_AMD_MISC16_M30H }, - { VENDORID_AMD, DEVICEID_AMD_MISC17 }, - { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_ROOT }, - { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_M10H_ROOT }, + { VENDORID_AMD, DEVICEID_AMD_MISC0F, true }, + { VENDORID_AMD, DEVICEID_AMD_MISC10, true }, + { VENDORID_AMD, DEVICEID_AMD_MISC11, true }, + { VENDORID_AMD, DEVICEID_AMD_MISC14, true }, + { VENDORID_AMD, DEVICEID_AMD_MISC15, true }, + { VENDORID_AMD, DEVICEID_AMD_MISC15_M10H, true }, + { VENDORID_AMD, DEVICEID_AMD_MISC15_M30H, true }, + { VENDORID_AMD, DEVICEID_AMD_MISC15_M60H_ROOT, false }, + { VENDORID_AMD, DEVICEID_AMD_MISC16, true }, + { VENDORID_AMD, DEVICEID_AMD_MISC16_M30H, true }, + { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_ROOT, false }, + { VENDORID_AMD, DEVICEID_AMD_HOSTB17H_M10H_ROOT, false }, }; /* - * Reported Temperature Control Register + * Reported Temperature Control Register, family 0Fh-15h (some models), 16h. */ #define AMDTEMP_REPTMP_CTRL 0xa4 +#define AMDTEMP_REPTMP10H_CURTMP_MASK 0x7ff +#define AMDTEMP_REPTMP10H_CURTMP_SHIFT 21 +#define AMDTEMP_REPTMP10H_TJSEL_MASK 0x3 +#define AMDTEMP_REPTMP10H_TJSEL_SHIFT 16 + +/* + * Reported Temperature, Family 15h, M60+ + * + * Same register bit definitions as other Family 15h CPUs, but access is + * indirect via SMN, like Family 17h. + */ +#define AMDTEMP_15H_M60H_REPTMP_CTRL 0xd8200ca4 + /* * Reported Temperature, Family 17h * @@ -123,7 +148,11 @@ static struct amdtemp_product { */ #define AMDTEMP_17H_CUR_TMP 0x59800 #define AMDTEMP_17H_CUR_TMP_RANGE_SEL (1 << 19) -#define AMDTEMP_17H_CUR_TMP_RANGE_OFF 490 + +/* + * AMD temperature range adjustment, in deciKelvins (i.e., 49.0 Celsius). + */ +#define AMDTEMP_CURTMP_RANGE_ADJUST 490 /* * Thermaltrip Status Register (Family 0Fh only) @@ -151,9 +180,9 @@ static int amdtemp_probe(device_t dev); static int amdtemp_attach(device_t dev); static void amdtemp_intrhook(void *arg); static int amdtemp_detach(device_t dev); -static int amdtemp_match(device_t dev); static int32_t amdtemp_gettemp0f(device_t dev, amdsensor_t sensor); static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor); +static int32_t amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor); static int32_t amdtemp_gettemp17h(device_t dev, amdsensor_t sensor); static int amdtemp_sysctl(SYSCTL_HANDLER_ARGS); @@ -180,8 +209,8 @@ MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1); MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdtemp, amdtemp_products, nitems(amdtemp_products)); -static int -amdtemp_match(device_t dev) +static bool +amdtemp_match(device_t dev, const struct amdtemp_product **product_out) { int i; uint16_t vendor, devid; @@ -191,11 +220,13 @@ amdtemp_match(device_t dev) for (i = 0; i < nitems(amdtemp_products); i++) { if (vendor == amdtemp_products[i].amdtemp_vendorid && - devid == amdtemp_products[i].amdtemp_deviceid) - return (1); + devid == amdtemp_products[i].amdtemp_deviceid) { + if (product_out != NULL) + *product_out = &amdtemp_products[i]; + return (true); + } } - - return (0); + return (false); } static void @@ -207,7 +238,7 @@ amdtemp_identify(driver_t *driver, device_t parent) if (device_find_child(parent, "amdtemp", -1) != NULL) return; - if (amdtemp_match(parent)) { + if (amdtemp_match(parent, NULL)) { child = device_add_child(parent, "amdtemp", -1); if (child == NULL) device_printf(parent, "add amdtemp child failed\n"); @@ -221,7 +252,7 @@ amdtemp_probe(device_t dev) if (resource_disabled("amdtemp", 0)) return (ENXIO); - if (!amdtemp_match(device_get_parent(dev))) + if (!amdtemp_match(device_get_parent(dev), NULL)) return (ENXIO); family = CPUID_TO_FAMILY(cpu_id); @@ -254,23 +285,42 @@ amdtemp_attach(device_t dev) { char tn[32]; u_int regs[4]; - struct amdtemp_softc *sc = device_get_softc(dev); + const struct amdtemp_product *product; + struct amdtemp_softc *sc; struct sysctl_ctx_list *sysctlctx; struct sysctl_oid *sysctlnode; uint32_t cpuid, family, model; u_int bid; int erratum319, unit; + bool needsmn; + sc = device_get_softc(dev); erratum319 = 0; + needsmn = false; + + if (!amdtemp_match(device_get_parent(dev), &product)) + return (ENXIO); - /* - * CPUID Register is available from Revision F. - */ cpuid = cpu_id; family = CPUID_TO_FAMILY(cpuid); model = CPUID_TO_MODEL(cpuid); - if ((family != 0x0f || model >= 0x40) && family != 0x17) { - cpuid = pci_read_config(dev, AMDTEMP_CPUID, 4); + + /* + * This checks for the byzantine condition of running a heterogenous + * revision multi-socket system where the attach thread is potentially + * probing a remote socket's PCI device. + * + * Currently, such scenarios are unsupported on models using the SMN + * (because on those models, amdtemp(4) attaches to a different PCI + * device than the one that contains AMDTEMP_CPUID). + * + * The ancient 0x0F family of devices only supports this register from + * models 40h+. + */ + if (product->amdtemp_has_cpuid && (family > 0x0f || + (family == 0x0f && model >= 0x40))) { + cpuid = pci_read_config(device_get_parent(dev), AMDTEMP_CPUID, + 4); family = CPUID_TO_FAMILY(cpuid); model = CPUID_TO_MODEL(cpuid); } @@ -364,16 +414,30 @@ amdtemp_attach(device_t dev) case 0x14: case 0x15: case 0x16: + sc->sc_ntemps = 1; /* - * There is only one sensor per package. + * Some later (60h+) models of family 15h use a similar SMN + * network as family 17h. (However, the register index differs + * from 17h and the decoding matches other 10h-15h models, + * which differ from 17h.) */ - sc->sc_ntemps = 1; - - sc->sc_gettemp = amdtemp_gettemp; + if (family == 0x15 && model >= 0x60) { + sc->sc_gettemp = amdtemp_gettemp15hm60h; + needsmn = true; + } else + sc->sc_gettemp = amdtemp_gettemp; break; case 0x17: sc->sc_ntemps = 1; sc->sc_gettemp = amdtemp_gettemp17h; + needsmn = true; + break; + default: + device_printf(dev, "Bogus family 0x%x\n", family); + return (ENXIO); + } + + if (needsmn) { sc->sc_smn = device_find_child( device_get_parent(dev), "amdsmn", -1); if (sc->sc_smn == NULL) { @@ -381,7 +445,6 @@ amdtemp_attach(device_t dev) device_printf(dev, "No SMN device found\n"); return (ENXIO); } - break; } /* Find number of cores per package. */ @@ -585,6 +648,29 @@ amdtemp_gettemp0f(device_t dev, amdsensor_t sensor) return (temp); } +static uint32_t +amdtemp_decode_fam10h_to_16h(int32_t sc_offset, uint32_t val) +{ + uint32_t temp; + + /* Convert raw register subfield units (0.125C) to units of 0.1C. */ + temp = ((val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT) & + AMDTEMP_REPTMP10H_CURTMP_MASK) * 5 / 4; + + /* + * On Family 15h and higher, if CurTmpTjSel is 11b, the range is + * adjusted down by 49.0 degrees Celsius. (This adjustment is not + * documented in BKDGs prior to family 15h model 00h.) + */ + if (CPUID_TO_FAMILY(cpu_id) >= 0x15 && + ((val >> AMDTEMP_REPTMP10H_TJSEL_SHIFT) & + AMDTEMP_REPTMP10H_TJSEL_MASK) == 0x3) + temp -= AMDTEMP_CURTMP_RANGE_ADJUST; + + temp += AMDTEMP_ZERO_C_TO_K + sc_offset * 10; + return (temp); +} + static int32_t amdtemp_gettemp(device_t dev, amdsensor_t sensor) { @@ -592,10 +678,19 @@ amdtemp_gettemp(device_t dev, amdsensor_t sensor) uint32_t temp; temp = pci_read_config(dev, AMDTEMP_REPTMP_CTRL, 4); - temp = ((temp >> 21) & 0x7ff) * 5 / 4; - temp += AMDTEMP_ZERO_C_TO_K + sc->sc_offset * 10; + return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, temp)); +} - return (temp); +static int32_t +amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor) +{ + struct amdtemp_softc *sc = device_get_softc(dev); + uint32_t val; + int error; + + error = amdsmn_read(sc->sc_smn, AMDTEMP_15H_M60H_REPTMP_CTRL, &val); + KASSERT(error == 0, ("amdsmn_read")); + return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, val)); } static int32_t @@ -610,7 +705,7 @@ amdtemp_gettemp17h(device_t dev, amdsensor_t sensor) temp = ((val >> 21) & 0x7ff) * 5 / 4; if ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0) - temp -= AMDTEMP_17H_CUR_TMP_RANGE_OFF; + temp -= AMDTEMP_CURTMP_RANGE_ADJUST; temp += AMDTEMP_ZERO_C_TO_K + sc->sc_offset * 10; return (temp); -- 2.19.1