diff --new-file -rup /usr/src/usr.sbin/powerd/Makefile FreeBSD/usr.sbin/powerd/Makefile --- /usr/src/usr.sbin/powerd/Makefile 2023-08-17 19:24:20.944730000 -0400 +++ FreeBSD/usr.sbin/powerd/Makefile 2023-10-23 16:16:42.003651000 -0400 @@ -2,6 +2,8 @@ MAN= powerd.8 PROG= powerd MAN= powerd.8 -LIBADD= util +SRCS= powerd.c blessed.c + +LIBADD= util kvm .include diff --new-file -rup /usr/src/usr.sbin/powerd/blessed.c FreeBSD/usr.sbin/powerd/blessed.c --- /usr/src/usr.sbin/powerd/blessed.c 1969-12-31 19:00:00.000000000 -0500 +++ FreeBSD/usr.sbin/powerd/blessed.c 2023-08-21 17:54:31.832511000 -0400 @@ -0,0 +1,247 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023 Netflix, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * NOTE(review): the header names were missing from this copy of the patch;
+ * the list below is reconstructed from the APIs this file uses -- confirm
+ * against the original change.
+ */
+#include <sys/param.h>
+#include <sys/queue.h>
+#include <sys/sysctl.h>
+#include <sys/time.h>
+#include <sys/user.h>
+
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <kvm.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "blessed.h"
+
+/* One running process that is currently being monitored. */
+struct blessed {
+	SLIST_ENTRY(blessed) next;
+	char	*name;		/* shared with the matching blessed_args */
+	pid_t	pid;
+	int	maxpct;		/* CPU budget copied from blessed_args */
+	double	cpu;		/* last value computed by getcpu() */
+};
+
+/* One (name, budget) request registered through bless(). */
+struct blessed_args {
+	SLIST_ENTRY(blessed_args) next;
+	char	*name;		/* caller's string; not copied */
+	int	maxpct;		/* requested percentage times hw.ncpu */
+};
+
+static SLIST_HEAD(slisthead, blessed) proc_head =
+    SLIST_HEAD_INITIALIZER(proc_head);
+
+static SLIST_HEAD(slisthead_args, blessed_args) arg_head =
+    SLIST_HEAD_INITIALIZER(arg_head);
+
+/* Time of the most recent bless_scan(). */
+static struct timeval bless_scan_time;
+/* Seconds between unconditional rescans of the process table. */
+static const int bless_scan_interval = 120;
+static kvm_t *kd = NULL;
+static int fscale;
+
+/*
+ * Open the kvm(3) descriptor used for all process lookups and fetch
+ * kern.fscale, which getcpu() needs to scale ki_pctcpu.
+ * Any failure terminates the program.
+ */
+static void
+open_kvm(void)
+{
+	char kern[MAXPATHLEN];
+	size_t sz;
+
+	sz = sizeof(kern);
+	if (sysctlbyname("kern.bootfile", kern, &sz, NULL, 0) == -1)
+		err(1, "sysctl kern.bootfile");
+
+	kd = kvm_open(kern, "/dev/mem", NULL, O_RDONLY, NULL);
+	if (kd == NULL)
+		errx(1, "Could not open kvm for %s", kern);
+
+	sz = sizeof(fscale);
+	if (sysctlbyname("kern.fscale", &fscale, &sz, NULL, 0) == -1)
+		err(1, "sysctl kern.fscale");
+}
+
+/*
+ * Build the list of process names we need to monitor, taken from the
+ * command line arguments passed to powerd.
+ *
+ * name is stored by reference, so it must stay valid for the life of the
+ * program.  pct is the CPU budget as a percentage of one core; it is
+ * multiplied by hw.ncpu before being stored.  The multiplication is done
+ * in integer arithmetic: routing it through floating point and truncating
+ * can lose one (100.0 * (29 / 100.0) truncates to 28).
+ */
+void
+bless(char *name, long pct)
+{
+	struct blessed_args *b;
+	size_t sz;
+	static int ncpu;
+
+	/* hw.ncpu is looked up once and reused for later calls. */
+	if (ncpu == 0) {
+		sz = sizeof(ncpu);
+		if (sysctlbyname("hw.ncpu", &ncpu, &sz, NULL, 0) == -1)
+			err(1, "sysctl hw.ncpu");
+	}
+
+	b = calloc(1, sizeof(*b));
+	if (b == NULL)
+		err(1, "calloc(blessed_args)");
+	b->name = name;
+	b->maxpct = (int)(pct * ncpu);
+	SLIST_INSERT_HEAD(&arg_head, b, next);
+}
+
+/*
+ * Try to find the PIDs of all instances of the processes we monitor.
+ * This gets called every bless_scan_interval, or every time we notice
+ * that a monitored process has exited.
+ *
+ * The previous contents of proc_head are discarded and the list is
+ * rebuilt from a fresh kvm_getprocs() snapshot.
+ */
+static void
+bless_scan(void)
+{
+	struct blessed *b;
+	struct blessed_args *barg;
+	struct kinfo_proc *p;
+	int cnt, i;
+
+	gettimeofday(&bless_scan_time, NULL);
+	if (kd == NULL)
+		open_kvm();
+
+	/* Drain the old list. */
+	while (!SLIST_EMPTY(&proc_head)) {
+		b = SLIST_FIRST(&proc_head);
+		SLIST_REMOVE_HEAD(&proc_head, next);
+		free(b);
+	}
+
+	p = kvm_getprocs(kd, KERN_PROC_PROC, 0, &cnt);
+	if (p == NULL || cnt == 0)
+		errx(1, "kvm_getprocs");
+
+	/*
+	 * NOTE(review): names are compared against ki_comm with strcmp();
+	 * presumably a requested name longer than ki_comm can hold will
+	 * never match -- confirm.
+	 */
+	SLIST_FOREACH(barg, &arg_head, next) {
+		for (i = 0; i < cnt; i++) {
+			if (strcmp(barg->name, p[i].ki_comm) != 0)
+				continue;
+			b = calloc(1, sizeof(*b));
+			if (b == NULL)
+				errx(1, "malloc");
+			b->pid = p[i].ki_pid;
+			b->name = barg->name;
+			b->maxpct = barg->maxpct;
+			if (vflag)
+				printf("found %s, pid %d\n",
+				    b->name, b->pid);
+			SLIST_INSERT_HEAD(&proc_head, b, next);
+		}
+	}
+}
+
+/*
+ * Refresh b->cpu for one monitored process.
+ *
+ * Returns 0 on success, or ENOENT when the PID can no longer be found or
+ * now belongs to a process with a different name.
+ */
+static int
+getcpu(struct blessed *b)
+{
+	struct kinfo_proc *pinfo;
+	int cnt;
+
+	if (kd == NULL) {
+		open_kvm();
+	}
+	/*
+	 * Note that according to kvm_getprocs(3):
+	 * This memory is locally allocated, and subsequent calls to
+	 * kvm_getprocs() ... will overwrite this storage.
+	 */
+	pinfo = kvm_getprocs(kd, KERN_PROC_PID, b->pid, &cnt);
+	if (pinfo == NULL || cnt == 0 ||
+	    (strcmp(b->name, pinfo->ki_comm) != 0)) {
+		return (ENOENT);
+	}
+	if (pinfo->ki_swtime == 0 || (pinfo->ki_flag & P_INMEM) == 0) {
+		b->cpu = 0.0;
+	} else {
+		/* fscale is magic taken from ps's print.c */
+		b->cpu = 100 * (((double)pinfo->ki_pctcpu) / fscale);
+		if (vflag)
+			printf("%s cpu = %lf, max=%lf\n",
+			    b->name, b->cpu, (double)b->maxpct);
+	}
+	return (0);
+}
+
+/*
+ * Inflate the measured load according to how busy the monitored
+ * processes are.
+ *
+ * load is returned unchanged when it already exceeds setpoint or when no
+ * names were registered with bless().  Otherwise each monitored process
+ * contributes a factor of sqrt(cpu / maxpct) + 1, the largest factor is
+ * applied to load, and the result is capped at setpoint + 1.
+ *
+ * The factor is recomputed only when curfreq differs from the previous
+ * call or on every 10th call; in between the cached factor is reused.
+ */
+int
+check_blessed(int load, int curfreq, int setpoint)
+{
+	struct timeval now;
+	struct blessed *b, *b_tmp;
+	static unsigned int calls = 0;
+	static double cached_cpu_factor;
+	static int last_freq;
+	double cpu_factor, new_load;
+	static bool need_rescan = false;
+
+	if (load > setpoint || SLIST_EMPTY(&arg_head))
+		return (load);
+	/*
+	 * Return cached results except every 10th time to save CPU.
+	 * Re-run if the frequency changed, since our CPU will be
+	 * different.  XXX: Maybe make skip count configurable if needed.
+	 */
+	if (last_freq != curfreq || calls++ % 10 == 0) {
+		gettimeofday(&now, NULL);
+		if (need_rescan ||
+		    now.tv_sec > bless_scan_time.tv_sec + bless_scan_interval) {
+			bless_scan();
+			need_rescan = false;
+		}
+		last_freq = curfreq;
+
+		cached_cpu_factor = 1.0;
+		SLIST_FOREACH_SAFE(b, &proc_head, next, b_tmp) {
+			if (getcpu(b) != 0) {
+				/* Gone: drop it now, rescan next refresh. */
+				if (vflag)
+					printf("%s pid %d exited\n",
+					    b->name, b->pid);
+				SLIST_REMOVE(&proc_head, b, blessed, next);
+				free(b);
+				need_rescan = true;
+				continue;
+			}
+			cpu_factor = (double)b->cpu / (double)b->maxpct;
+			cpu_factor = sqrt(cpu_factor) + 1.0;
+			if (cpu_factor > cached_cpu_factor) {
+				if (vflag)
+					printf("%s: %lf %d -> %lf\n",
+					    b->name, cpu_factor, load,
+					    (double)load * cpu_factor);
+				cached_cpu_factor = cpu_factor;
+			}
+		}
+	}
+
+	new_load = (double)load * cached_cpu_factor;
+	new_load = MIN(new_load, (double)setpoint + 1.0);
+
+	return ((int)new_load);
+}
diff --new-file -rup /usr/src/usr.sbin/powerd/blessed.h 
FreeBSD/usr.sbin/powerd/blessed.h --- /usr/src/usr.sbin/powerd/blessed.h 1969-12-31 19:00:00.000000000 -0500 +++ FreeBSD/usr.sbin/powerd/blessed.h 2023-08-21 17:54:31.833155000 -0400 @@ -0,0 +1,32 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023 Netflix Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted providing that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +extern int vflag; + +void bless(char *name, long pct); +int check_blessed(int load, int curfreq, int maxfreq); + diff --new-file -rup /usr/src/usr.sbin/powerd/powerd.8 FreeBSD/usr.sbin/powerd/powerd.8 --- /usr/src/usr.sbin/powerd/powerd.8 2023-08-17 19:24:20.944829000 -0400 +++ FreeBSD/usr.sbin/powerd/powerd.8 2023-10-23 16:16:42.003775000 -0400 @@ -41,6 +41,8 @@ .Op Fl p Ar ival .Op Fl r Ar percent .Op Fl s Ar source +.Op Fl S +.Op Fl t .Op Fl v .Sh DESCRIPTION The @@ -127,6 +129,15 @@ and and .Cm apm (i386 only). +.It Fl S +Scales load to 100% no matter how many cores are present in the system. +Without this flag, powerd will keep the clock speed elevated on +systems with a high core count if a small number of cores are busy. +.It Fl t +Forbid turbo. +Some CPUs enter "turbo" mode and allow higher clocking when a frequency +whose least significant digit is one is selected. +This flag prevents selection of "turbo" frequencies. .It Fl v Verbose mode. Messages about power changes will be printed to stdout and diff --new-file -rup /usr/src/usr.sbin/powerd/powerd.c FreeBSD/usr.sbin/powerd/powerd.c --- /usr/src/usr.sbin/powerd/powerd.c 2023-11-27 20:34:01.057112000 -0500 +++ FreeBSD/usr.sbin/powerd/powerd.c 2024-01-10 10:41:20.176461000 -0500 @@ -54,6 +54,8 @@ #include #endif +#include "blessed.h" + #define DEFAULT_ACTIVE_PERCENT 75 #define DEFAULT_IDLE_PERCENT 50 #define DEFAULT_POLL_INTERVAL 250 /* Poll interval in milliseconds */ @@ -83,9 +85,9 @@ static const char *modes[] = { #define DEVDPIPE "/var/run/devd.pipe" #define DEVCTL_MAXBUF 1024 -static int read_usage_times(int *load, int nonice); +static int read_usage_times(int *load, int nonice, int scale); static int read_freqs(int *numfreqs, int **freqs, int **power, - int minfreq, int maxfreq); + int minfreq, int maxfreq, int turbo); static int set_freq(int freq); static void acline_init(void); static void acline_read(void); @@ -106,7 +108,7 @@ static int poll_ival; static int cpu_running_mark; static int 
cpu_idle_mark; static int poll_ival; -static int vflag; +int vflag; static volatile sig_atomic_t exit_requested; static power_src_t acline_status; @@ -137,7 +139,7 @@ static int * nice time will be treated as if idle. */ static int -read_usage_times(int *load, int nonice) +read_usage_times(int *load, int nonice, int scale) { static long *cp_times = NULL, *cp_times_old = NULL; static int ncpus = 0; @@ -181,6 +183,8 @@ read_usage_times(int *load, int nonice) cp_times_old[cpu * CPUSTATES + CP_NICE]; *load += 100 - excl * 100 / total; } + if (scale) + *load = *load / ncpus; } memcpy(cp_times_old, cp_times, cp_times_len); @@ -189,7 +193,8 @@ static int } static int -read_freqs(int *numfreqs, int **freqs, int **power, int minfreq, int maxfreq) +read_freqs(int *numfreqs, int **freqs, int **power, int minfreq, + int maxfreq, int turbo) { char *freqstr, *p, *q; int i, j; @@ -229,7 +234,8 @@ read_freqs(int *numfreqs, int **freqs, int **power, in return (-1); } if (((*freqs)[j] >= minfreq || minfreq == -1) && - ((*freqs)[j] <= maxfreq || maxfreq == -1)) + ((*freqs)[j] <= maxfreq || maxfreq == -1) && + ((*freqs)[j] % 10 != 1 || turbo)) j++; p = q + 1; } @@ -476,7 +482,7 @@ usage(void) { fprintf(stderr, -"usage: powerd [-v] [-a mode] [-b mode] [-i %%] [-m freq] [-M freq] [-N] [-n mode] [-p ival] [-r %%] [-s source] [-P pidfile]\n"); +"usage: powerd [-v] [-S] [-t] [-a mode] [-b mode] [-i %%] [-m freq] [-M freq] [-N] [-n mode] [-p ival] [-r %%] [-s source] [-P pidfile] [-B blessed_process percent_cores\n"); exit(1); } @@ -493,7 +499,9 @@ main(int argc, char * argv[]) int ch, mode, mode_ac, mode_battery, mode_none, idle, to; uint64_t mjoules_used; size_t len; - int nonice; + int nonice, scale, turbo; + char *name; + long pct; /* Default mode for all AC states is adaptive. */ mode_ac = mode_none = MODE_HIADAPTIVE; @@ -504,12 +512,14 @@ main(int argc, char * argv[]) mjoules_used = 0; vflag = 0; nonice = 0; + turbo = 1; + scale = 0; /* User must be root to control frequencies. 
*/ if (geteuid() != 0) errx(1, "must be root to run"); - while ((ch = getopt(argc, argv, "a:b:i:m:M:Nn:p:P:r:s:v")) != -1) + while ((ch = getopt(argc, argv, "a:b:B:i:m:M:Nn:p:P:r:s:Stv")) != -1) switch (ch) { case 'a': parse_mode(optarg, &mode_ac, ch); @@ -517,6 +527,18 @@ main(int argc, char * argv[]) case 'b': parse_mode(optarg, &mode_battery, ch); break; + case 'B': + name = optarg; + if (optind < argc && *argv[optind] != '-') { + pct = strtol(argv[optind], NULL, 0); + if (pct < 1 || pct > 100) + usage(); + optind++; + bless(name, pct); + } else { + usage(); + } + break; case 's': parse_acline_mode(optarg, ch); break; @@ -568,6 +590,12 @@ main(int argc, char * argv[]) usage(); } break; + case 'S': + scale = 1; + break; + case 't': + turbo = 0; + break; case 'v': vflag = 1; break; @@ -592,9 +620,9 @@ main(int argc, char * argv[]) err(1, "lookup freq_levels"); /* Check if we can read the load and supported freqs. */ - if (read_usage_times(NULL, nonice)) + if (read_usage_times(NULL, nonice, scale)) err(1, "read_usage_times"); - if (read_freqs(&numfreqs, &freqs, &mwatts, minfreq, maxfreq)) + if (read_freqs(&numfreqs, &freqs, &mwatts, minfreq, maxfreq, turbo)) err(1, "error reading supported CPU frequencies"); if (numfreqs == 0) errx(1, "no CPU frequencies in user-specified range"); @@ -774,12 +802,13 @@ main(int argc, char * argv[]) } /* Adaptive mode; get the current CPU usage times. */ - if (read_usage_times(&load, nonice)) { + if (read_usage_times(&load, nonice, scale)) { if (vflag) warn("read_usage_times() failed"); continue; } + load = check_blessed(load, curfreq, cpu_running_mark); if (mode == MODE_ADAPTIVE) { if (load > cpu_running_mark) { if (load > 95 || load > cpu_running_mark * 2)