diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /tools/power/x86 | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
18 files changed, 15676 insertions, 0 deletions
diff --git a/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py new file mode 100755 index 000000000..2dea4032a --- /dev/null +++ b/tools/power/x86/amd_pstate_tracer/amd_pstate_trace.py @@ -0,0 +1,354 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# -*- coding: utf-8 -*- +# +""" This utility can be used to debug and tune the performance of the +AMD P-State driver. It imports intel_pstate_tracer to analyze AMD P-State +trace event. + +Prerequisites: + Python version 2.7.x or higher + gnuplot 5.0 or higher + gnuplot-py 1.8 or higher + (Most of the distributions have these required packages. They may be called + gnuplot-py, phython-gnuplot or phython3-gnuplot, gnuplot-nox, ... ) + + Kernel config for Linux trace is enabled + + see print_help(): for Usage and Output details + +""" +from __future__ import print_function +from datetime import datetime +import subprocess +import os +import time +import re +import signal +import sys +import getopt +import Gnuplot +from numpy import * +from decimal import * +sys.path.append('../intel_pstate_tracer') +#import intel_pstate_tracer +import intel_pstate_tracer as ipt + +__license__ = "GPL version 2" + +MAX_CPUS = 256 +# Define the csv file columns +C_COMM = 15 +C_ELAPSED = 14 +C_SAMPLE = 13 +C_DURATION = 12 +C_LOAD = 11 +C_TSC = 10 +C_APERF = 9 +C_MPERF = 8 +C_FREQ = 7 +C_MAX_PERF = 6 +C_DES_PERF = 5 +C_MIN_PERF = 4 +C_USEC = 3 +C_SEC = 2 +C_CPU = 1 + +global sample_num, last_sec_cpu, last_usec_cpu, start_time, test_name, trace_file + +getcontext().prec = 11 + +sample_num =0 +last_sec_cpu = [0] * MAX_CPUS +last_usec_cpu = [0] * MAX_CPUS + +def plot_per_cpu_freq(cpu_index): + """ Plot per cpu frequency """ + + file_name = 'cpu{:0>3}.csv'.format(cpu_index) + if os.path.exists(file_name): + output_png = "cpu%03d_frequency.png" % cpu_index + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set yrange [0:7]') + g_plot('set ytics 0, 1') + g_plot('set ylabel "CPU Frequency (GHz)"') + g_plot('set title "{} : frequency : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now())) + g_plot('set ylabel "CPU frequency"') + g_plot('set key off') + ipt.set_4_plot_linestyles(g_plot) + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_FREQ)) + +def plot_per_cpu_des_perf(cpu_index): + """ Plot per cpu desired perf """ + + file_name = 'cpu{:0>3}.csv'.format(cpu_index) + if os.path.exists(file_name): + output_png = "cpu%03d_des_perf.png" % cpu_index + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set yrange [0:255]') + g_plot('set ylabel "des perf"') + g_plot('set title "{} : cpu des perf : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now())) + g_plot('set key off') + ipt.set_4_plot_linestyles(g_plot) + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_DES_PERF)) + +def plot_per_cpu_load(cpu_index): + """ Plot per cpu load """ + + file_name = 'cpu{:0>3}.csv'.format(cpu_index) + if os.path.exists(file_name): + output_png = "cpu%03d_load.png" % cpu_index + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set yrange [0:100]') + g_plot('set ytics 0, 10') + g_plot('set ylabel "CPU load (percent)"') + g_plot('set title "{} : cpu load : CPU {:0>3} : {:%F %H:%M}"'.format(test_name, cpu_index, datetime.now())) + g_plot('set key off') + ipt.set_4_plot_linestyles(g_plot) + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_LOAD)) + +def plot_all_cpu_frequency(): + """ Plot all cpu frequencies """ + + output_png = 'all_cpu_frequencies.png' + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set ylabel "CPU Frequency (GHz)"') + g_plot('set title "{} : cpu frequencies : {:%F %H:%M}"'.format(test_name, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_FREQ) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def plot_all_cpu_des_perf(): + """ Plot all cpu desired perf """ + + output_png = 'all_cpu_des_perf.png' + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set ylabel "des perf"') + g_plot('set title "{} : cpu des perf : {:%F %H:%M}"'.format(test_name, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_DES_PERF) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def plot_all_cpu_load(): + """ Plot all cpu load """ + + output_png = 'all_cpu_load.png' + g_plot = ipt.common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + g_plot('set yrange [0:100]') + g_plot('set ylabel "CPU load (percent)"') + g_plot('set title "{} : cpu load : {:%F %H:%M}"'.format(test_name, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 255 ps 1 title i".format(C_ELAPSED, C_LOAD) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask): + """ Store master csv file information """ + + global graph_data_present + + if cpu_mask[cpu_int] == 0: + return + + try: + f_handle = open('cpu.csv', 'a') + string_buffer = "CPU_%03u, %05u, %06u, %u, %u, %u, %.4f, %u, %u, %u, %.2f, %.3f, %u, %.3f, %s\n" % (cpu_int, int(time_pre_dec), int(time_post_dec), int(min_perf), int(des_perf), int(max_perf), freq_ghz, int(mperf), int(aperf), int(tsc), load, duration_ms, sample_num, elapsed_time, common_comm) + f_handle.write(string_buffer) + f_handle.close() + except: + print('IO error cpu.csv') + return + + graph_data_present = True; + + +def cleanup_data_files(): + """ clean up existing data files """ + + if os.path.exists('cpu.csv'): + os.remove('cpu.csv') + f_handle = open('cpu.csv', 'a') + f_handle.write('common_cpu, common_secs, common_usecs, min_perf, des_perf, max_perf, freq, mperf, aperf, tsc, load, duration_ms, sample_num, elapsed_time, common_comm') + f_handle.write('\n') + f_handle.close() + +def read_trace_data(file_name, cpu_mask): + """ Read and parse trace data """ + + global current_max_cpu + global sample_num, last_sec_cpu, last_usec_cpu, start_time + + try: + data = open(file_name, 'r').read() + except: + print('Error opening ', file_name) + sys.exit(2) + + for line in data.splitlines(): + search_obj = \ + re.search(r'(^(.*?)\[)((\d+)[^\]])(.*?)(\d+)([.])(\d+)(.*?amd_min_perf=)(\d+)(.*?amd_des_perf=)(\d+)(.*?amd_max_perf=)(\d+)(.*?freq=)(\d+)(.*?mperf=)(\d+)(.*?aperf=)(\d+)(.*?tsc=)(\d+)' + , line) + + if search_obj: + cpu = search_obj.group(3) + cpu_int = int(cpu) + cpu = str(cpu_int) + + time_pre_dec = search_obj.group(6) + time_post_dec = search_obj.group(8) + min_perf = search_obj.group(10) + des_perf = search_obj.group(12) + max_perf = search_obj.group(14) + freq = search_obj.group(16) + mperf = search_obj.group(18) + aperf = search_obj.group(20) + tsc = search_obj.group(22) + + common_comm = search_obj.group(2).replace(' ', '') + + if sample_num == 0 : + start_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) + sample_num += 1 + + if last_sec_cpu[cpu_int] == 0 : + last_sec_cpu[cpu_int] = time_pre_dec + last_usec_cpu[cpu_int] = time_post_dec + else : + duration_us = (int(time_pre_dec) - int(last_sec_cpu[cpu_int])) * 1000000 + (int(time_post_dec) - int(last_usec_cpu[cpu_int])) + duration_ms = Decimal(duration_us) / Decimal(1000) + last_sec_cpu[cpu_int] = time_pre_dec + last_usec_cpu[cpu_int] = time_post_dec + elapsed_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) - start_time + load = Decimal(int(mperf)*100)/ Decimal(tsc) + freq_ghz = Decimal(freq)/Decimal(1000000) + store_csv(cpu_int, time_pre_dec, time_post_dec, min_perf, des_perf, max_perf, freq_ghz, mperf, aperf, tsc, common_comm, load, duration_ms, sample_num, elapsed_time, cpu_mask) + + if cpu_int > current_max_cpu: + current_max_cpu = cpu_int +# Now separate the main overall csv file into per CPU csv files. + ipt.split_csv(current_max_cpu, cpu_mask) + + +def signal_handler(signal, frame): + print(' SIGINT: Forcing cleanup before exit.') + if interval: + ipt.disable_trace(trace_file) + ipt.clear_trace_file() + ipt.free_trace_buffer() + sys.exit(0) + +trace_file = "/sys/kernel/debug/tracing/events/amd_cpu/enable" +signal.signal(signal.SIGINT, signal_handler) + +interval = "" +file_name = "" +cpu_list = "" +test_name = "" +memory = "10240" +graph_data_present = False; + +valid1 = False +valid2 = False + +cpu_mask = zeros((MAX_CPUS,), dtype=int) + + +try: + opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="]) +except getopt.GetoptError: + ipt.print_help('amd_pstate') + sys.exit(2) +for opt, arg in opts: + if opt == '-h': + print() + sys.exit() + elif opt in ("-t", "--trace_file"): + valid1 = True + location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) + file_name = os.path.join(location, arg) + elif opt in ("-i", "--interval"): + valid1 = True + interval = arg + elif opt in ("-c", "--cpu"): + cpu_list = arg + elif opt in ("-n", "--name"): + valid2 = True + test_name = arg + elif opt in ("-m", "--memory"): + memory = arg + +if not (valid1 and valid2): + ipt.print_help('amd_pstate') + sys.exit() + +if cpu_list: + for p in re.split("[,]", cpu_list): + if int(p) < MAX_CPUS : + cpu_mask[int(p)] = 1 +else: + for i in range (0, MAX_CPUS): + cpu_mask[i] = 1 + +if not os.path.exists('results'): + os.mkdir('results') + ipt.fix_ownership('results') + +os.chdir('results') +if os.path.exists(test_name): + print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.') + sys.exit() +os.mkdir(test_name) +ipt.fix_ownership(test_name) +os.chdir(test_name) + +cur_version = sys.version_info +print('python version (should be >= 2.7):') +print(cur_version) + +cleanup_data_files() + +if interval: + file_name = "/sys/kernel/debug/tracing/trace" + ipt.clear_trace_file() + ipt.set_trace_buffer_size(memory) + ipt.enable_trace(trace_file) + time.sleep(int(interval)) + ipt.disable_trace(trace_file) + +current_max_cpu = 0 + +read_trace_data(file_name, cpu_mask) + +if interval: + ipt.clear_trace_file() + ipt.free_trace_buffer() + +if graph_data_present == False: + print('No valid data to plot') + sys.exit(2) + +for cpu_no in range(0, current_max_cpu + 1): + plot_per_cpu_freq(cpu_no) + plot_per_cpu_des_perf(cpu_no) + plot_per_cpu_load(cpu_no) + +plot_all_cpu_des_perf() +plot_all_cpu_frequency() +plot_all_cpu_load() + +for root, dirs, files in os.walk('.'): + for f in files: + ipt.fix_ownership(f) + +os.chdir('../../') diff --git a/tools/power/x86/intel-speed-select/.gitignore b/tools/power/x86/intel-speed-select/.gitignore new file mode 100644 index 000000000..a814f89fe --- /dev/null +++ b/tools/power/x86/intel-speed-select/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +include/ +intel-speed-select diff --git a/tools/power/x86/intel-speed-select/Build b/tools/power/x86/intel-speed-select/Build new file mode 100644 index 000000000..81e36bd57 --- /dev/null +++ b/tools/power/x86/intel-speed-select/Build @@ -0,0 +1 @@ +intel-speed-select-y += isst-config.o isst-core.o isst-display.o isst-daemon.o hfi-events.o diff --git a/tools/power/x86/intel-speed-select/Makefile b/tools/power/x86/intel-speed-select/Makefile new file mode 100644 index 000000000..7221f2f55 --- /dev/null +++ b/tools/power/x86/intel-speed-select/Makefile @@ -0,0 +1,60 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../../../scripts/Makefile.include + +bindir ?= /usr/bin + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +endif + +# Do not use make's built-in rules +# (this improves performance and avoids hard-to-debug behaviour); +MAKEFLAGS += -r +override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include -I/usr/include/libnl3 +override LDFLAGS += -lnl-genl-3 -lnl-3 + +ALL_TARGETS := intel-speed-select +ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS)) + +all: $(ALL_PROGRAMS) + +export srctree OUTPUT CC LD CFLAGS +include $(srctree)/tools/build/Makefile.include + +# +# We need the following to be outside of kernel tree +# +$(OUTPUT)include/linux/isst_if.h: ../../../../include/uapi/linux/isst_if.h + mkdir -p $(OUTPUT)include/linux 2>&1 || true + ln -sf $(CURDIR)/../../../../include/uapi/linux/isst_if.h $@ + +$(OUTPUT)include/linux/thermal.h: ../../../../include/uapi/linux/thermal.h + mkdir -p $(OUTPUT)include/linux 2>&1 || true + ln -sf $(CURDIR)/../../../../include/uapi/linux/thermal.h $@ + +prepare: $(OUTPUT)include/linux/isst_if.h $(OUTPUT)include/linux/thermal.h + +ISST_IN := $(OUTPUT)intel-speed-select-in.o + +$(ISST_IN): prepare FORCE + $(Q)$(MAKE) $(build)=intel-speed-select +$(OUTPUT)intel-speed-select: $(ISST_IN) + $(QUIET_LINK)$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@ + +clean: + rm -f $(ALL_PROGRAMS) + rm -rf $(OUTPUT)include/linux/isst_if.h + find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete + +install: $(ALL_PROGRAMS) + install -d -m 755 $(DESTDIR)$(bindir); \ + for program in $(ALL_PROGRAMS); do \ + install $$program $(DESTDIR)$(bindir); \ + done + +FORCE: + +.PHONY: all install clean FORCE prepare diff --git a/tools/power/x86/intel-speed-select/hfi-events.c b/tools/power/x86/intel-speed-select/hfi-events.c new file mode 100644 index 000000000..be96e90cc --- /dev/null +++ b/tools/power/x86/intel-speed-select/hfi-events.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Speed Select -- Read HFI events for OOB + * Copyright (c) 2022 Intel Corporation. + */ + +/* + * This file incorporates work covered by the following copyright and + * permission notice: + + * WPA Supplicant - driver interaction with Linux nl80211/cfg80211 + * Copyright (c) 2003-2008, Jouni Malinen <j@w1.fi> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Alternatively, this software may be distributed under the terms of + * BSD license. + * + * Requires + * libnl-genl-3-dev + * + * For Fedora/CenOS + * dnf install libnl3-devel + * For Ubuntu + * apt install libnl-3-dev libnl-genl-3-dev + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/file.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <getopt.h> +#include <signal.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/family.h> +#include <netlink/genl/ctrl.h> + +#include <linux/thermal.h> +#include "isst.h" + +struct hfi_event_data { + struct nl_sock *nl_handle; + struct nl_cb *nl_cb; +}; + +struct hfi_event_data drv; + +static int ack_handler(struct nl_msg *msg, void *arg) +{ + int *err = arg; + *err = 0; + return NL_STOP; +} + +static int finish_handler(struct nl_msg *msg, void *arg) +{ + int *ret = arg; + *ret = 0; + return NL_SKIP; +} + +static int error_handler(struct sockaddr_nl *nla, struct nlmsgerr *err, + void *arg) +{ + int *ret = arg; + *ret = err->error; + return NL_SKIP; +} + +static int seq_check_handler(struct nl_msg *msg, void *arg) +{ + return NL_OK; +} + +static int send_and_recv_msgs(struct hfi_event_data *drv, + struct nl_msg *msg, + int (*valid_handler)(struct nl_msg *, void *), + void *valid_data) +{ + struct nl_cb *cb; + int err = -ENOMEM; + + cb = nl_cb_clone(drv->nl_cb); + if (!cb) + goto out; + + err = nl_send_auto_complete(drv->nl_handle, msg); + if (err < 0) + goto out; + + err = 1; + + nl_cb_err(cb, NL_CB_CUSTOM, error_handler, &err); + nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, finish_handler, &err); + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, ack_handler, &err); + + if (valid_handler) + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, + valid_handler, valid_data); + + while (err > 0) + nl_recvmsgs(drv->nl_handle, cb); + out: + nl_cb_put(cb); + nlmsg_free(msg); + return err; +} + +struct family_data { + const char *group; + int id; +}; + +static int family_handler(struct nl_msg *msg, void *arg) +{ + struct family_data *res = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int i; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return NL_SKIP; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], i) { + struct nlattr *tb2[CTRL_ATTR_MCAST_GRP_MAX + 1]; + nla_parse(tb2, CTRL_ATTR_MCAST_GRP_MAX, nla_data(mcgrp), + nla_len(mcgrp), NULL); + if (!tb2[CTRL_ATTR_MCAST_GRP_NAME] || + !tb2[CTRL_ATTR_MCAST_GRP_ID] || + strncmp(nla_data(tb2[CTRL_ATTR_MCAST_GRP_NAME]), + res->group, + nla_len(tb2[CTRL_ATTR_MCAST_GRP_NAME])) != 0) + continue; + res->id = nla_get_u32(tb2[CTRL_ATTR_MCAST_GRP_ID]); + break; + } + + return 0; +} + +static int nl_get_multicast_id(struct hfi_event_data *drv, + const char *family, const char *group) +{ + struct nl_msg *msg; + int ret = -1; + struct family_data res = { group, -ENOENT }; + + msg = nlmsg_alloc(); + if (!msg) + return -ENOMEM; + genlmsg_put(msg, 0, 0, genl_ctrl_resolve(drv->nl_handle, "nlctrl"), + 0, 0, CTRL_CMD_GETFAMILY, 0); + NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = send_and_recv_msgs(drv, msg, family_handler, &res); + msg = NULL; + if (ret == 0) + ret = res.id; + +nla_put_failure: + nlmsg_free(msg); + return ret; +} + +struct perf_cap { + int cpu; + int perf; + int eff; +}; + +static void process_hfi_event(struct perf_cap *perf_cap) +{ + struct isst_id id; + + set_isst_id(&id, perf_cap->cpu); + process_level_change(&id); +} + +static int handle_event(struct nl_msg *n, void *arg) +{ + struct nlmsghdr *nlh = nlmsg_hdr(n); + struct genlmsghdr *genlhdr = genlmsg_hdr(nlh); + struct nlattr *attrs[THERMAL_GENL_ATTR_MAX + 1]; + int ret; + struct perf_cap perf_cap = {0}; + + ret = genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); + + debug_printf("Received event %d parse_rer:%d\n", genlhdr->cmd, ret); + if (genlhdr->cmd == THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE) { + struct nlattr *cap; + int j, index = 0; + + debug_printf("THERMAL_GENL_EVENT_CPU_CAPABILITY_CHANGE\n"); + nla_for_each_nested(cap, attrs[THERMAL_GENL_ATTR_CPU_CAPABILITY], j) { + switch (index) { + case 0: + perf_cap.cpu = nla_get_u32(cap); + break; + case 1: + perf_cap.perf = nla_get_u32(cap); + break; + case 2: + perf_cap.eff = nla_get_u32(cap); + break; + default: + break; + } + ++index; + if (index == 3) { + index = 0; + process_hfi_event(&perf_cap); + } + } + } + + return 0; +} + +static int _hfi_exit; + +static int check_hf_suport(void) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + + __cpuid(6, eax, ebx, ecx, edx); + if (eax & BIT(19)) + return 1; + + return 0; +} + +int hfi_main(void) +{ + struct nl_sock *sock; + struct nl_cb *cb; + int err = 0; + int mcast_id; + int no_block = 0; + + if (!check_hf_suport()) { + fprintf(stderr, "CPU Doesn't support HFI\n"); + return -1; + } + + sock = nl_socket_alloc(); + if (!sock) { + fprintf(stderr, "nl_socket_alloc failed\n"); + return -1; + } + + if (genl_connect(sock)) { + fprintf(stderr, "genl_connect(sk_event) failed\n"); + goto free_sock; + } + + drv.nl_handle = sock; + drv.nl_cb = cb = nl_cb_alloc(NL_CB_DEFAULT); + if (drv.nl_cb == NULL) { + printf("Failed to allocate netlink callbacks"); + goto free_sock; + } + + mcast_id = nl_get_multicast_id(&drv, THERMAL_GENL_FAMILY_NAME, + THERMAL_GENL_EVENT_GROUP_NAME); + if (mcast_id < 0) { + fprintf(stderr, "nl_get_multicast_id failed\n"); + goto free_sock; + } + + if (nl_socket_add_membership(sock, mcast_id)) { + fprintf(stderr, "nl_socket_add_membership failed"); + goto free_sock; + } + + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, seq_check_handler, 0); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, handle_event, NULL); + + if (no_block) + nl_socket_set_nonblocking(sock); + + debug_printf("hfi is initialized\n"); + + while (!_hfi_exit && !err) { + err = nl_recvmsgs(sock, cb); + debug_printf("nl_recv_message err:%d\n", err); + } + + return 0; + + /* Netlink library doesn't have calls to dealloc cb or disconnect */ +free_sock: + nl_socket_free(sock); + + return -1; +} + +void hfi_exit(void) +{ + _hfi_exit = 1; +} diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c new file mode 100644 index 000000000..be3668d37 --- /dev/null +++ b/tools/power/x86/intel-speed-select/isst-config.c @@ -0,0 +1,2990 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Speed Select -- Enumerate and control features + * Copyright (c) 2019 Intel Corporation. + */ + +#include <linux/isst_if.h> + +#include "isst.h" + +struct process_cmd_struct { + char *feature; + char *command; + void (*process_fn)(int arg); + int arg; +}; + +static const char *version_str = "v1.13"; + +static const int supported_api_ver = 1; +static struct isst_if_platform_info isst_platform_info; +static char *progname; +static int debug_flag; +static FILE *outf; + +static int cpu_model; +static int cpu_stepping; + +#define MAX_CPUS_IN_ONE_REQ 256 +static short max_target_cpus; +static unsigned short target_cpus[MAX_CPUS_IN_ONE_REQ]; + +static int topo_max_cpus; +static size_t present_cpumask_size; +static cpu_set_t *present_cpumask; +static size_t target_cpumask_size; +static cpu_set_t *target_cpumask; +static int tdp_level = 0xFF; +static int fact_bucket = 0xFF; +static int fact_avx = 0xFF; +static unsigned long long fact_trl; +static int out_format_json; +static int cmd_help; +static int force_online_offline; +static int auto_mode; +static int fact_enable_fail; + +static int mbox_delay; +static int mbox_retries = 3; + +/* clos related */ +static int current_clos = -1; +static int clos_epp = -1; +static int clos_prop_prio = -1; +static int clos_min = -1; +static int clos_max = -1; +static int clos_desired = -1; +static int clos_priority_type; + +struct _cpu_map { + unsigned short core_id; + unsigned short pkg_id; + unsigned short die_id; + unsigned short punit_cpu; + unsigned short punit_cpu_core; + unsigned short initialized; +}; +struct _cpu_map *cpu_map; + +struct cpu_topology { + short cpu; + short core_id; + short pkg_id; + short die_id; +}; + +FILE *get_output_file(void) +{ + return outf; +} + +void debug_printf(const char *format, ...) +{ + va_list args; + + va_start(args, format); + + if (debug_flag) + vprintf(format, args); + + va_end(args); +} + + +int is_clx_n_platform(void) +{ + if (cpu_model == 0x55) + if (cpu_stepping == 0x6 || cpu_stepping == 0x7) + return 1; + return 0; +} + +int is_skx_based_platform(void) +{ + if (cpu_model == 0x55) + return 1; + + return 0; +} + +int is_spr_platform(void) +{ + if (cpu_model == 0x8F || cpu_model == 0xCF) + return 1; + + return 0; +} + +int is_icx_platform(void) +{ + if (cpu_model == 0x6A || cpu_model == 0x6C) + return 1; + + return 0; +} + +static int update_cpu_model(void) +{ + unsigned int ebx, ecx, edx; + unsigned int fms, family; + + __cpuid(1, fms, ebx, ecx, edx); + family = (fms >> 8) & 0xf; + cpu_model = (fms >> 4) & 0xf; + if (family == 6 || family == 0xf) + cpu_model += ((fms >> 16) & 0xf) << 4; + + cpu_stepping = fms & 0xf; + /* only three CascadeLake-N models are supported */ + if (is_clx_n_platform()) { + FILE *fp; + size_t n = 0; + char *line = NULL; + int ret = 1; + + fp = fopen("/proc/cpuinfo", "r"); + if (!fp) + err(-1, "cannot open /proc/cpuinfo\n"); + + while (getline(&line, &n, fp) > 0) { + if (strstr(line, "model name")) { + if (strstr(line, "6252N") || + strstr(line, "6230N") || + strstr(line, "5218N")) + ret = 0; + break; + } + } + free(line); + fclose(fp); + return ret; + } + return 0; +} + +/* Open a file, and exit on failure */ +static FILE *fopen_or_exit(const char *path, const char *mode) +{ + FILE *filep = fopen(path, mode); + + if (!filep) + err(1, "%s: open failed", path); + + return filep; +} + +/* Parse a file containing a single int */ +static int parse_int_file(int fatal, const char *fmt, ...) +{ + va_list args; + char path[PATH_MAX]; + FILE *filep; + int value; + + va_start(args, fmt); + vsnprintf(path, sizeof(path), fmt, args); + va_end(args); + if (fatal) { + filep = fopen_or_exit(path, "r"); + } else { + filep = fopen(path, "r"); + if (!filep) + return -1; + } + if (fscanf(filep, "%d", &value) != 1) + err(1, "%s: failed to parse number from file", path); + fclose(filep); + + return value; +} + +int cpufreq_sysfs_present(void) +{ + DIR *dir; + + dir = opendir("/sys/devices/system/cpu/cpu0/cpufreq"); + if (dir) { + closedir(dir); + return 1; + } + + return 0; +} + +int out_format_is_json(void) +{ + return out_format_json; +} + +static int get_stored_topology_info(int cpu, int *core_id, int *pkg_id, int *die_id) +{ + const char *pathname = "/var/run/isst_cpu_topology.dat"; + struct cpu_topology cpu_top; + FILE *fp; + int ret; + + fp = fopen(pathname, "rb"); + if (!fp) + return -1; + + ret = fseek(fp, cpu * sizeof(cpu_top), SEEK_SET); + if (ret) + goto err_ret; + + ret = fread(&cpu_top, sizeof(cpu_top), 1, fp); + if (ret != 1) { + ret = -1; + goto err_ret; + } + + *pkg_id = cpu_top.pkg_id; + *core_id = cpu_top.core_id; + *die_id = cpu_top.die_id; + ret = 0; + +err_ret: + fclose(fp); + + return ret; +} + +static void store_cpu_topology(void) +{ + const char *pathname = "/var/run/isst_cpu_topology.dat"; + FILE *fp; + int i; + + fp = fopen(pathname, "rb"); + if (fp) { + /* Mapping already exists */ + fclose(fp); + return; + } + + fp = fopen(pathname, "wb"); + if (!fp) { + fprintf(stderr, "Can't create file:%s\n", pathname); + return; + } + + fprintf(stderr, "Caching topology information\n"); + + for (i = 0; i < topo_max_cpus; ++i) { + struct cpu_topology cpu_top; + + cpu_top.core_id = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/core_id", i); + if (cpu_top.core_id < 0) + cpu_top.core_id = -1; + + cpu_top.pkg_id = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", i); + if (cpu_top.pkg_id < 0) + cpu_top.pkg_id = -1; + + cpu_top.die_id = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/die_id", i); + if (cpu_top.die_id < 0) + cpu_top.die_id = -1; + + cpu_top.cpu = i; + + if (fwrite(&cpu_top, sizeof(cpu_top), 1, fp) != 1) { + fprintf(stderr, "Can't write to:%s\n", pathname); + break; + } + } + + fclose(fp); +} + +static int get_physical_package_id(int cpu) +{ + int ret; + + if (cpu < 0) + return -1; + + if (cpu_map && cpu_map[cpu].initialized) + return cpu_map[cpu].pkg_id; + + ret = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", + cpu); + if (ret < 0) { + int core_id, pkg_id, die_id; + + ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id); + if (!ret) + return pkg_id; + } + + return ret; +} + +static int get_physical_core_id(int cpu) +{ + int ret; + + if (cpu < 0) + return -1; + + if (cpu_map && cpu_map[cpu].initialized) + return cpu_map[cpu].core_id; + + ret = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/core_id", + cpu); + if (ret < 0) { + int core_id, pkg_id, die_id; + + ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id); + if (!ret) + return core_id; + } + + return ret; +} + +static int get_physical_die_id(int cpu) +{ + int ret; + + if (cpu < 0) + return -1; + + if (cpu_map && cpu_map[cpu].initialized) + return cpu_map[cpu].die_id; + + ret = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/die_id", + cpu); + if (ret < 0) { + int core_id, pkg_id, die_id; + + ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id); + if (!ret) { + if (die_id < 0) + die_id = 0; + + return die_id; + } + } + + if (ret < 0) + ret = 0; + + return ret; +} + +void set_isst_id(struct isst_id *id, int cpu) +{ + id->cpu = cpu; + + id->pkg = get_physical_package_id(cpu); + if (id < 0 || id->pkg >= MAX_PACKAGE_COUNT) + id->pkg = -1; + + id->die = get_physical_die_id(cpu); + if (id < 0 || id->die >= MAX_DIE_PER_PACKAGE) + id->die = -1; +} + +int is_cpu_in_power_domain(int cpu, struct isst_id *id) +{ + struct isst_id tid; + + set_isst_id(&tid, cpu); + + if (id->pkg == tid.pkg && id->die == tid.die) + return 1; + + return 0; +} + +int get_cpufreq_base_freq(int cpu) +{ + return parse_int_file(0, "/sys/devices/system/cpu/cpu%d/cpufreq/base_frequency", cpu); +} + +int get_topo_max_cpus(void) +{ + return topo_max_cpus; +} + +void set_cpu_online_offline(int cpu, int state) +{ + char buffer[128]; + int fd, ret; + + snprintf(buffer, sizeof(buffer), + "/sys/devices/system/cpu/cpu%d/online", cpu); + + fd = open(buffer, O_WRONLY); + if (fd < 0) { + if (!cpu && state) { + fprintf(stderr, "This system is not configured for CPU 0 online/offline\n"); + fprintf(stderr, "Ignoring online request for CPU 0 as this is already online\n"); + return; + } + err(-1, "%s open failed", buffer); + } + + if (state) + ret = write(fd, "1\n", 2); + else + ret = write(fd, "0\n", 2); + + if (ret == -1) + perror("Online/Offline: Operation failed\n"); + + close(fd); +} + +static void force_all_cpus_online(void) +{ + int i; + + fprintf(stderr, "Forcing all CPUs online\n"); + + for (i = 0; i < topo_max_cpus; ++i) + set_cpu_online_offline(i, 1); + + unlink("/var/run/isst_cpu_topology.dat"); +} + +void for_each_online_package_in_set(void (*callback)(struct isst_id *, void *, void *, + void *, void *), + void *arg1, void *arg2, void *arg3, + void *arg4) +{ + int max_packages[MAX_PACKAGE_COUNT * MAX_PACKAGE_COUNT]; + int pkg_index = 0, i; + struct isst_id id; + + memset(max_packages, 0xff, sizeof(max_packages)); + for (i = 0; i < topo_max_cpus; ++i) { + int j, online, pkg_id, die_id = 0, skip = 0; + + if (!CPU_ISSET_S(i, present_cpumask_size, present_cpumask)) + continue; + if (i) + online = parse_int_file( + 1, "/sys/devices/system/cpu/cpu%d/online", i); + else + online = + 1; /* online entry for CPU 0 needs some special configs */ + + die_id = get_physical_die_id(i); + if (die_id < 0) + die_id = 0; + + pkg_id = parse_int_file(0, + "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", i); + if (pkg_id < 0) + continue; + + /* Create an unique id for package, die combination to store */ + pkg_id = (MAX_PACKAGE_COUNT * pkg_id + die_id); + + for (j = 0; j < pkg_index; ++j) { + if (max_packages[j] == pkg_id) { + skip = 1; + break; + } + } + + set_isst_id(&id, i); + if (!skip && online && callback) { + callback(&id, arg1, arg2, arg3, arg4); + max_packages[pkg_index++] = pkg_id; + } + } +} + +static void for_each_online_target_cpu_in_set( + void (*callback)(struct isst_id *, void *, void *, void *, void *), void *arg1, + void *arg2, void *arg3, void *arg4) +{ + int i, found = 0; + struct isst_id id; + + for (i = 0; i < topo_max_cpus; ++i) { + int online; + + if (!CPU_ISSET_S(i, target_cpumask_size, target_cpumask)) + continue; + if (i) + online = parse_int_file( + 1, "/sys/devices/system/cpu/cpu%d/online", i); + else + online = + 1; /* online entry for CPU 0 needs some special configs */ + + set_isst_id(&id, i); + if (online && callback) { + callback(&id, arg1, arg2, arg3, arg4); + found = 1; + } + } + + if (!found) + fprintf(stderr, "No valid CPU in the list\n"); +} + +#define BITMASK_SIZE 32 +static void set_max_cpu_num(void) +{ + FILE *filep; + unsigned long dummy; + int i; + + topo_max_cpus = 0; + for (i = 0; i < 256; ++i) { + char path[256]; + + snprintf(path, sizeof(path), + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", i); + filep = fopen(path, "r"); + if (filep) + break; + } + + if (!filep) { + fprintf(stderr, "Can't get max cpu number\n"); + exit(0); + } + + while (fscanf(filep, "%lx,", &dummy) == 1) + topo_max_cpus += BITMASK_SIZE; + fclose(filep); + + debug_printf("max cpus %d\n", topo_max_cpus); +} + +size_t alloc_cpu_set(cpu_set_t **cpu_set) +{ + cpu_set_t *_cpu_set; + size_t size; + + _cpu_set = CPU_ALLOC((topo_max_cpus + 1)); + if (_cpu_set == NULL) + err(3, "CPU_ALLOC"); + size = CPU_ALLOC_SIZE((topo_max_cpus + 1)); + CPU_ZERO_S(size, _cpu_set); + + *cpu_set = _cpu_set; + return size; +} + +void free_cpu_set(cpu_set_t *cpu_set) +{ + CPU_FREE(cpu_set); +} + +static int cpu_cnt[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE]; + +int get_max_punit_core_id(struct isst_id *id) +{ + int max_id = 0; + int i; + + for (i = 0; i < topo_max_cpus; ++i) + { + if (!CPU_ISSET_S(i, present_cpumask_size, present_cpumask)) + continue; + + if (is_cpu_in_power_domain(i, id) && + cpu_map[i].punit_cpu_core > max_id) + max_id = cpu_map[i].punit_cpu_core; + } + + return max_id; +} + +int get_cpu_count(struct isst_id *id) +{ + if (id->pkg < 0 || id->die < 0) + return 0; + + return cpu_cnt[id->pkg][id->die]; +} + +static void create_cpu_map(void) +{ + const char *pathname = "/dev/isst_interface"; + size_t size; + DIR *dir; + int i, fd = 0; + struct isst_if_cpu_maps map; + + /* Use calloc to make sure the memory is initialized to Zero */ + cpu_map = calloc(topo_max_cpus, sizeof(*cpu_map)); + if (!cpu_map) + err(3, "cpumap"); + + fd = open(pathname, O_RDWR); + if (fd < 0 && !is_clx_n_platform()) + err(-1, "%s open failed", pathname); + + size = alloc_cpu_set(&present_cpumask); + present_cpumask_size = size; + + for (i = 0; i < topo_max_cpus; ++i) { + char buffer[256]; + int pkg_id, die_id, core_id; + + /* check if CPU is online */ + snprintf(buffer, sizeof(buffer), + "/sys/devices/system/cpu/cpu%d", i); + dir = opendir(buffer); + if (!dir) + continue; + closedir(dir); + + CPU_SET_S(i, size, present_cpumask); + + pkg_id = get_physical_package_id(i); + die_id = get_physical_die_id(i); + core_id = get_physical_core_id(i); + + if (pkg_id < 0 || die_id < 0 || core_id < 0) + continue; + + cpu_map[i].pkg_id = pkg_id; + cpu_map[i].die_id = die_id; + cpu_map[i].core_id = core_id; + cpu_map[i].initialized = 1; + + cpu_cnt[pkg_id][die_id]++; + + if (fd < 0) + continue; + map.cmd_count = 1; + map.cpu_map[0].logical_cpu = i; + debug_printf(" map logical_cpu:%d\n", + map.cpu_map[0].logical_cpu); + if (ioctl(fd, ISST_IF_GET_PHY_ID, &map) == -1) { + perror("ISST_IF_GET_PHY_ID"); + fprintf(outf, "Error: map logical_cpu:%d\n", + map.cpu_map[0].logical_cpu); + continue; + } + cpu_map[i].punit_cpu = map.cpu_map[0].physical_cpu; + cpu_map[i].punit_cpu_core = (map.cpu_map[0].physical_cpu >> + 1); // shift to get core id + + debug_printf( + "map logical_cpu:%d core: %d die:%d pkg:%d punit_cpu:%d punit_core:%d\n", + i, cpu_map[i].core_id, cpu_map[i].die_id, + cpu_map[i].pkg_id, cpu_map[i].punit_cpu, + cpu_map[i].punit_cpu_core); + } + if (fd >= 0) + close(fd); + + size = alloc_cpu_set(&target_cpumask); + target_cpumask_size = size; + for (i = 0; i < max_target_cpus; ++i) { + if (!CPU_ISSET_S(target_cpus[i], present_cpumask_size, + present_cpumask)) + continue; + + CPU_SET_S(target_cpus[i], size, target_cpumask); + } +} + +void set_cpu_mask_from_punit_coremask(struct isst_id *id, unsigned long long core_mask, + size_t core_cpumask_size, + cpu_set_t *core_cpumask, int *cpu_cnt) +{ + int i, cnt = 0; + + *cpu_cnt = 0; + + for (i = 0; i < 64; ++i) { + if (core_mask & BIT_ULL(i)) { + int j; + + for (j = 0; j < topo_max_cpus; ++j) { + if (!CPU_ISSET_S(j, present_cpumask_size, present_cpumask)) + continue; + + if (is_cpu_in_power_domain(j, id) && + cpu_map[j].punit_cpu_core == i) { + CPU_SET_S(j, core_cpumask_size, + core_cpumask); + ++cnt; + } + } + } + } + + *cpu_cnt = cnt; +} + +int find_phy_core_num(int logical_cpu) +{ + if (logical_cpu < topo_max_cpus) + return cpu_map[logical_cpu].punit_cpu_core; + + return -EINVAL; +} + +static int isst_send_mmio_command(unsigned int cpu, unsigned int reg, int write, + unsigned int *value) +{ + struct isst_if_io_regs io_regs; + const char *pathname = "/dev/isst_interface"; + int cmd; + int fd; + + debug_printf("mmio_cmd cpu:%d reg:%d write:%d\n", cpu, reg, write); + + fd = open(pathname, O_RDWR); + if (fd < 0) + err(-1, "%s open failed", pathname); + + io_regs.req_count = 1; + io_regs.io_reg[0].logical_cpu = cpu; + io_regs.io_reg[0].reg = reg; + cmd = ISST_IF_IO_CMD; + if (write) { + io_regs.io_reg[0].read_write = 1; + io_regs.io_reg[0].value = *value; + } else { + io_regs.io_reg[0].read_write = 0; + } + + if (ioctl(fd, cmd, &io_regs) == -1) { + if (errno == ENOTTY) { + perror("ISST_IF_IO_COMMAND\n"); + fprintf(stderr, "Check presence of kernel modules: isst_if_mmio\n"); + exit(0); + } + fprintf(outf, "Error: mmio_cmd cpu:%d reg:%x read_write:%x\n", + cpu, reg, write); + } else { + if (!write) + *value = io_regs.io_reg[0].value; + + debug_printf( + "mmio_cmd response: cpu:%d reg:%x rd_write:%x resp:%x\n", + cpu, reg, write, *value); + } + + close(fd); + + return 0; +} + +int isst_send_mbox_command(unsigned int cpu, unsigned char command, + unsigned char sub_command, unsigned int parameter, + unsigned int req_data, unsigned int *resp) +{ + const char *pathname = "/dev/isst_interface"; + int fd, retry; + struct isst_if_mbox_cmds mbox_cmds = { 0 }; + + debug_printf( + "mbox_send: cpu:%d command:%x sub_command:%x parameter:%x req_data:%x\n", + cpu, command, sub_command, parameter, req_data); + + if (!is_skx_based_platform() && command == CONFIG_CLOS && + sub_command != CLOS_PM_QOS_CONFIG) { + unsigned int value; + int write = 0; + int clos_id, core_id, ret = 0; + + debug_printf("CPU %d\n", cpu); + + if (parameter & BIT(MBOX_CMD_WRITE_BIT)) { + value = req_data; + write = 1; + } + + switch (sub_command) { + case CLOS_PQR_ASSOC: + core_id = parameter & 0xff; + ret = isst_send_mmio_command( + cpu, PQR_ASSOC_OFFSET + core_id * 4, write, + &value); + if (!ret && !write) + *resp = value; + break; + case CLOS_PM_CLOS: + clos_id = parameter & 0x03; + ret = isst_send_mmio_command( + cpu, PM_CLOS_OFFSET + clos_id * 4, write, + &value); + if (!ret && !write) + *resp = value; + break; + case CLOS_STATUS: + break; + default: + break; + } + return ret; + } + + mbox_cmds.cmd_count = 1; + mbox_cmds.mbox_cmd[0].logical_cpu = cpu; + mbox_cmds.mbox_cmd[0].command = command; + mbox_cmds.mbox_cmd[0].sub_command = sub_command; + mbox_cmds.mbox_cmd[0].parameter = parameter; + mbox_cmds.mbox_cmd[0].req_data = req_data; + + if (mbox_delay) + usleep(mbox_delay * 1000); + + fd = open(pathname, O_RDWR); + if (fd < 0) + err(-1, "%s open failed", pathname); + + retry = mbox_retries; + + do { + if (ioctl(fd, ISST_IF_MBOX_COMMAND, &mbox_cmds) == -1) { + if (errno == ENOTTY) { + perror("ISST_IF_MBOX_COMMAND\n"); + fprintf(stderr, "Check presence of kernel modules: isst_if_mbox_pci or isst_if_mbox_msr\n"); + exit(0); + } + debug_printf( + "Error: mbox_cmd cpu:%d command:%x sub_command:%x parameter:%x req_data:%x errorno:%d\n", + cpu, command, sub_command, parameter, req_data, errno); + --retry; + } else { + *resp = mbox_cmds.mbox_cmd[0].resp_data; + debug_printf( + "mbox_cmd response: cpu:%d command:%x sub_command:%x parameter:%x req_data:%x resp:%x\n", + cpu, command, sub_command, parameter, req_data, *resp); + break; + } + } while (retry); + + close(fd); + + if (!retry) { + debug_printf("Failed mbox command even after retries\n"); + return -1; + + } + return 0; +} + +int isst_send_msr_command(unsigned int cpu, unsigned int msr, int write, + unsigned long long *req_resp) +{ + struct isst_if_msr_cmds msr_cmds; + const char *pathname = "/dev/isst_interface"; + int fd; + + fd = open(pathname, O_RDWR); + if (fd < 0) + err(-1, "%s open failed", pathname); + + msr_cmds.cmd_count = 1; + msr_cmds.msr_cmd[0].logical_cpu = cpu; + msr_cmds.msr_cmd[0].msr = msr; + msr_cmds.msr_cmd[0].read_write = write; + if (write) + msr_cmds.msr_cmd[0].data = *req_resp; + + if (ioctl(fd, ISST_IF_MSR_COMMAND, &msr_cmds) == -1) { + perror("ISST_IF_MSR_COMMAND"); + fprintf(outf, "Error: msr_cmd cpu:%d msr:%x read_write:%d\n", + cpu, msr, write); + } else { + if (!write) + *req_resp = msr_cmds.msr_cmd[0].data; + + debug_printf( + "msr_cmd response: cpu:%d msr:%x rd_write:%x resp:%llx %llx\n", + cpu, msr, write, *req_resp, msr_cmds.msr_cmd[0].data); + } + + close(fd); + + return 0; +} + +static int isst_fill_platform_info(void) +{ + const char *pathname = "/dev/isst_interface"; + int fd; + + fd = open(pathname, O_RDWR); + if (fd < 0) + err(-1, "%s open failed", pathname); + + if (ioctl(fd, ISST_IF_GET_PLATFORM_INFO, &isst_platform_info) == -1) { + perror("ISST_IF_GET_PLATFORM_INFO"); + close(fd); + return -1; + } + + close(fd); + + if (isst_platform_info.api_version > supported_api_ver) { + printf("Incompatible API versions; Upgrade of tool is required\n"); + return -1; + } + return 0; +} + +static void isst_print_extended_platform_info(void) +{ + int cp_state, cp_cap, fact_support = 0, pbf_support = 0; + struct isst_pkg_ctdp_level_info ctdp_level; + struct isst_pkg_ctdp pkg_dev; + int ret, i, j; + FILE *filep; + struct isst_id id; + + for (i = 0; i < 256; ++i) { + char path[256]; + + snprintf(path, sizeof(path), + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", i); + filep = fopen(path, "r"); + if (filep) + break; + } + + if (!filep) + return; + + fclose(filep); + + set_isst_id(&id, i); + ret = isst_get_ctdp_levels(&id, &pkg_dev); + if (ret) + return; + + if (pkg_dev.enabled) { + fprintf(outf, "Intel(R) SST-PP (feature perf-profile) is supported\n"); + } else { + fprintf(outf, "Intel(R) SST-PP (feature perf-profile) is not supported\n"); + fprintf(outf, "Only performance level 0 (base level) is present\n"); + } + + if (pkg_dev.locked) + fprintf(outf, "TDP level change control is locked\n"); + else + fprintf(outf, "TDP level change control is unlocked, max level: %d \n", pkg_dev.levels); + + for (j = 0; j <= pkg_dev.levels; ++j) { + ret = isst_get_ctdp_control(&id, j, &ctdp_level); + if (ret) + continue; + + if (!fact_support && ctdp_level.fact_support) + fact_support = 1; + + if (!pbf_support && ctdp_level.pbf_support) + pbf_support = 1; + } + + if (fact_support) + fprintf(outf, "Intel(R) SST-TF (feature turbo-freq) is supported\n"); + else + fprintf(outf, "Intel(R) SST-TF (feature turbo-freq) is not supported\n"); + + if (pbf_support) + fprintf(outf, "Intel(R) SST-BF (feature base-freq) is supported\n"); + else + fprintf(outf, "Intel(R) SST-BF (feature base-freq) is not supported\n"); + + ret = isst_read_pm_config(&id, &cp_state, &cp_cap); + if (ret) { + fprintf(outf, "Intel(R) SST-CP (feature core-power) status is unknown\n"); + return; + } + if (cp_cap) + fprintf(outf, "Intel(R) SST-CP (feature core-power) is supported\n"); + else + fprintf(outf, "Intel(R) SST-CP (feature core-power) is not supported\n"); +} + +static void isst_print_platform_information(void) +{ + struct isst_if_platform_info platform_info; + const char *pathname = "/dev/isst_interface"; + int fd; + + if (is_clx_n_platform()) { + fprintf(stderr, "\nThis option in not supported on this platform\n"); + exit(0); + } + + /* Early initialization to create working cpu_map */ + set_max_cpu_num(); + create_cpu_map(); + + fd = open(pathname, O_RDWR); + if (fd < 0) + err(-1, "%s open failed", pathname); + + if (ioctl(fd, ISST_IF_GET_PLATFORM_INFO, &platform_info) == -1) { + perror("ISST_IF_GET_PLATFORM_INFO"); + } else { + fprintf(outf, "Platform: API version : %d\n", + platform_info.api_version); + fprintf(outf, "Platform: Driver version : %d\n", + platform_info.driver_version); + fprintf(outf, "Platform: mbox supported : %d\n", + platform_info.mbox_supported); + fprintf(outf, "Platform: mmio supported : %d\n", + platform_info.mmio_supported); + isst_print_extended_platform_info(); + } + + close(fd); + + exit(0); +} + +static char *local_str0, *local_str1; +static void exec_on_get_ctdp_cpu(struct isst_id *id, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + int (*fn_ptr)(struct isst_id *id, void *arg); + int ret; + + fn_ptr = arg1; + ret = fn_ptr(id, arg2); + if (ret) + isst_display_error_info_message(1, "get_tdp_* failed", 0, 0); + else + isst_ctdp_display_core_info(id, outf, arg3, + *(unsigned int *)arg4, + local_str0, local_str1); +} + +#define _get_tdp_level(desc, suffix, object, help, str0, str1) \ + static void get_tdp_##object(int arg) \ + { \ + struct isst_pkg_ctdp ctdp; \ +\ + if (cmd_help) { \ + fprintf(stderr, \ + "Print %s [No command arguments are required]\n", \ + help); \ + exit(0); \ + } \ + local_str0 = str0; \ + local_str1 = str1; \ + isst_ctdp_display_information_start(outf); \ + if (max_target_cpus) \ + for_each_online_target_cpu_in_set( \ + exec_on_get_ctdp_cpu, isst_get_ctdp_##suffix, \ + &ctdp, desc, &ctdp.object); \ + else \ + for_each_online_package_in_set(exec_on_get_ctdp_cpu, \ + isst_get_ctdp_##suffix, \ + &ctdp, desc, \ + &ctdp.object); \ + isst_ctdp_display_information_end(outf); \ + } + +_get_tdp_level("get-config-levels", levels, levels, "Max TDP level", NULL, NULL); +_get_tdp_level("get-config-version", levels, version, "TDP version", NULL, NULL); +_get_tdp_level("get-config-enabled", levels, enabled, "perf-profile enable status", "disabled", "enabled"); +_get_tdp_level("get-config-current_level", levels, current_level, + "Current TDP Level", NULL, NULL); +_get_tdp_level("get-lock-status", levels, locked, "TDP lock status", "unlocked", "locked"); + +struct isst_pkg_ctdp clx_n_pkg_dev; + +static int clx_n_get_base_ratio(void) +{ + FILE *fp; + char *begin, *end, *line = NULL; + char number[5]; + float value = 0; + size_t n = 0; + + fp = fopen("/proc/cpuinfo", "r"); + if (!fp) + err(-1, "cannot open /proc/cpuinfo\n"); + + while (getline(&line, &n, fp) > 0) { + if (strstr(line, "model name")) { + /* this is true for CascadeLake-N */ + begin = strstr(line, "@ ") + 2; + end = strstr(line, "GHz"); + strncpy(number, begin, end - begin); + value = atof(number) * 10; + break; + } + } + free(line); + fclose(fp); + + return (int)(value); +} + +static int clx_n_config(struct isst_id *id) +{ + int i, ret; + unsigned long cpu_bf; + struct isst_pkg_ctdp_level_info *ctdp_level; + struct isst_pbf_info *pbf_info; + + ctdp_level = &clx_n_pkg_dev.ctdp_level[0]; + pbf_info = &ctdp_level->pbf_info; + ctdp_level->core_cpumask_size = + alloc_cpu_set(&ctdp_level->core_cpumask); + + /* find the frequency base ratio */ + ctdp_level->tdp_ratio = clx_n_get_base_ratio(); + if (ctdp_level->tdp_ratio == 0) { + debug_printf("CLX: cn base ratio is zero\n"); + ret = -1; + goto error_ret; + } + + /* find the high and low priority frequencies */ + pbf_info->p1_high = 0; + pbf_info->p1_low = ~0; + + for (i = 0; i < topo_max_cpus; i++) { + if (!CPU_ISSET_S(i, present_cpumask_size, present_cpumask)) + continue; + + if (!is_cpu_in_power_domain(i, id)) + continue; + + CPU_SET_S(i, ctdp_level->core_cpumask_size, + ctdp_level->core_cpumask); + + cpu_bf = parse_int_file(1, + "/sys/devices/system/cpu/cpu%d/cpufreq/base_frequency", + i); + if (cpu_bf > pbf_info->p1_high) + pbf_info->p1_high = cpu_bf; + if (cpu_bf < pbf_info->p1_low) + pbf_info->p1_low = cpu_bf; + } + + if (pbf_info->p1_high == ~0UL) { + debug_printf("CLX: maximum base frequency not set\n"); + ret = -1; + goto error_ret; + } + + if (pbf_info->p1_low == 0) { + debug_printf("CLX: minimum base frequency not set\n"); + ret = -1; + goto error_ret; + } + + /* convert frequencies back to ratios */ + pbf_info->p1_high = pbf_info->p1_high / 100000; + pbf_info->p1_low = pbf_info->p1_low / 100000; + + /* create high priority cpu mask */ + pbf_info->core_cpumask_size = alloc_cpu_set(&pbf_info->core_cpumask); + for (i = 0; i < topo_max_cpus; i++) { + if (!CPU_ISSET_S(i, present_cpumask_size, present_cpumask)) + continue; + + if (!is_cpu_in_power_domain(i, id)) + continue; + + cpu_bf = parse_int_file(1, + "/sys/devices/system/cpu/cpu%d/cpufreq/base_frequency", + i); + cpu_bf = cpu_bf / 100000; + if (cpu_bf == pbf_info->p1_high) + CPU_SET_S(i, pbf_info->core_cpumask_size, + pbf_info->core_cpumask); + } + + /* extra ctdp & pbf struct parameters */ + ctdp_level->processed = 1; + ctdp_level->pbf_support = 1; /* PBF is always supported and enabled */ + ctdp_level->pbf_enabled = 1; + ctdp_level->fact_support = 0; /* FACT is never supported */ + ctdp_level->fact_enabled = 0; + + return 0; + +error_ret: + free_cpu_set(ctdp_level->core_cpumask); + return ret; +} + +static void dump_clx_n_config_for_cpu(struct isst_id *id, void *arg1, void *arg2, + void *arg3, void *arg4) +{ + int ret; + + if (tdp_level != 0xff && tdp_level != 0) { + isst_display_error_info_message(1, "Invalid level", 1, tdp_level); + exit(0); + } + + ret = clx_n_config(id); + if (ret) { + debug_printf("clx_n_config failed"); + } else { + struct isst_pkg_ctdp_level_info *ctdp_level; + struct isst_pbf_info *pbf_info; + + ctdp_level = &clx_n_pkg_dev.ctdp_level[0]; + pbf_info = &ctdp_level->pbf_info; + clx_n_pkg_dev.processed = 1; + isst_ctdp_display_information(id, outf, tdp_level, &clx_n_pkg_dev); + free_cpu_set(ctdp_level->core_cpumask); + free_cpu_set(pbf_info->core_cpumask); + } +} + +static void dump_isst_config_for_cpu(struct isst_id *id, void *arg1, void *arg2, + void *arg3, void *arg4) +{ + struct isst_pkg_ctdp pkg_dev; + int ret; + + memset(&pkg_dev, 0, sizeof(pkg_dev)); + ret = isst_get_process_ctdp(id, tdp_level, &pkg_dev); + if (ret) { + isst_display_error_info_message(1, "Failed to get perf-profile info on cpu", 1, id->cpu); + isst_ctdp_display_information_end(outf); + exit(1); + } else { + isst_ctdp_display_information(id, outf, tdp_level, &pkg_dev); + isst_get_process_ctdp_complete(id, &pkg_dev); + } +} + +static void dump_isst_config(int arg) +{ + void *fn; + + if (cmd_help) { + fprintf(stderr, + "Print Intel(R) Speed Select Technology Performance profile configuration\n"); + fprintf(stderr, + "including base frequency and turbo frequency configurations\n"); + fprintf(stderr, "Optional: -l|--level : Specify tdp level\n"); + fprintf(stderr, + "\tIf no arguments, dump information for all TDP levels\n"); + exit(0); + } + + if (!is_clx_n_platform()) + fn = dump_isst_config_for_cpu; + else + fn = dump_clx_n_config_for_cpu; + + isst_ctdp_display_information_start(outf); + + if (max_target_cpus) + for_each_online_target_cpu_in_set(fn, NULL, NULL, NULL, NULL); + else + for_each_online_package_in_set(fn, NULL, NULL, NULL, NULL); + + isst_ctdp_display_information_end(outf); +} + +static void adjust_scaling_max_from_base_freq(int cpu); + +static void set_tdp_level_for_cpu(struct isst_id *id, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + int ret; + + ret = isst_set_tdp_level(id, tdp_level); + if (ret) { + isst_display_error_info_message(1, "Set TDP level failed", 0, 0); + isst_ctdp_display_information_end(outf); + exit(1); + } else { + isst_display_result(id, outf, "perf-profile", "set_tdp_level", + ret); + if (force_online_offline) { + struct isst_pkg_ctdp_level_info ctdp_level; + + /* Wait for updated base frequencies */ + usleep(2000); + + fprintf(stderr, "Option is set to online/offline\n"); + ctdp_level.core_cpumask_size = + alloc_cpu_set(&ctdp_level.core_cpumask); + ret = isst_get_coremask_info(id, tdp_level, &ctdp_level); + if (ret) { + isst_display_error_info_message(1, "Can't get coremask, online/offline option is ignored", 0, 0); + return; + } + if (ctdp_level.cpu_count) { + int i, max_cpus = get_topo_max_cpus(); + for (i = 0; i < max_cpus; ++i) { + if (!is_cpu_in_power_domain(i, id)) + continue; + if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) { + fprintf(stderr, "online cpu %d\n", i); + set_cpu_online_offline(i, 1); + adjust_scaling_max_from_base_freq(i); + } else { + fprintf(stderr, "offline cpu %d\n", i); + set_cpu_online_offline(i, 0); + } + } + } + } + } +} + +static void set_tdp_level(int arg) +{ + if (cmd_help) { + fprintf(stderr, "Set Config TDP level\n"); + fprintf(stderr, + "\t Arguments: -l|--level : Specify tdp level\n"); + fprintf(stderr, + "\t Optional Arguments: -o | online : online/offline for the tdp level\n"); + fprintf(stderr, + "\t online/offline operation has limitations, refer to Linux hotplug documentation\n"); + exit(0); + } + + if (tdp_level == 0xff) { + isst_display_error_info_message(1, "Invalid command: specify tdp_level", 0, 0); + exit(1); + } + isst_ctdp_display_information_start(outf); + if (max_target_cpus) + for_each_online_target_cpu_in_set(set_tdp_level_for_cpu, NULL, + NULL, NULL, NULL); + else + for_each_online_package_in_set(set_tdp_level_for_cpu, NULL, + NULL, NULL, NULL); + isst_ctdp_display_information_end(outf); +} + +static void clx_n_dump_pbf_config_for_cpu(struct isst_id *id, void *arg1, void *arg2, + void *arg3, void *arg4) +{ + int ret; + + ret = clx_n_config(id); + if (ret) { + isst_display_error_info_message(1, "clx_n_config failed", 0, 0); + } else { + struct isst_pkg_ctdp_level_info *ctdp_level; + struct isst_pbf_info *pbf_info; + + ctdp_level = &clx_n_pkg_dev.ctdp_level[0]; + pbf_info = &ctdp_level->pbf_info; + isst_pbf_display_information(id, outf, tdp_level, pbf_info); + free_cpu_set(ctdp_level->core_cpumask); + free_cpu_set(pbf_info->core_cpumask); + } +} + +static void dump_pbf_config_for_cpu(struct isst_id *id, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + struct isst_pbf_info pbf_info; + int ret; + + ret = isst_get_pbf_info(id, tdp_level, &pbf_info); + if (ret) { + isst_display_error_info_message(1, "Failed to get base-freq info at this level", 1, tdp_level); + isst_ctdp_display_information_end(outf); + exit(1); + } else { + isst_pbf_display_information(id, outf, tdp_level, &pbf_info); + isst_get_pbf_info_complete(&pbf_info); + } +} + +static void dump_pbf_config(int arg) +{ + void *fn; + + if (cmd_help) { + fprintf(stderr, + "Print Intel(R) Speed Select Technology base frequency configuration for a TDP level\n"); + fprintf(stderr, + "\tArguments: -l|--level : Specify tdp level\n"); + exit(0); + } + + if (tdp_level == 0xff) { + isst_display_error_info_message(1, "Invalid command: specify tdp_level", 0, 0); + exit(1); + } + + if (!is_clx_n_platform()) + fn = dump_pbf_config_for_cpu; + else + fn = clx_n_dump_pbf_config_for_cpu; + + isst_ctdp_display_information_start(outf); + + if (max_target_cpus) + for_each_online_target_cpu_in_set(fn, NULL, NULL, NULL, NULL); + else + for_each_online_package_in_set(fn, NULL, NULL, NULL, NULL); + + isst_ctdp_display_information_end(outf); +} + +static int set_clos_param(struct isst_id *id, int clos, int epp, int wt, int min, int max) +{ + struct isst_clos_config clos_config; + int ret; + + ret = isst_pm_get_clos(id, clos, &clos_config); + if (ret) { + isst_display_error_info_message(1, "isst_pm_get_clos failed", 0, 0); + return ret; + } + clos_config.clos_min = min; + clos_config.clos_max = max; + clos_config.epp = epp; + clos_config.clos_prop_prio = wt; + ret = isst_set_clos(id, clos, &clos_config); + if (ret) { + isst_display_error_info_message(1, "isst_set_clos failed", 0, 0); + return ret; + } + + return 0; +} + +static int set_cpufreq_scaling_min_max(int cpu, int max, int freq) +{ + char buffer[128], freq_str[16]; + int fd, ret, len; + + if (max) + snprintf(buffer, sizeof(buffer), + "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu); + else + snprintf(buffer, sizeof(buffer), + "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", cpu); + + fd = open(buffer, O_WRONLY); + if (fd < 0) + return fd; + + snprintf(freq_str, sizeof(freq_str), "%d", freq); + len = strlen(freq_str); + ret = write(fd, freq_str, len); + if (ret == -1) { + close(fd); + return ret; + } + close(fd); + + return 0; +} + +static int no_turbo(void) +{ + return parse_int_file(0, "/sys/devices/system/cpu/intel_pstate/no_turbo"); +} + +static void adjust_scaling_max_from_base_freq(int cpu) +{ + int base_freq, scaling_max_freq; + + scaling_max_freq = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu); + base_freq = get_cpufreq_base_freq(cpu); + if (scaling_max_freq < base_freq || no_turbo()) + set_cpufreq_scaling_min_max(cpu, 1, base_freq); +} + +static void adjust_scaling_min_from_base_freq(int cpu) +{ + int base_freq, scaling_min_freq; + + scaling_min_freq = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", cpu); + base_freq = get_cpufreq_base_freq(cpu); + if (scaling_min_freq < base_freq) + set_cpufreq_scaling_min_max(cpu, 0, base_freq); +} + +static int set_clx_pbf_cpufreq_scaling_min_max(struct isst_id *id) +{ + struct isst_pkg_ctdp_level_info *ctdp_level; + struct isst_pbf_info *pbf_info; + int i, freq, freq_high, freq_low; + int ret; + + ret = clx_n_config(id); + if (ret) { + debug_printf("cpufreq_scaling_min_max failed for CLX"); + return ret; + } + + ctdp_level = &clx_n_pkg_dev.ctdp_level[0]; + pbf_info = &ctdp_level->pbf_info; + freq_high = pbf_info->p1_high * 100000; + freq_low = pbf_info->p1_low * 100000; + + for (i = 0; i < get_topo_max_cpus(); ++i) { + if (!is_cpu_in_power_domain(i, id)) + continue; + + if (CPU_ISSET_S(i, pbf_info->core_cpumask_size, + pbf_info->core_cpumask)) + freq = freq_high; + else + freq = freq_low; + + set_cpufreq_scaling_min_max(i, 1, freq); + set_cpufreq_scaling_min_max(i, 0, freq); + } + + return 0; +} + +static int set_cpufreq_scaling_min_max_from_cpuinfo(int cpu, int cpuinfo_max, int scaling_max) +{ + char buffer[128], min_freq[16]; + int fd, ret, len; + + if (!CPU_ISSET_S(cpu, present_cpumask_size, present_cpumask)) + return -1; + + if (cpuinfo_max) + snprintf(buffer, sizeof(buffer), + "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu); + else + snprintf(buffer, sizeof(buffer), + "/sys/devices/system/cpu/cpu%d/cpufreq/cpuinfo_min_freq", cpu); + + fd = open(buffer, O_RDONLY); + if (fd < 0) + return fd; + + len = read(fd, min_freq, sizeof(min_freq)); + close(fd); + + if (len < 0) + return len; + + if (scaling_max) + snprintf(buffer, sizeof(buffer), + "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_max_freq", cpu); + else + snprintf(buffer, sizeof(buffer), + "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_min_freq", cpu); + + fd = open(buffer, O_WRONLY); + if (fd < 0) + return fd; + + len = strlen(min_freq); + ret = write(fd, min_freq, len); + if (ret == -1) { + close(fd); + return ret; + } + close(fd); + + return 0; +} + +static void set_scaling_min_to_cpuinfo_max(struct isst_id *id) +{ + int i; + + for (i = 0; i < get_topo_max_cpus(); ++i) { + if (!is_cpu_in_power_domain(i, id)) + continue; + + adjust_scaling_max_from_base_freq(i); + set_cpufreq_scaling_min_max_from_cpuinfo(i, 1, 0); + adjust_scaling_min_from_base_freq(i); + } +} + +static void set_scaling_min_to_cpuinfo_min(struct isst_id *id) +{ + int i; + + for (i = 0; i < get_topo_max_cpus(); ++i) { + if (!is_cpu_in_power_domain(i, id)) + continue; + + adjust_scaling_max_from_base_freq(i); + set_cpufreq_scaling_min_max_from_cpuinfo(i, 0, 0); + } +} + +static void set_scaling_max_to_cpuinfo_max(struct isst_id *id) +{ + int i; + + for (i = 0; i < get_topo_max_cpus(); ++i) { + if (!is_cpu_in_power_domain(i, id)) + continue; + + set_cpufreq_scaling_min_max_from_cpuinfo(i, 1, 1); + } +} + +static int set_core_priority_and_min(struct isst_id *id, int mask_size, + cpu_set_t *cpu_mask, int min_high, + int min_low) +{ + int ret, i; + + if (!CPU_COUNT_S(mask_size, cpu_mask)) + return -1; + + ret = set_clos_param(id, 0, 0, 0, min_high, 0xff); + if (ret) + return ret; + + ret = set_clos_param(id, 1, 15, 15, min_low, 0xff); + if (ret) + return ret; + + ret = set_clos_param(id, 2, 15, 15, min_low, 0xff); + if (ret) + return ret; + + ret = set_clos_param(id, 3, 15, 15, min_low, 0xff); + if (ret) + return ret; + + for (i = 0; i < get_topo_max_cpus(); ++i) { + int clos; + struct isst_id tid; + + if (!is_cpu_in_power_domain(i, id)) + continue; + + if (CPU_ISSET_S(i, mask_size, cpu_mask)) + clos = 0; + else + clos = 3; + + debug_printf("Associate cpu: %d clos: %d\n", i, clos); + set_isst_id(&tid, i); + ret = isst_clos_associate(&tid, clos); + if (ret) { + isst_display_error_info_message(1, "isst_clos_associate failed", 0, 0); + return ret; + } + } + + return 0; +} + +static int set_pbf_core_power(struct isst_id *id) +{ + struct isst_pbf_info pbf_info; + struct isst_pkg_ctdp pkg_dev; + int ret; + + ret = isst_get_ctdp_levels(id, &pkg_dev); + if (ret) { + debug_printf("isst_get_ctdp_levels failed"); + return ret; + } + debug_printf("Current_level: %d\n", pkg_dev.current_level); + + ret = isst_get_pbf_info(id, pkg_dev.current_level, &pbf_info); + if (ret) { + debug_printf("isst_get_pbf_info failed"); + return ret; + } + debug_printf("p1_high: %d p1_low: %d\n", pbf_info.p1_high, + pbf_info.p1_low); + + ret = set_core_priority_and_min(id, pbf_info.core_cpumask_size, + pbf_info.core_cpumask, + pbf_info.p1_high, pbf_info.p1_low); + if (ret) { + debug_printf("set_core_priority_and_min failed"); + return ret; + } + + ret = isst_pm_qos_config(id, 1, 1); + if (ret) { + debug_printf("isst_pm_qos_config failed"); + return ret; + } + + return 0; +} + +static void set_pbf_for_cpu(struct isst_id *id, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + struct isst_pkg_ctdp_level_info ctdp_level; + struct isst_pkg_ctdp pkg_dev; + int ret; + int status = *(int *)arg4; + + if (is_clx_n_platform()) { + ret = 0; + if (status) { + set_clx_pbf_cpufreq_scaling_min_max(id); + + } else { + set_scaling_max_to_cpuinfo_max(id); + set_scaling_min_to_cpuinfo_min(id); + } + goto disp_result; + } + + ret = isst_get_ctdp_levels(id, &pkg_dev); + if (ret) { + isst_display_error_info_message(1, "Failed to get number of levels", 0, 0); + goto disp_result; + } + + ret = isst_get_ctdp_control(id, pkg_dev.current_level, &ctdp_level); + if (ret) { + isst_display_error_info_message(1, "Failed to get current level", 0, 0); + goto disp_result; + } + + if (!ctdp_level.pbf_support) { + isst_display_error_info_message(1, "base-freq feature is not present at this level", 1, pkg_dev.current_level); + ret = -1; + goto disp_result; + } + + if (auto_mode && status) { + ret = set_pbf_core_power(id); + if (ret) + goto disp_result; + } + + ret = isst_set_pbf_fact_status(id, 1, status); + if (ret) { + debug_printf("isst_set_pbf_fact_status failed"); + if (auto_mode) + isst_pm_qos_config(id, 0, 0); + } else { + if (auto_mode) { + if (status) + set_scaling_min_to_cpuinfo_max(id); + else + set_scaling_min_to_cpuinfo_min(id); + } + } + + if (auto_mode && !status) + isst_pm_qos_config(id, 0, 1); + +disp_result: + if (status) + isst_display_result(id, outf, "base-freq", "enable", + ret); + else + isst_display_result(id, outf, "base-freq", "disable", + ret); +} + +static void set_pbf_enable(int arg) +{ + int enable = arg; + + if (cmd_help) { + if (enable) { + fprintf(stderr, + "Enable Intel Speed Select Technology base frequency feature\n"); + if (is_clx_n_platform()) { + fprintf(stderr, + "\tOn this platform this command doesn't enable feature in the hardware.\n"); + fprintf(stderr, + "\tIt updates the cpufreq scaling_min_freq to match cpufreq base_frequency.\n"); + exit(0); + + } + fprintf(stderr, + "\tOptional Arguments: -a|--auto : Use priority of cores to set core-power associations\n"); + } else { + + if (is_clx_n_platform()) { + fprintf(stderr, + "\tOn this platform this command doesn't disable feature in the hardware.\n"); + fprintf(stderr, + "\tIt updates the cpufreq scaling_min_freq to match cpuinfo_min_freq\n"); + exit(0); + } + fprintf(stderr, + "Disable Intel Speed Select Technology base frequency feature\n"); + fprintf(stderr, + "\tOptional Arguments: -a|--auto : Also disable core-power associations\n"); + } + exit(0); + } + + isst_ctdp_display_information_start(outf); + if (max_target_cpus) + for_each_online_target_cpu_in_set(set_pbf_for_cpu, NULL, NULL, + NULL, &enable); + else + for_each_online_package_in_set(set_pbf_for_cpu, NULL, NULL, + NULL, &enable); + isst_ctdp_display_information_end(outf); +} + +static void dump_fact_config_for_cpu(struct isst_id *id, void *arg1, void *arg2, + void *arg3, void *arg4) +{ + struct isst_fact_info fact_info; + int ret; + + ret = isst_get_fact_info(id, tdp_level, fact_bucket, &fact_info); + if (ret) { + isst_display_error_info_message(1, "Failed to get turbo-freq info at this level", 1, tdp_level); + isst_ctdp_display_information_end(outf); + exit(1); + } else { + isst_fact_display_information(id, outf, tdp_level, fact_bucket, + fact_avx, &fact_info); + } +} + +static void dump_fact_config(int arg) +{ + if (cmd_help) { + fprintf(stderr, + "Print complete Intel Speed Select Technology turbo frequency configuration for a TDP level. Other arguments are optional.\n"); + fprintf(stderr, + "\tArguments: -l|--level : Specify tdp level\n"); + fprintf(stderr, + "\tArguments: -b|--bucket : Bucket index to dump\n"); + fprintf(stderr, + "\tArguments: -r|--trl-type : Specify trl type: sse|avx2|avx512\n"); + exit(0); + } + + if (tdp_level == 0xff) { + isst_display_error_info_message(1, "Invalid command: specify tdp_level\n", 0, 0); + exit(1); + } + + isst_ctdp_display_information_start(outf); + if (max_target_cpus) + for_each_online_target_cpu_in_set(dump_fact_config_for_cpu, + NULL, NULL, NULL, NULL); + else + for_each_online_package_in_set(dump_fact_config_for_cpu, NULL, + NULL, NULL, NULL); + isst_ctdp_display_information_end(outf); +} + +static void set_fact_for_cpu(struct isst_id *id, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + struct isst_pkg_ctdp_level_info ctdp_level; + struct isst_pkg_ctdp pkg_dev; + int ret; + int status = *(int *)arg4; + + if (status && no_turbo()) { + isst_display_error_info_message(1, "Turbo mode is disabled", 0, 0); + ret = -1; + goto disp_results; + } + + ret = isst_get_ctdp_levels(id, &pkg_dev); + if (ret) { + isst_display_error_info_message(1, "Failed to get number of levels", 0, 0); + goto disp_results; + } + + ret = isst_get_ctdp_control(id, pkg_dev.current_level, &ctdp_level); + if (ret) { + isst_display_error_info_message(1, "Failed to get current level", 0, 0); + goto disp_results; + } + + if (!ctdp_level.fact_support) { + isst_display_error_info_message(1, "turbo-freq feature is not present at this level", 1, pkg_dev.current_level); + ret = -1; + goto disp_results; + } + + if (status) { + ret = isst_pm_qos_config(id, 1, 1); + if (ret) + goto disp_results; + } + + ret = isst_set_pbf_fact_status(id, 0, status); + if (ret) { + debug_printf("isst_set_pbf_fact_status failed"); + if (auto_mode) + isst_pm_qos_config(id, 0, 0); + + goto disp_results; + } + + /* Set TRL */ + if (status) { + struct isst_pkg_ctdp pkg_dev; + + ret = isst_get_ctdp_levels(id, &pkg_dev); + if (!ret) + ret = isst_set_trl(id, fact_trl); + if (ret && auto_mode) + isst_pm_qos_config(id, 0, 0); + } else { + if (auto_mode) + isst_pm_qos_config(id, 0, 0); + } + +disp_results: + if (status) { + isst_display_result(id, outf, "turbo-freq", "enable", ret); + if (ret) + fact_enable_fail = ret; + } else { + /* Since we modified TRL during Fact enable, restore it */ + isst_set_trl_from_current_tdp(id, fact_trl); + isst_display_result(id, outf, "turbo-freq", "disable", ret); + } +} + +static void set_fact_enable(int arg) +{ + int i, ret, enable = arg; + struct isst_id id; + + if (cmd_help) { + if (enable) { + fprintf(stderr, + "Enable Intel Speed Select Technology Turbo frequency feature\n"); + fprintf(stderr, + "Optional: -t|--trl : Specify turbo ratio limit\n"); + fprintf(stderr, + "\tOptional Arguments: -a|--auto : Designate specified target CPUs with"); + fprintf(stderr, + "-C|--cpu option as as high priority using core-power feature\n"); + } else { + fprintf(stderr, + "Disable Intel Speed Select Technology turbo frequency feature\n"); + fprintf(stderr, + "Optional: -t|--trl : Specify turbo ratio limit\n"); + fprintf(stderr, + "\tOptional Arguments: -a|--auto : Also disable core-power associations\n"); + } + exit(0); + } + + isst_ctdp_display_information_start(outf); + if (max_target_cpus) + for_each_online_target_cpu_in_set(set_fact_for_cpu, NULL, NULL, + NULL, &enable); + else + for_each_online_package_in_set(set_fact_for_cpu, NULL, NULL, + NULL, &enable); + isst_ctdp_display_information_end(outf); + + if (!fact_enable_fail && enable && auto_mode) { + /* + * When we adjust CLOS param, we have to set for siblings also. + * So for the each user specified CPU, also add the sibling + * in the present_cpu_mask. + */ + for (i = 0; i < get_topo_max_cpus(); ++i) { + char buffer[128], sibling_list[128], *cpu_str; + int fd, len; + + if (!CPU_ISSET_S(i, target_cpumask_size, target_cpumask)) + continue; + + snprintf(buffer, sizeof(buffer), + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", i); + + fd = open(buffer, O_RDONLY); + if (fd < 0) + continue; + + len = read(fd, sibling_list, sizeof(sibling_list)); + close(fd); + + if (len < 0) + continue; + + cpu_str = strtok(sibling_list, ","); + while (cpu_str != NULL) { + int cpu; + + sscanf(cpu_str, "%d", &cpu); + CPU_SET_S(cpu, target_cpumask_size, target_cpumask); + cpu_str = strtok(NULL, ","); + } + } + + for (i = 0; i < get_topo_max_cpus(); ++i) { + int clos; + + if (!CPU_ISSET_S(i, present_cpumask_size, present_cpumask)) + continue; + + set_isst_id(&id, i); + ret = set_clos_param(&id, 0, 0, 0, 0, 0xff); + if (ret) + goto error_disp; + + ret = set_clos_param(&id, 1, 15, 15, 0, 0xff); + if (ret) + goto error_disp; + + ret = set_clos_param(&id, 2, 15, 15, 0, 0xff); + if (ret) + goto error_disp; + + ret = set_clos_param(&id, 3, 15, 15, 0, 0xff); + if (ret) + goto error_disp; + + if (CPU_ISSET_S(i, target_cpumask_size, target_cpumask)) + clos = 0; + else + clos = 3; + + debug_printf("Associate cpu: %d clos: %d\n", i, clos); + ret = isst_clos_associate(&id, clos); + if (ret) + goto error_disp; + } + set_isst_id(&id, -1); + isst_display_result(&id, outf, "turbo-freq --auto", "enable", 0); + } + + return; + +error_disp: + isst_display_result(&id, outf, "turbo-freq --auto", "enable", ret); + +} + +static void enable_clos_qos_config(struct isst_id *id, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + int ret; + int status = *(int *)arg4; + + if (is_skx_based_platform()) + clos_priority_type = 1; + + ret = isst_pm_qos_config(id, status, clos_priority_type); + if (ret) + isst_display_error_info_message(1, "isst_pm_qos_config failed", 0, 0); + + if (status) + isst_display_result(id, outf, "core-power", "enable", + ret); + else + isst_display_result(id, outf, "core-power", "disable", + ret); +} + +static void set_clos_enable(int arg) +{ + int enable = arg; + + if (cmd_help) { + if (enable) { + fprintf(stderr, + "Enable core-power for a package/die\n"); + if (!is_skx_based_platform()) { + fprintf(stderr, + "\tClos Enable: Specify priority type with [--priority|-p]\n"); + fprintf(stderr, "\t\t 0: Proportional, 1: Ordered\n"); + } + } else { + fprintf(stderr, + "Disable core-power: [No command arguments are required]\n"); + } + exit(0); + } + + if (enable && cpufreq_sysfs_present()) { + fprintf(stderr, + "cpufreq subsystem and core-power enable will interfere with each other!\n"); + } + + isst_ctdp_display_information_start(outf); + if (max_target_cpus) + for_each_online_target_cpu_in_set(enable_clos_qos_config, NULL, + NULL, NULL, &enable); + else + for_each_online_package_in_set(enable_clos_qos_config, NULL, + NULL, NULL, &enable); + isst_ctdp_display_information_end(outf); +} + +static void dump_clos_config_for_cpu(struct isst_id *id, void *arg1, void *arg2, + void *arg3, void *arg4) +{ + struct isst_clos_config clos_config; + int ret; + + ret = isst_pm_get_clos(id, current_clos, &clos_config); + if (ret) + isst_display_error_info_message(1, "isst_pm_get_clos failed", 0, 0); + else + isst_clos_display_information(id, outf, current_clos, + &clos_config); +} + +static void dump_clos_config(int arg) +{ + if (cmd_help) { + fprintf(stderr, + "Print Intel Speed Select Technology core power configuration\n"); + fprintf(stderr, + "\tArguments: [-c | --clos]: Specify clos id\n"); + exit(0); + } + if (current_clos < 0 || current_clos > 3) { + isst_display_error_info_message(1, "Invalid clos id\n", 0, 0); + isst_ctdp_display_information_end(outf); + exit(0); + } + + isst_ctdp_display_information_start(outf); + if (max_target_cpus) + for_each_online_target_cpu_in_set(dump_clos_config_for_cpu, + NULL, NULL, NULL, NULL); + else + for_each_online_package_in_set(dump_clos_config_for_cpu, NULL, + NULL, NULL, NULL); + isst_ctdp_display_information_end(outf); +} + +static void get_clos_info_for_cpu(struct isst_id *id, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + int enable, ret, prio_type; + + ret = isst_clos_get_clos_information(id, &enable, &prio_type); + if (ret) + isst_display_error_info_message(1, "isst_clos_get_info failed", 0, 0); + else { + int cp_state, cp_cap; + + isst_read_pm_config(id, &cp_state, &cp_cap); + isst_clos_display_clos_information(id, outf, enable, prio_type, + cp_state, cp_cap); + } +} + +static void dump_clos_info(int arg) +{ + if (cmd_help) { + fprintf(stderr, + "Print Intel Speed Select Technology core power information\n"); + fprintf(stderr, "\t Optionally specify targeted cpu id with [--cpu|-c]\n"); + exit(0); + } + + isst_ctdp_display_information_start(outf); + if (max_target_cpus) + for_each_online_target_cpu_in_set(get_clos_info_for_cpu, NULL, + NULL, NULL, NULL); + else + for_each_online_package_in_set(get_clos_info_for_cpu, NULL, + NULL, NULL, NULL); + isst_ctdp_display_information_end(outf); + +} + +static void set_clos_config_for_cpu(struct isst_id *id, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + struct isst_clos_config clos_config; + int ret; + + clos_config.epp = clos_epp; + clos_config.clos_prop_prio = clos_prop_prio; + clos_config.clos_min = clos_min; + clos_config.clos_max = clos_max; + clos_config.clos_desired = clos_desired; + ret = isst_set_clos(id, current_clos, &clos_config); + if (ret) + isst_display_error_info_message(1, "isst_set_clos failed", 0, 0); + else + isst_display_result(id, outf, "core-power", "config", ret); +} + +static void set_clos_config(int arg) +{ + if (cmd_help) { + fprintf(stderr, + "Set core-power configuration for one of the four clos ids\n"); + fprintf(stderr, + "\tSpecify targeted clos id with [--clos|-c]\n"); + if (!is_skx_based_platform()) { + fprintf(stderr, "\tSpecify clos EPP with [--epp|-e]\n"); + fprintf(stderr, + "\tSpecify clos Proportional Priority [--weight|-w]\n"); + } + fprintf(stderr, "\tSpecify clos min in MHz with [--min|-n]\n"); + fprintf(stderr, "\tSpecify clos max in MHz with [--max|-m]\n"); + exit(0); + } + + if (current_clos < 0 || current_clos > 3) { + isst_display_error_info_message(1, "Invalid clos id\n", 0, 0); + exit(0); + } + if (!is_skx_based_platform() && (clos_epp < 0 || clos_epp > 0x0F)) { + fprintf(stderr, "clos epp is not specified or invalid, default: 0\n"); + clos_epp = 0; + } + if (!is_skx_based_platform() && (clos_prop_prio < 0 || clos_prop_prio > 0x0F)) { + fprintf(stderr, + "clos frequency weight is not specified or invalid, default: 0\n"); + clos_prop_prio = 0; + } + if (clos_min < 0) { + fprintf(stderr, "clos min is not specified, default: 0\n"); + clos_min = 0; + } + if (clos_max < 0) { + fprintf(stderr, "clos max is not specified, default: Max frequency (ratio 0xff)\n"); + clos_max = 0xff; + } + if (clos_desired) { + fprintf(stderr, "clos desired is not supported on this platform\n"); + clos_desired = 0x00; + } + + isst_ctdp_display_information_start(outf); + if (max_target_cpus) + for_each_online_target_cpu_in_set(set_clos_config_for_cpu, NULL, + NULL, NULL, NULL); + else + for_each_online_package_in_set(set_clos_config_for_cpu, NULL, + NULL, NULL, NULL); + isst_ctdp_display_information_end(outf); +} + +static void set_clos_assoc_for_cpu(struct isst_id *id, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + int ret; + + ret = isst_clos_associate(id, current_clos); + if (ret) + debug_printf("isst_clos_associate failed"); + else + isst_display_result(id, outf, "core-power", "assoc", ret); +} + +static void set_clos_assoc(int arg) +{ + if (cmd_help) { + fprintf(stderr, "Associate a clos id to a CPU\n"); + fprintf(stderr, + "\tSpecify targeted clos id with [--clos|-c]\n"); + fprintf(stderr, + "\tFor example to associate clos 1 to CPU 0: issue\n"); + fprintf(stderr, + "\tintel-speed-select --cpu 0 core-power assoc --clos 1\n"); + exit(0); + } + + if (current_clos < 0 || current_clos > 3) { + isst_display_error_info_message(1, "Invalid clos id\n", 0, 0); + exit(0); + } + if (max_target_cpus) + for_each_online_target_cpu_in_set(set_clos_assoc_for_cpu, NULL, + NULL, NULL, NULL); + else { + isst_display_error_info_message(1, "Invalid target cpu. Specify with [-c|--cpu]", 0, 0); + } +} + +static void get_clos_assoc_for_cpu(struct isst_id *id, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + int clos, ret; + + ret = isst_clos_get_assoc_status(id, &clos); + if (ret) + isst_display_error_info_message(1, "isst_clos_get_assoc_status failed", 0, 0); + else + isst_clos_display_assoc_information(id, outf, clos); +} + +static void get_clos_assoc(int arg) +{ + if (cmd_help) { + fprintf(stderr, "Get associate clos id to a CPU\n"); + fprintf(stderr, "\tSpecify targeted cpu id with [--cpu|-c]\n"); + exit(0); + } + + if (!max_target_cpus) { + isst_display_error_info_message(1, "Invalid target cpu. Specify with [-c|--cpu]", 0, 0); + exit(0); + } + + isst_ctdp_display_information_start(outf); + for_each_online_target_cpu_in_set(get_clos_assoc_for_cpu, NULL, + NULL, NULL, NULL); + isst_ctdp_display_information_end(outf); +} + +static void set_turbo_mode_for_cpu(struct isst_id *id, int status) +{ + int base_freq; + + if (status) { + base_freq = get_cpufreq_base_freq(id->cpu); + set_cpufreq_scaling_min_max(id->cpu, 1, base_freq); + } else { + set_scaling_max_to_cpuinfo_max(id); + } + + if (status) { + isst_display_result(id, outf, "turbo-mode", "enable", 0); + } else { + isst_display_result(id, outf, "turbo-mode", "disable", 0); + } +} + +static void set_turbo_mode(int arg) +{ + int i, enable = arg; + struct isst_id id; + + if (cmd_help) { + if (enable) + fprintf(stderr, "Set turbo mode enable\n"); + else + fprintf(stderr, "Set turbo mode disable\n"); + exit(0); + } + + isst_ctdp_display_information_start(outf); + + for (i = 0; i < topo_max_cpus; ++i) { + int online; + + if (i) + online = parse_int_file( + 1, "/sys/devices/system/cpu/cpu%d/online", i); + else + online = + 1; /* online entry for CPU 0 needs some special configs */ + + if (online) { + set_isst_id(&id, i); + set_turbo_mode_for_cpu(&id, enable); + } + + } + isst_ctdp_display_information_end(outf); +} + +static void get_set_trl(struct isst_id *id, void *arg1, void *arg2, void *arg3, + void *arg4) +{ + unsigned long long trl; + int set = *(int *)arg4; + int ret; + + if (set && !fact_trl) { + isst_display_error_info_message(1, "Invalid TRL. Specify with [-t|--trl]", 0, 0); + exit(0); + } + + if (set) { + ret = isst_set_trl(id, fact_trl); + isst_display_result(id, outf, "turbo-mode", "set-trl", ret); + return; + } + + ret = isst_get_trl(id, &trl); + if (ret) + isst_display_result(id, outf, "turbo-mode", "get-trl", ret); + else + isst_trl_display_information(id, outf, trl); +} + +static void process_trl(int arg) +{ + if (cmd_help) { + if (arg) { + fprintf(stderr, "Set TRL (turbo ratio limits)\n"); + fprintf(stderr, "\t t|--trl: Specify turbo ratio limit for setting TRL\n"); + } else { + fprintf(stderr, "Get TRL (turbo ratio limits)\n"); + } + exit(0); + } + + isst_ctdp_display_information_start(outf); + if (max_target_cpus) + for_each_online_target_cpu_in_set(get_set_trl, NULL, + NULL, NULL, &arg); + else + for_each_online_package_in_set(get_set_trl, NULL, + NULL, NULL, &arg); + isst_ctdp_display_information_end(outf); +} + +static struct process_cmd_struct clx_n_cmds[] = { + { "perf-profile", "info", dump_isst_config, 0 }, + { "base-freq", "info", dump_pbf_config, 0 }, + { "base-freq", "enable", set_pbf_enable, 1 }, + { "base-freq", "disable", set_pbf_enable, 0 }, + { NULL, NULL, NULL, 0 } +}; + +static struct process_cmd_struct isst_cmds[] = { + { "perf-profile", "get-lock-status", get_tdp_locked, 0 }, + { "perf-profile", "get-config-levels", get_tdp_levels, 0 }, + { "perf-profile", "get-config-version", get_tdp_version, 0 }, + { "perf-profile", "get-config-enabled", get_tdp_enabled, 0 }, + { "perf-profile", "get-config-current-level", get_tdp_current_level, + 0 }, + { "perf-profile", "set-config-level", set_tdp_level, 0 }, + { "perf-profile", "info", dump_isst_config, 0 }, + { "base-freq", "info", dump_pbf_config, 0 }, + { "base-freq", "enable", set_pbf_enable, 1 }, + { "base-freq", "disable", set_pbf_enable, 0 }, + { "turbo-freq", "info", dump_fact_config, 0 }, + { "turbo-freq", "enable", set_fact_enable, 1 }, + { "turbo-freq", "disable", set_fact_enable, 0 }, + { "core-power", "info", dump_clos_info, 0 }, + { "core-power", "enable", set_clos_enable, 1 }, + { "core-power", "disable", set_clos_enable, 0 }, + { "core-power", "config", set_clos_config, 0 }, + { "core-power", "get-config", dump_clos_config, 0 }, + { "core-power", "assoc", set_clos_assoc, 0 }, + { "core-power", "get-assoc", get_clos_assoc, 0 }, + { "turbo-mode", "enable", set_turbo_mode, 0 }, + { "turbo-mode", "disable", set_turbo_mode, 1 }, + { "turbo-mode", "get-trl", process_trl, 0 }, + { "turbo-mode", "set-trl", process_trl, 1 }, + { NULL, NULL, NULL } +}; + +/* + * parse cpuset with following syntax + * 1,2,4..6,8-10 and set bits in cpu_subset + */ +void parse_cpu_command(char *optarg) +{ + unsigned int start, end; + char *next; + + next = optarg; + + while (next && *next) { + if (*next == '-') /* no negative cpu numbers */ + goto error; + + start = strtoul(next, &next, 10); + + if (max_target_cpus < MAX_CPUS_IN_ONE_REQ) + target_cpus[max_target_cpus++] = start; + + if (*next == '\0') + break; + + if (*next == ',') { + next += 1; + continue; + } + + if (*next == '-') { + next += 1; /* start range */ + } else if (*next == '.') { + next += 1; + if (*next == '.') + next += 1; /* start range */ + else + goto error; + } + + end = strtoul(next, &next, 10); + if (end <= start) + goto error; + + while (++start <= end) { + if (max_target_cpus < MAX_CPUS_IN_ONE_REQ) + target_cpus[max_target_cpus++] = start; + } + + if (*next == ',') + next += 1; + else if (*next != '\0') + goto error; + } + +#ifdef DEBUG + { + int i; + + for (i = 0; i < max_target_cpus; ++i) + printf("cpu [%d] in arg\n", target_cpus[i]); + } +#endif + return; + +error: + fprintf(stderr, "\"--cpu %s\" malformed\n", optarg); + exit(-1); +} + +static void parse_cmd_args(int argc, int start, char **argv) +{ + int opt; + int option_index; + + static struct option long_options[] = { + { "bucket", required_argument, 0, 'b' }, + { "level", required_argument, 0, 'l' }, + { "online", required_argument, 0, 'o' }, + { "trl-type", required_argument, 0, 'r' }, + { "trl", required_argument, 0, 't' }, + { "help", no_argument, 0, 'h' }, + { "clos", required_argument, 0, 'c' }, + { "desired", required_argument, 0, 'd' }, + { "epp", required_argument, 0, 'e' }, + { "min", required_argument, 0, 'n' }, + { "max", required_argument, 0, 'm' }, + { "priority", required_argument, 0, 'p' }, + { "weight", required_argument, 0, 'w' }, + { "auto", no_argument, 0, 'a' }, + { 0, 0, 0, 0 } + }; + + option_index = start; + + optind = start + 1; + while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:r:hoa", + long_options, &option_index)) != -1) { + switch (opt) { + case 'a': + auto_mode = 1; + break; + case 'b': + fact_bucket = atoi(optarg); + break; + case 'h': + cmd_help = 1; + break; + case 'l': + tdp_level = atoi(optarg); + break; + case 'o': + force_online_offline = 1; + break; + case 't': + sscanf(optarg, "0x%llx", &fact_trl); + break; + case 'r': + if (!strncmp(optarg, "sse", 3)) { + fact_avx = 0x01; + } else if (!strncmp(optarg, "avx2", 4)) { + fact_avx = 0x02; + } else if (!strncmp(optarg, "avx512", 6)) { + fact_avx = 0x04; + } else { + fprintf(outf, "Invalid sse,avx options\n"); + exit(1); + } + break; + /* CLOS related */ + case 'c': + current_clos = atoi(optarg); + break; + case 'd': + clos_desired = atoi(optarg); + clos_desired /= DISP_FREQ_MULTIPLIER; + break; + case 'e': + clos_epp = atoi(optarg); + if (is_skx_based_platform()) { + isst_display_error_info_message(1, "epp can't be specified on this platform", 0, 0); + exit(0); + } + break; + case 'n': + clos_min = atoi(optarg); + clos_min /= DISP_FREQ_MULTIPLIER; + break; + case 'm': + clos_max = atoi(optarg); + clos_max /= DISP_FREQ_MULTIPLIER; + break; + case 'p': + clos_priority_type = atoi(optarg); + if (is_skx_based_platform() && !clos_priority_type) { + isst_display_error_info_message(1, "Invalid clos priority type: proportional for this platform", 0, 0); + exit(0); + } + break; + case 'w': + clos_prop_prio = atoi(optarg); + if (is_skx_based_platform()) { + isst_display_error_info_message(1, "weight can't be specified on this platform", 0, 0); + exit(0); + } + break; + default: + printf("Unknown option: ignore\n"); + } + } + + if (argv[optind]) + printf("Garbage at the end of command: ignore\n"); +} + +static void isst_help(void) +{ + printf("perf-profile:\tAn architectural mechanism that allows multiple optimized \n\ + performance profiles per system via static and/or dynamic\n\ + adjustment of core count, workload, Tjmax, and\n\ + TDP, etc.\n"); + printf("\nCommands : For feature=perf-profile\n"); + printf("\tinfo\n"); + + if (!is_clx_n_platform()) { + printf("\tget-lock-status\n"); + printf("\tget-config-levels\n"); + printf("\tget-config-version\n"); + printf("\tget-config-enabled\n"); + printf("\tget-config-current-level\n"); + printf("\tset-config-level\n"); + } +} + +static void pbf_help(void) +{ + printf("base-freq:\tEnables users to increase guaranteed base frequency\n\ + on certain cores (high priority cores) in exchange for lower\n\ + base frequency on remaining cores (low priority cores).\n"); + printf("\tcommand : info\n"); + printf("\tcommand : enable\n"); + printf("\tcommand : disable\n"); +} + +static void fact_help(void) +{ + printf("turbo-freq:\tEnables the ability to set different turbo ratio\n\ + limits to cores based on priority.\n"); + printf("\nCommand: For feature=turbo-freq\n"); + printf("\tcommand : info\n"); + printf("\tcommand : enable\n"); + printf("\tcommand : disable\n"); +} + +static void turbo_mode_help(void) +{ + printf("turbo-mode:\tEnables users to enable/disable turbo mode by adjusting frequency settings. Also allows to get and set turbo ratio limits (TRL).\n"); + printf("\tcommand : enable\n"); + printf("\tcommand : disable\n"); + printf("\tcommand : get-trl\n"); + printf("\tcommand : set-trl\n"); +} + + +static void core_power_help(void) +{ + printf("core-power:\tInterface that allows user to define per core/tile\n\ + priority.\n"); + printf("\nCommands : For feature=core-power\n"); + printf("\tinfo\n"); + printf("\tenable\n"); + printf("\tdisable\n"); + printf("\tconfig\n"); + printf("\tget-config\n"); + printf("\tassoc\n"); + printf("\tget-assoc\n"); +} + +struct process_cmd_help_struct { + char *feature; + void (*process_fn)(void); +}; + +static struct process_cmd_help_struct isst_help_cmds[] = { + { "perf-profile", isst_help }, + { "base-freq", pbf_help }, + { "turbo-freq", fact_help }, + { "core-power", core_power_help }, + { "turbo-mode", turbo_mode_help }, + { NULL, NULL } +}; + +static struct process_cmd_help_struct clx_n_help_cmds[] = { + { "perf-profile", isst_help }, + { "base-freq", pbf_help }, + { NULL, NULL } +}; + +void process_command(int argc, char **argv, + struct process_cmd_help_struct *help_cmds, + struct process_cmd_struct *cmds) +{ + int i = 0, matched = 0; + char *feature = argv[optind]; + char *cmd = argv[optind + 1]; + + if (!feature || !cmd) + return; + + debug_printf("feature name [%s] command [%s]\n", feature, cmd); + if (!strcmp(cmd, "-h") || !strcmp(cmd, "--help")) { + while (help_cmds[i].feature) { + if (!strcmp(help_cmds[i].feature, feature)) { + help_cmds[i].process_fn(); + exit(0); + } + ++i; + } + } + + i = 0; + while (cmds[i].feature) { + if (!strcmp(cmds[i].feature, feature) && + !strcmp(cmds[i].command, cmd)) { + parse_cmd_args(argc, optind + 1, argv); + cmds[i].process_fn(cmds[i].arg); + matched = 1; + break; + } + ++i; + } + + if (!matched) + fprintf(stderr, "Invalid command\n"); +} + +static void usage(void) +{ + if (is_clx_n_platform()) { + fprintf(stderr, "\nThere is limited support of Intel Speed Select features on this platform.\n"); + fprintf(stderr, "Everything is pre-configured using BIOS options, this tool can't enable any feature in the hardware.\n\n"); + } + + printf("\nUsage:\n"); + printf("intel-speed-select [OPTIONS] FEATURE COMMAND COMMAND_ARGUMENTS\n"); + printf("\nUse this tool to enumerate and control the Intel Speed Select Technology features:\n"); + if (is_clx_n_platform()) + printf("\nFEATURE : [perf-profile|base-freq]\n"); + else + printf("\nFEATURE : [perf-profile|base-freq|turbo-freq|core-power|turbo-mode]\n"); + printf("\nFor help on each feature, use -h|--help\n"); + printf("\tFor example: intel-speed-select perf-profile -h\n"); + + printf("\nFor additional help on each command for a feature, use --h|--help\n"); + printf("\tFor example: intel-speed-select perf-profile get-lock-status -h\n"); + printf("\t\t This will print help for the command \"get-lock-status\" for the feature \"perf-profile\"\n"); + + printf("\nOPTIONS\n"); + printf("\t[-c|--cpu] : logical cpu number\n"); + printf("\t\tDefault: Die scoped for all dies in the system with multiple dies/package\n"); + printf("\t\t\t Or Package scoped for all Packages when each package contains one die\n"); + printf("\t[-d|--debug] : Debug mode\n"); + printf("\t[-f|--format] : output format [json|text]. Default: text\n"); + printf("\t[-h|--help] : Print help\n"); + printf("\t[-i|--info] : Print platform information\n"); + printf("\t[-a|--all-cpus-online] : Force online every CPU in the system\n"); + printf("\t[-o|--out] : Output file\n"); + printf("\t\t\tDefault : stderr\n"); + printf("\t[-p|--pause] : Delay between two mail box commands in milliseconds\n"); + printf("\t[-r|--retry] : Retry count for mail box commands on failure, default 3\n"); + printf("\t[-v|--version] : Print version\n"); + printf("\t[-b|--oob : Start a daemon to process HFI events for perf profile change from Out of Band agent.\n"); + printf("\t[-n|--no-daemon : Don't run as daemon. By default --oob will turn on daemon mode\n"); + printf("\t[-w|--delay : Delay for reading config level state change in OOB poll mode.\n"); + printf("\nResult format\n"); + printf("\tResult display uses a common format for each command:\n"); + printf("\tResults are formatted in text/JSON with\n"); + printf("\t\tPackage, Die, CPU, and command specific results.\n"); + + printf("\nExamples\n"); + printf("\tTo get platform information:\n"); + printf("\t\tintel-speed-select --info\n"); + printf("\tTo get full perf-profile information dump:\n"); + printf("\t\tintel-speed-select perf-profile info\n"); + printf("\tTo get full base-freq information dump:\n"); + printf("\t\tintel-speed-select base-freq info -l 0\n"); + if (!is_clx_n_platform()) { + printf("\tTo get full turbo-freq information dump:\n"); + printf("\t\tintel-speed-select turbo-freq info -l 0\n"); + } + exit(1); +} + +static void print_version(void) +{ + fprintf(outf, "Version %s\n", version_str); + exit(0); +} + +static void cmdline(int argc, char **argv) +{ + const char *pathname = "/dev/isst_interface"; + char *ptr; + FILE *fp; + int opt, force_cpus_online = 0; + int option_index = 0; + int ret; + int oob_mode = 0; + int poll_interval = -1; + int no_daemon = 0; + + static struct option long_options[] = { + { "all-cpus-online", no_argument, 0, 'a' }, + { "cpu", required_argument, 0, 'c' }, + { "debug", no_argument, 0, 'd' }, + { "format", required_argument, 0, 'f' }, + { "help", no_argument, 0, 'h' }, + { "info", no_argument, 0, 'i' }, + { "pause", required_argument, 0, 'p' }, + { "out", required_argument, 0, 'o' }, + { "retry", required_argument, 0, 'r' }, + { "version", no_argument, 0, 'v' }, + { "oob", no_argument, 0, 'b' }, + { "no-daemon", no_argument, 0, 'n' }, + { "poll-interval", required_argument, 0, 'w' }, + { 0, 0, 0, 0 } + }; + + if (geteuid() != 0) { + fprintf(stderr, "Must run as root\n"); + exit(0); + } + + ret = update_cpu_model(); + if (ret) + err(-1, "Invalid CPU model (%d)\n", cpu_model); + printf("Intel(R) Speed Select Technology\n"); + printf("Executing on CPU model:%d[0x%x]\n", cpu_model, cpu_model); + + if (!is_clx_n_platform()) { + fp = fopen(pathname, "rb"); + if (!fp) { + fprintf(stderr, "Intel speed select drivers are not loaded on this system.\n"); + fprintf(stderr, "Verify that kernel config includes CONFIG_INTEL_SPEED_SELECT_INTERFACE.\n"); + fprintf(stderr, "If the config is included then this is not a supported platform.\n"); + exit(0); + } + fclose(fp); + } + + progname = argv[0]; + while ((opt = getopt_long_only(argc, argv, "+c:df:hio:vabw:n", long_options, + &option_index)) != -1) { + switch (opt) { + case 'a': + force_cpus_online = 1; + break; + case 'c': + parse_cpu_command(optarg); + break; + case 'd': + debug_flag = 1; + printf("Debug Mode ON\n"); + break; + case 'f': + if (!strncmp(optarg, "json", 4)) + out_format_json = 1; + break; + case 'h': + usage(); + break; + case 'i': + isst_print_platform_information(); + break; + case 'o': + if (outf) + fclose(outf); + outf = fopen_or_exit(optarg, "w"); + break; + case 'p': + ret = strtol(optarg, &ptr, 10); + if (!ret) + fprintf(stderr, "Invalid pause interval, ignore\n"); + else + mbox_delay = ret; + break; + case 'r': + ret = strtol(optarg, &ptr, 10); + if (!ret) + fprintf(stderr, "Invalid retry count, ignore\n"); + else + mbox_retries = ret; + break; + case 'v': + print_version(); + break; + case 'b': + oob_mode = 1; + break; + case 'n': + no_daemon = 1; + break; + case 'w': + ret = strtol(optarg, &ptr, 10); + if (!ret) { + fprintf(stderr, "Invalid poll interval count\n"); + exit(0); + } + poll_interval = ret; + break; + default: + usage(); + } + } + + if (optind > (argc - 2) && !oob_mode) { + usage(); + exit(0); + } + set_max_cpu_num(); + if (force_cpus_online) + force_all_cpus_online(); + store_cpu_topology(); + create_cpu_map(); + + if (oob_mode) { + if (debug_flag) + fprintf(stderr, "OOB mode is enabled in debug mode\n"); + + ret = isst_daemon(debug_flag, poll_interval, no_daemon); + if (ret) + fprintf(stderr, "OOB mode enable failed\n"); + goto out; + } + + if (!is_clx_n_platform()) { + ret = isst_fill_platform_info(); + if (ret) + goto out; + process_command(argc, argv, isst_help_cmds, isst_cmds); + } else { + process_command(argc, argv, clx_n_help_cmds, clx_n_cmds); + } +out: + free_cpu_set(present_cpumask); + free_cpu_set(target_cpumask); +} + +int main(int argc, char **argv) +{ + outf = stderr; + cmdline(argc, argv); + return 0; +} diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c new file mode 100644 index 000000000..f701b45c8 --- /dev/null +++ b/tools/power/x86/intel-speed-select/isst-core.c @@ -0,0 +1,1047 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Speed Select -- Enumerate and control features + * Copyright (c) 2019 Intel Corporation. + */ + +#include "isst.h" + +int isst_write_pm_config(struct isst_id *id, int cp_state) +{ + unsigned int req, resp; + int ret; + + if (cp_state) + req = BIT(16); + else + req = 0; + + ret = isst_send_mbox_command(id->cpu, WRITE_PM_CONFIG, PM_FEATURE, 0, req, + &resp); + if (ret) + return ret; + + debug_printf("cpu:%d WRITE_PM_CONFIG resp:%x\n", id->cpu, resp); + + return 0; +} + +int isst_read_pm_config(struct isst_id *id, int *cp_state, int *cp_cap) +{ + unsigned int resp; + int ret; + + ret = isst_send_mbox_command(id->cpu, READ_PM_CONFIG, PM_FEATURE, 0, 0, + &resp); + if (ret) + return ret; + + debug_printf("cpu:%d READ_PM_CONFIG resp:%x\n", id->cpu, resp); + + *cp_state = resp & BIT(16); + *cp_cap = resp & BIT(0) ? 1 : 0; + + return 0; +} + +int isst_get_ctdp_levels(struct isst_id *id, struct isst_pkg_ctdp *pkg_dev) +{ + unsigned int resp; + int ret; + + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, + CONFIG_TDP_GET_LEVELS_INFO, 0, 0, &resp); + if (ret) { + pkg_dev->levels = 0; + pkg_dev->locked = 1; + pkg_dev->current_level = 0; + pkg_dev->version = 0; + pkg_dev->enabled = 0; + return 0; + } + + debug_printf("cpu:%d CONFIG_TDP_GET_LEVELS_INFO resp:%x\n", id->cpu, resp); + + pkg_dev->version = resp & 0xff; + pkg_dev->levels = (resp >> 8) & 0xff; + pkg_dev->current_level = (resp >> 16) & 0xff; + pkg_dev->locked = !!(resp & BIT(24)); + pkg_dev->enabled = !!(resp & BIT(31)); + + return 0; +} + +int isst_get_ctdp_control(struct isst_id *id, int config_index, + struct isst_pkg_ctdp_level_info *ctdp_level) +{ + int cp_state, cp_cap; + unsigned int resp; + int ret; + + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, + CONFIG_TDP_GET_TDP_CONTROL, 0, + config_index, &resp); + if (ret) + return ret; + + ctdp_level->fact_support = resp & BIT(0); + ctdp_level->pbf_support = !!(resp & BIT(1)); + ctdp_level->fact_enabled = !!(resp & BIT(16)); + ctdp_level->pbf_enabled = !!(resp & BIT(17)); + + ret = isst_read_pm_config(id, &cp_state, &cp_cap); + if (ret) { + debug_printf("cpu:%d pm_config is not supported\n", id->cpu); + } else { + debug_printf("cpu:%d pm_config SST-CP state:%d cap:%d\n", id->cpu, cp_state, cp_cap); + ctdp_level->sst_cp_support = cp_cap; + ctdp_level->sst_cp_enabled = cp_state; + } + + debug_printf( + "cpu:%d CONFIG_TDP_GET_TDP_CONTROL resp:%x fact_support:%d pbf_support: %d fact_enabled:%d pbf_enabled:%d\n", + id->cpu, resp, ctdp_level->fact_support, ctdp_level->pbf_support, + ctdp_level->fact_enabled, ctdp_level->pbf_enabled); + + return 0; +} + +int isst_get_tdp_info(struct isst_id *id, int config_index, + struct isst_pkg_ctdp_level_info *ctdp_level) +{ + unsigned int resp; + int ret; + + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, CONFIG_TDP_GET_TDP_INFO, + 0, config_index, &resp); + if (ret) { + isst_display_error_info_message(1, "Invalid level, Can't get TDP information at level", 1, config_index); + return ret; + } + + ctdp_level->pkg_tdp = resp & GENMASK(14, 0); + ctdp_level->tdp_ratio = (resp & GENMASK(23, 16)) >> 16; + + debug_printf( + "cpu:%d ctdp:%d CONFIG_TDP_GET_TDP_INFO resp:%x tdp_ratio:%d pkg_tdp:%d\n", + id->cpu, config_index, resp, ctdp_level->tdp_ratio, + ctdp_level->pkg_tdp); + return 0; +} + +int isst_get_pwr_info(struct isst_id *id, int config_index, + struct isst_pkg_ctdp_level_info *ctdp_level) +{ + unsigned int resp; + int ret; + + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, CONFIG_TDP_GET_PWR_INFO, + 0, config_index, &resp); + if (ret) + return ret; + + ctdp_level->pkg_max_power = resp & GENMASK(14, 0); + ctdp_level->pkg_min_power = (resp & GENMASK(30, 16)) >> 16; + + debug_printf( + "cpu:%d ctdp:%d CONFIG_TDP_GET_PWR_INFO resp:%x pkg_max_power:%d pkg_min_power:%d\n", + id->cpu, config_index, resp, ctdp_level->pkg_max_power, + ctdp_level->pkg_min_power); + + return 0; +} + +void isst_get_uncore_p0_p1_info(struct isst_id *id, int config_index, + struct isst_pkg_ctdp_level_info *ctdp_level) +{ + unsigned int resp; + int ret; + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, + CONFIG_TDP_GET_UNCORE_P0_P1_INFO, 0, + config_index, &resp); + if (ret) { + ctdp_level->uncore_p0 = 0; + ctdp_level->uncore_p1 = 0; + return; + } + + ctdp_level->uncore_p0 = resp & GENMASK(7, 0); + ctdp_level->uncore_p1 = (resp & GENMASK(15, 8)) >> 8; + debug_printf( + "cpu:%d ctdp:%d CONFIG_TDP_GET_UNCORE_P0_P1_INFO resp:%x uncore p0:%d uncore p1:%d\n", + id->cpu, config_index, resp, ctdp_level->uncore_p0, + ctdp_level->uncore_p1); +} + +void isst_get_p1_info(struct isst_id *id, int config_index, + struct isst_pkg_ctdp_level_info *ctdp_level) +{ + unsigned int resp; + int ret; + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, CONFIG_TDP_GET_P1_INFO, 0, + config_index, &resp); + if (ret) { + ctdp_level->sse_p1 = 0; + ctdp_level->avx2_p1 = 0; + ctdp_level->avx512_p1 = 0; + return; + } + + ctdp_level->sse_p1 = resp & GENMASK(7, 0); + ctdp_level->avx2_p1 = (resp & GENMASK(15, 8)) >> 8; + ctdp_level->avx512_p1 = (resp & GENMASK(23, 16)) >> 16; + debug_printf( + "cpu:%d ctdp:%d CONFIG_TDP_GET_P1_INFO resp:%x sse_p1:%d avx2_p1:%d avx512_p1:%d\n", + id->cpu, config_index, resp, ctdp_level->sse_p1, + ctdp_level->avx2_p1, ctdp_level->avx512_p1); +} + +void isst_get_uncore_mem_freq(struct isst_id *id, int config_index, + struct isst_pkg_ctdp_level_info *ctdp_level) +{ + unsigned int resp; + int ret; + + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, CONFIG_TDP_GET_MEM_FREQ, + 0, config_index, &resp); + if (ret) { + ctdp_level->mem_freq = 0; + return; + } + + ctdp_level->mem_freq = resp & GENMASK(7, 0); + if (is_spr_platform()) { + ctdp_level->mem_freq *= 200; + } else if (is_icx_platform()) { + if (ctdp_level->mem_freq < 7) { + ctdp_level->mem_freq = (12 - ctdp_level->mem_freq) * 133.33 * 2 * 10; + ctdp_level->mem_freq /= 10; + if (ctdp_level->mem_freq % 10 > 5) + ctdp_level->mem_freq++; + } else { + ctdp_level->mem_freq = 0; + } + } else { + ctdp_level->mem_freq = 0; + } + debug_printf( + "cpu:%d ctdp:%d CONFIG_TDP_GET_MEM_FREQ resp:%x uncore mem_freq:%d\n", + id->cpu, config_index, resp, ctdp_level->mem_freq); +} + +int isst_get_tjmax_info(struct isst_id *id, int config_index, + struct isst_pkg_ctdp_level_info *ctdp_level) +{ + unsigned int resp; + int ret; + + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, CONFIG_TDP_GET_TJMAX_INFO, + 0, config_index, &resp); + if (ret) + return ret; + + ctdp_level->t_proc_hot = resp & GENMASK(7, 0); + + debug_printf( + "cpu:%d ctdp:%d CONFIG_TDP_GET_TJMAX_INFO resp:%x t_proc_hot:%d\n", + id->cpu, config_index, resp, ctdp_level->t_proc_hot); + + return 0; +} + +int isst_get_coremask_info(struct isst_id *id, int config_index, + struct isst_pkg_ctdp_level_info *ctdp_level) +{ + unsigned int resp; + int i, ret; + + ctdp_level->cpu_count = 0; + for (i = 0; i < 2; ++i) { + unsigned long long mask; + int cpu_count = 0; + + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, + CONFIG_TDP_GET_CORE_MASK, 0, + (i << 8) | config_index, &resp); + if (ret) + return ret; + + debug_printf( + "cpu:%d ctdp:%d mask:%d CONFIG_TDP_GET_CORE_MASK resp:%x\n", + id->cpu, config_index, i, resp); + + mask = (unsigned long long)resp << (32 * i); + set_cpu_mask_from_punit_coremask(id, mask, + ctdp_level->core_cpumask_size, + ctdp_level->core_cpumask, + &cpu_count); + ctdp_level->cpu_count += cpu_count; + debug_printf("cpu:%d ctdp:%d mask:%d cpu count:%d\n", id->cpu, + config_index, i, ctdp_level->cpu_count); + } + + return 0; +} + +int isst_get_get_trl_from_msr(struct isst_id *id, int *trl) +{ + unsigned long long msr_trl; + int ret; + + ret = isst_send_msr_command(id->cpu, 0x1AD, 0, &msr_trl); + if (ret) + return ret; + + trl[0] = msr_trl & GENMASK(7, 0); + trl[1] = (msr_trl & GENMASK(15, 8)) >> 8; + trl[2] = (msr_trl & GENMASK(23, 16)) >> 16; + trl[3] = (msr_trl & GENMASK(31, 24)) >> 24; + trl[4] = (msr_trl & GENMASK(39, 32)) >> 32; + trl[5] = (msr_trl & GENMASK(47, 40)) >> 40; + trl[6] = (msr_trl & GENMASK(55, 48)) >> 48; + trl[7] = (msr_trl & GENMASK(63, 56)) >> 56; + + return 0; +} + +int isst_get_get_trl(struct isst_id *id, int level, int avx_level, int *trl) +{ + unsigned int req, resp; + int ret; + + req = level | (avx_level << 16); + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, + CONFIG_TDP_GET_TURBO_LIMIT_RATIOS, 0, req, + &resp); + if (ret) + return ret; + + debug_printf( + "cpu:%d CONFIG_TDP_GET_TURBO_LIMIT_RATIOS req:%x resp:%x\n", + id->cpu, req, resp); + + trl[0] = resp & GENMASK(7, 0); + trl[1] = (resp & GENMASK(15, 8)) >> 8; + trl[2] = (resp & GENMASK(23, 16)) >> 16; + trl[3] = (resp & GENMASK(31, 24)) >> 24; + + req = level | BIT(8) | (avx_level << 16); + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, + CONFIG_TDP_GET_TURBO_LIMIT_RATIOS, 0, req, + &resp); + if (ret) + return ret; + + debug_printf("cpu:%d CONFIG_TDP_GET_TURBO_LIMIT req:%x resp:%x\n", id->cpu, + req, resp); + + trl[4] = resp & GENMASK(7, 0); + trl[5] = (resp & GENMASK(15, 8)) >> 8; + trl[6] = (resp & GENMASK(23, 16)) >> 16; + trl[7] = (resp & GENMASK(31, 24)) >> 24; + + return 0; +} + +int isst_get_trl_bucket_info(struct isst_id *id, unsigned long long *buckets_info) +{ + int ret; + + debug_printf("cpu:%d bucket info via MSR\n", id->cpu); + + *buckets_info = 0; + + ret = isst_send_msr_command(id->cpu, 0x1ae, 0, buckets_info); + if (ret) + return ret; + + debug_printf("cpu:%d bucket info via MSR successful 0x%llx\n", id->cpu, + *buckets_info); + + return 0; +} + +int isst_set_tdp_level(struct isst_id *id, int tdp_level) +{ + unsigned int resp; + int ret; + + + if (isst_get_config_tdp_lock_status(id)) { + isst_display_error_info_message(1, "TDP is locked", 0, 0); + return -1; + + } + + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, CONFIG_TDP_SET_LEVEL, 0, + tdp_level, &resp); + if (ret) { + isst_display_error_info_message(1, "Set TDP level failed for level", 1, tdp_level); + return ret; + } + + return 0; +} + +int isst_get_pbf_info(struct isst_id *id, int level, struct isst_pbf_info *pbf_info) +{ + struct isst_pkg_ctdp_level_info ctdp_level; + struct isst_pkg_ctdp pkg_dev; + int i, ret, max_punit_core, max_mask_index; + unsigned int req, resp; + + ret = isst_get_ctdp_levels(id, &pkg_dev); + if (ret) { + isst_display_error_info_message(1, "Failed to get number of levels", 0, 0); + return ret; + } + + if (level > pkg_dev.levels) { + isst_display_error_info_message(1, "Invalid level", 1, level); + return -1; + } + + ret = isst_get_ctdp_control(id, level, &ctdp_level); + if (ret) + return ret; + + if (!ctdp_level.pbf_support) { + isst_display_error_info_message(1, "base-freq feature is not present at this level", 1, level); + return -1; + } + + pbf_info->core_cpumask_size = alloc_cpu_set(&pbf_info->core_cpumask); + + max_punit_core = get_max_punit_core_id(id); + max_mask_index = max_punit_core > 32 ? 2 : 1; + + for (i = 0; i < max_mask_index; ++i) { + unsigned long long mask; + int count; + + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, + CONFIG_TDP_PBF_GET_CORE_MASK_INFO, + 0, (i << 8) | level, &resp); + if (ret) + break; + + debug_printf( + "cpu:%d CONFIG_TDP_PBF_GET_CORE_MASK_INFO resp:%x\n", + id->cpu, resp); + + mask = (unsigned long long)resp << (32 * i); + set_cpu_mask_from_punit_coremask(id, mask, + pbf_info->core_cpumask_size, + pbf_info->core_cpumask, + &count); + } + + req = level; + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, + CONFIG_TDP_PBF_GET_P1HI_P1LO_INFO, 0, req, + &resp); + if (ret) + return ret; + + debug_printf("cpu:%d CONFIG_TDP_PBF_GET_P1HI_P1LO_INFO resp:%x\n", id->cpu, + resp); + + pbf_info->p1_low = resp & 0xff; + pbf_info->p1_high = (resp & GENMASK(15, 8)) >> 8; + + req = level; + ret = isst_send_mbox_command( + id->cpu, CONFIG_TDP, CONFIG_TDP_PBF_GET_TDP_INFO, 0, req, &resp); + if (ret) + return ret; + + debug_printf("cpu:%d CONFIG_TDP_PBF_GET_TDP_INFO resp:%x\n", id->cpu, resp); + + pbf_info->tdp = resp & 0xffff; + + req = level; + ret = isst_send_mbox_command( + id->cpu, CONFIG_TDP, CONFIG_TDP_PBF_GET_TJ_MAX_INFO, 0, req, &resp); + if (ret) + return ret; + + debug_printf("cpu:%d CONFIG_TDP_PBF_GET_TJ_MAX_INFO resp:%x\n", id->cpu, + resp); + pbf_info->t_control = (resp >> 8) & 0xff; + pbf_info->t_prochot = resp & 0xff; + + return 0; +} + +void isst_get_pbf_info_complete(struct isst_pbf_info *pbf_info) +{ + free_cpu_set(pbf_info->core_cpumask); +} + +int isst_set_pbf_fact_status(struct isst_id *id, int pbf, int enable) +{ + struct isst_pkg_ctdp pkg_dev; + struct isst_pkg_ctdp_level_info ctdp_level; + int current_level; + unsigned int req = 0, resp; + int ret; + + ret = isst_get_ctdp_levels(id, &pkg_dev); + if (ret) + debug_printf("cpu:%d No support for dynamic ISST\n", id->cpu); + + current_level = pkg_dev.current_level; + + ret = isst_get_ctdp_control(id, current_level, &ctdp_level); + if (ret) + return ret; + + if (pbf) { + if (ctdp_level.fact_enabled) + req = BIT(16); + + if (enable) + req |= BIT(17); + else + req &= ~BIT(17); + } else { + + if (enable && !ctdp_level.sst_cp_enabled) + isst_display_error_info_message(0, "Make sure to execute before: core-power enable", 0, 0); + + if (ctdp_level.pbf_enabled) + req = BIT(17); + + if (enable) + req |= BIT(16); + else + req &= ~BIT(16); + } + + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, + CONFIG_TDP_SET_TDP_CONTROL, 0, req, &resp); + if (ret) + return ret; + + debug_printf("cpu:%d CONFIG_TDP_SET_TDP_CONTROL pbf/fact:%d req:%x\n", + id->cpu, pbf, req); + + return 0; +} + +int isst_get_fact_bucket_info(struct isst_id *id, int level, + struct isst_fact_bucket_info *bucket_info) +{ + unsigned int resp; + int i, k, ret; + + for (i = 0; i < 2; ++i) { + int j; + + ret = isst_send_mbox_command( + id->cpu, CONFIG_TDP, + CONFIG_TDP_GET_FACT_HP_TURBO_LIMIT_NUMCORES, 0, + (i << 8) | level, &resp); + if (ret) + return ret; + + debug_printf( + "cpu:%d CONFIG_TDP_GET_FACT_HP_TURBO_LIMIT_NUMCORES index:%d level:%d resp:%x\n", + id->cpu, i, level, resp); + + for (j = 0; j < 4; ++j) { + bucket_info[j + (i * 4)].high_priority_cores_count = + (resp >> (j * 8)) & 0xff; + } + } + + for (k = 0; k < 3; ++k) { + for (i = 0; i < 2; ++i) { + int j; + + ret = isst_send_mbox_command( + id->cpu, CONFIG_TDP, + CONFIG_TDP_GET_FACT_HP_TURBO_LIMIT_RATIOS, 0, + (k << 16) | (i << 8) | level, &resp); + if (ret) + return ret; + + debug_printf( + "cpu:%d CONFIG_TDP_GET_FACT_HP_TURBO_LIMIT_RATIOS index:%d level:%d avx:%d resp:%x\n", + id->cpu, i, level, k, resp); + + for (j = 0; j < 4; ++j) { + switch (k) { + case 0: + bucket_info[j + (i * 4)].sse_trl = + (resp >> (j * 8)) & 0xff; + break; + case 1: + bucket_info[j + (i * 4)].avx_trl = + (resp >> (j * 8)) & 0xff; + break; + case 2: + bucket_info[j + (i * 4)].avx512_trl = + (resp >> (j * 8)) & 0xff; + break; + default: + break; + } + } + } + } + + return 0; +} + +int isst_get_fact_info(struct isst_id *id, int level, int fact_bucket, struct isst_fact_info *fact_info) +{ + struct isst_pkg_ctdp_level_info ctdp_level; + struct isst_pkg_ctdp pkg_dev; + unsigned int resp; + int j, ret, print; + + ret = isst_get_ctdp_levels(id, &pkg_dev); + if (ret) { + isst_display_error_info_message(1, "Failed to get number of levels", 0, 0); + return ret; + } + + if (level > pkg_dev.levels) { + isst_display_error_info_message(1, "Invalid level", 1, level); + return -1; + } + + ret = isst_get_ctdp_control(id, level, &ctdp_level); + if (ret) + return ret; + + if (!ctdp_level.fact_support) { + isst_display_error_info_message(1, "turbo-freq feature is not present at this level", 1, level); + return -1; + } + + ret = isst_send_mbox_command(id->cpu, CONFIG_TDP, + CONFIG_TDP_GET_FACT_LP_CLIPPING_RATIO, 0, + level, &resp); + if (ret) + return ret; + + debug_printf("cpu:%d CONFIG_TDP_GET_FACT_LP_CLIPPING_RATIO resp:%x\n", + id->cpu, resp); + + fact_info->lp_clipping_ratio_license_sse = resp & 0xff; + fact_info->lp_clipping_ratio_license_avx2 = (resp >> 8) & 0xff; + fact_info->lp_clipping_ratio_license_avx512 = (resp >> 16) & 0xff; + + ret = isst_get_fact_bucket_info(id, level, fact_info->bucket_info); + if (ret) + return ret; + + print = 0; + for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) { + if (fact_bucket != 0xff && fact_bucket != j) + continue; + + if (!fact_info->bucket_info[j].high_priority_cores_count) + break; + + print = 1; + } + if (!print) { + isst_display_error_info_message(1, "Invalid bucket", 0, 0); + return -1; + } + + return 0; +} + +int isst_get_trl(struct isst_id *id, unsigned long long *trl) +{ + int ret; + + ret = isst_send_msr_command(id->cpu, 0x1AD, 0, trl); + if (ret) + return ret; + + return 0; +} + +int isst_set_trl(struct isst_id *id, unsigned long long trl) +{ + int ret; + + if (!trl) + trl = 0xFFFFFFFFFFFFFFFFULL; + + ret = isst_send_msr_command(id->cpu, 0x1AD, 1, &trl); + if (ret) + return ret; + + return 0; +} + +int isst_set_trl_from_current_tdp(struct isst_id *id, unsigned long long trl) +{ + unsigned long long msr_trl; + int ret; + + if (trl) { + msr_trl = trl; + } else { + struct isst_pkg_ctdp pkg_dev; + int trl[8]; + int i; + + ret = isst_get_ctdp_levels(id, &pkg_dev); + if (ret) + return ret; + + ret = isst_get_get_trl(id, pkg_dev.current_level, 0, trl); + if (ret) + return ret; + + msr_trl = 0; + for (i = 0; i < 8; ++i) { + unsigned long long _trl = trl[i]; + + msr_trl |= (_trl << (i * 8)); + } + } + ret = isst_send_msr_command(id->cpu, 0x1AD, 1, &msr_trl); + if (ret) + return ret; + + return 0; +} + +/* Return 1 if locked */ +int isst_get_config_tdp_lock_status(struct isst_id *id) +{ + unsigned long long tdp_control = 0; + int ret; + + ret = isst_send_msr_command(id->cpu, 0x64b, 0, &tdp_control); + if (ret) + return ret; + + ret = !!(tdp_control & BIT(31)); + + return ret; +} + +void isst_get_process_ctdp_complete(struct isst_id *id, struct isst_pkg_ctdp *pkg_dev) +{ + int i; + + if (!pkg_dev->processed) + return; + + for (i = 0; i < pkg_dev->levels; ++i) { + struct isst_pkg_ctdp_level_info *ctdp_level; + + ctdp_level = &pkg_dev->ctdp_level[i]; + if (ctdp_level->pbf_support) + free_cpu_set(ctdp_level->pbf_info.core_cpumask); + free_cpu_set(ctdp_level->core_cpumask); + } +} + +int isst_get_process_ctdp(struct isst_id *id, int tdp_level, struct isst_pkg_ctdp *pkg_dev) +{ + int i, ret, valid = 0; + + if (pkg_dev->processed) + return 0; + + ret = isst_get_ctdp_levels(id, pkg_dev); + if (ret) + return ret; + + debug_printf("cpu: %d ctdp enable:%d current level: %d levels:%d\n", + id->cpu, pkg_dev->enabled, pkg_dev->current_level, + pkg_dev->levels); + + if (tdp_level != 0xff && tdp_level > pkg_dev->levels) { + isst_display_error_info_message(1, "Invalid level", 0, 0); + return -1; + } + + if (!pkg_dev->enabled) + isst_display_error_info_message(0, "perf-profile feature is not supported, just base-config level 0 is valid", 0, 0); + + for (i = 0; i <= pkg_dev->levels; ++i) { + struct isst_pkg_ctdp_level_info *ctdp_level; + + if (tdp_level != 0xff && i != tdp_level) + continue; + + debug_printf("cpu:%d Get Information for TDP level:%d\n", id->cpu, + i); + ctdp_level = &pkg_dev->ctdp_level[i]; + + ctdp_level->level = i; + ctdp_level->control_cpu = id->cpu; + ctdp_level->pkg_id = id->pkg; + ctdp_level->die_id = id->die; + + ret = isst_get_ctdp_control(id, i, ctdp_level); + if (ret) + continue; + + valid = 1; + pkg_dev->processed = 1; + ctdp_level->processed = 1; + + if (ctdp_level->pbf_support) { + ret = isst_get_pbf_info(id, i, &ctdp_level->pbf_info); + if (!ret) + ctdp_level->pbf_found = 1; + } + + if (ctdp_level->fact_support) { + ret = isst_get_fact_info(id, i, 0xff, + &ctdp_level->fact_info); + if (ret) + return ret; + } + + if (!pkg_dev->enabled && is_skx_based_platform()) { + int freq; + + freq = get_cpufreq_base_freq(id->cpu); + if (freq > 0) { + ctdp_level->sse_p1 = freq / 100000; + ctdp_level->tdp_ratio = ctdp_level->sse_p1; + } + + isst_get_get_trl_from_msr(id, ctdp_level->trl_sse_active_cores); + isst_get_trl_bucket_info(id, &ctdp_level->buckets_info); + continue; + } + + ret = isst_get_tdp_info(id, i, ctdp_level); + if (ret) + return ret; + + ret = isst_get_pwr_info(id, i, ctdp_level); + if (ret) + return ret; + + ret = isst_get_tjmax_info(id, i, ctdp_level); + if (ret) + return ret; + + ctdp_level->core_cpumask_size = + alloc_cpu_set(&ctdp_level->core_cpumask); + ret = isst_get_coremask_info(id, i, ctdp_level); + if (ret) + return ret; + + ret = isst_get_trl_bucket_info(id, &ctdp_level->buckets_info); + if (ret) + return ret; + + ret = isst_get_get_trl(id, i, 0, + ctdp_level->trl_sse_active_cores); + if (ret) + return ret; + + ret = isst_get_get_trl(id, i, 1, + ctdp_level->trl_avx_active_cores); + if (ret) + return ret; + + ret = isst_get_get_trl(id, i, 2, + ctdp_level->trl_avx_512_active_cores); + if (ret) + return ret; + + isst_get_uncore_p0_p1_info(id, i, ctdp_level); + isst_get_p1_info(id, i, ctdp_level); + isst_get_uncore_mem_freq(id, i, ctdp_level); + } + + if (!valid) + isst_display_error_info_message(0, "Invalid level, Can't get TDP control information at specified levels on cpu", 1, id->cpu); + + return 0; +} + +int isst_clos_get_clos_information(struct isst_id *id, int *enable, int *type) +{ + unsigned int resp; + int ret; + + ret = isst_send_mbox_command(id->cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0, + &resp); + if (ret) + return ret; + + debug_printf("cpu:%d CLOS_PM_QOS_CONFIG resp:%x\n", id->cpu, resp); + + if (resp & BIT(1)) + *enable = 1; + else + *enable = 0; + + if (resp & BIT(2)) + *type = 1; + else + *type = 0; + + return 0; +} + +int isst_pm_qos_config(struct isst_id *id, int enable_clos, int priority_type) +{ + unsigned int req, resp; + int ret; + + if (!enable_clos) { + struct isst_pkg_ctdp pkg_dev; + struct isst_pkg_ctdp_level_info ctdp_level; + + ret = isst_get_ctdp_levels(id, &pkg_dev); + if (ret) { + debug_printf("isst_get_ctdp_levels\n"); + return ret; + } + + ret = isst_get_ctdp_control(id, pkg_dev.current_level, + &ctdp_level); + if (ret) + return ret; + + if (ctdp_level.fact_enabled) { + isst_display_error_info_message(1, "Ignoring request, turbo-freq feature is still enabled", 0, 0); + return -EINVAL; + } + ret = isst_write_pm_config(id, 0); + if (ret) + isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error", 0, 0); + } else { + ret = isst_write_pm_config(id, 1); + if (ret) + isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error", 0, 0); + } + + ret = isst_send_mbox_command(id->cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0, + &resp); + if (ret) { + isst_display_error_info_message(1, "CLOS_PM_QOS_CONFIG command failed", 0, 0); + return ret; + } + + debug_printf("cpu:%d CLOS_PM_QOS_CONFIG resp:%x\n", id->cpu, resp); + + req = resp; + + if (enable_clos) + req = req | BIT(1); + else + req = req & ~BIT(1); + + if (priority_type > 1) + isst_display_error_info_message(1, "Invalid priority type: Changing type to ordered", 0, 0); + + if (priority_type) + req = req | BIT(2); + else + req = req & ~BIT(2); + + ret = isst_send_mbox_command(id->cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, + BIT(MBOX_CMD_WRITE_BIT), req, &resp); + if (ret) + return ret; + + debug_printf("cpu:%d CLOS_PM_QOS_CONFIG priority type:%d req:%x\n", id->cpu, + priority_type, req); + + return 0; +} + +int isst_pm_get_clos(struct isst_id *id, int clos, struct isst_clos_config *clos_config) +{ + unsigned int resp; + int ret; + + ret = isst_send_mbox_command(id->cpu, CONFIG_CLOS, CLOS_PM_CLOS, clos, 0, + &resp); + if (ret) + return ret; + + clos_config->epp = resp & 0x0f; + clos_config->clos_prop_prio = (resp >> 4) & 0x0f; + clos_config->clos_min = (resp >> 8) & 0xff; + clos_config->clos_max = (resp >> 16) & 0xff; + clos_config->clos_desired = (resp >> 24) & 0xff; + + return 0; +} + +int isst_set_clos(struct isst_id *id, int clos, struct isst_clos_config *clos_config) +{ + unsigned int req, resp; + unsigned int param; + int ret; + + req = clos_config->epp & 0x0f; + req |= (clos_config->clos_prop_prio & 0x0f) << 4; + req |= (clos_config->clos_min & 0xff) << 8; + req |= (clos_config->clos_max & 0xff) << 16; + req |= (clos_config->clos_desired & 0xff) << 24; + + param = BIT(MBOX_CMD_WRITE_BIT) | clos; + + ret = isst_send_mbox_command(id->cpu, CONFIG_CLOS, CLOS_PM_CLOS, param, req, + &resp); + if (ret) + return ret; + + debug_printf("cpu:%d CLOS_PM_CLOS param:%x req:%x\n", id->cpu, param, req); + + return 0; +} + +int isst_clos_get_assoc_status(struct isst_id *id, int *clos_id) +{ + unsigned int resp; + unsigned int param; + int core_id, ret; + + core_id = find_phy_core_num(id->cpu); + param = core_id; + + ret = isst_send_mbox_command(id->cpu, CONFIG_CLOS, CLOS_PQR_ASSOC, param, 0, + &resp); + if (ret) + return ret; + + debug_printf("cpu:%d CLOS_PQR_ASSOC param:%x resp:%x\n", id->cpu, param, + resp); + *clos_id = (resp >> 16) & 0x03; + + return 0; +} + +int isst_clos_associate(struct isst_id *id, int clos_id) +{ + unsigned int req, resp; + unsigned int param; + int core_id, ret; + + req = (clos_id & 0x03) << 16; + core_id = find_phy_core_num(id->cpu); + param = BIT(MBOX_CMD_WRITE_BIT) | core_id; + + ret = isst_send_mbox_command(id->cpu, CONFIG_CLOS, CLOS_PQR_ASSOC, param, + req, &resp); + if (ret) + return ret; + + debug_printf("cpu:%d CLOS_PQR_ASSOC param:%x req:%x\n", id->cpu, param, + req); + + return 0; +} diff --git a/tools/power/x86/intel-speed-select/isst-daemon.c b/tools/power/x86/intel-speed-select/isst-daemon.c new file mode 100644 index 000000000..0699137c0 --- /dev/null +++ b/tools/power/x86/intel-speed-select/isst-daemon.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel Speed Select -- Allow speed select to daemonize + * Copyright (c) 2022 Intel Corporation. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/file.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <errno.h> +#include <getopt.h> +#include <signal.h> +#include <time.h> + +#include "isst.h" + +static int per_package_levels_info[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE]; +static time_t per_package_levels_tm[MAX_PACKAGE_COUNT][MAX_DIE_PER_PACKAGE]; + +static void init_levels(void) +{ + int i, j; + + for (i = 0; i < MAX_PACKAGE_COUNT; ++i) + for (j = 0; j < MAX_DIE_PER_PACKAGE; ++j) + per_package_levels_info[i][j] = -1; +} + +void process_level_change(struct isst_id *id) +{ + struct isst_pkg_ctdp_level_info ctdp_level; + struct isst_pkg_ctdp pkg_dev; + time_t tm; + int ret; + + if (id->pkg < 0 || id->die < 0) { + debug_printf("Invalid package/die info for cpu:%d\n", id->cpu); + return; + } + + tm = time(NULL); + if (tm - per_package_levels_tm[id->pkg][id->die] < 2) + return; + + per_package_levels_tm[id->pkg][id->die] = tm; + + ret = isst_get_ctdp_levels(id, &pkg_dev); + if (ret) { + debug_printf("Can't get tdp levels for cpu:%d\n", id->cpu); + return; + } + + debug_printf("Get Config level %d pkg:%d die:%d current_level:%d\n", id->cpu, + id->pkg, id->die, pkg_dev.current_level); + + if (pkg_dev.locked) { + debug_printf("config TDP s locked \n"); + return; + } + + if (per_package_levels_info[id->pkg][id->die] == pkg_dev.current_level) + return; + + debug_printf("**Config level change for cpu:%d pkg:%d die:%d from %d to %d\n", + id->cpu, id->pkg, id->die, per_package_levels_info[id->pkg][id->die], + pkg_dev.current_level); + + per_package_levels_info[id->pkg][id->die] = pkg_dev.current_level; + + ctdp_level.core_cpumask_size = + alloc_cpu_set(&ctdp_level.core_cpumask); + ret = isst_get_coremask_info(id, pkg_dev.current_level, &ctdp_level); + if (ret) { + free_cpu_set(ctdp_level.core_cpumask); + debug_printf("Can't get core_mask:%d\n", id->cpu); + return; + } + + if (ctdp_level.cpu_count) { + int i, max_cpus = get_topo_max_cpus(); + for (i = 0; i < max_cpus; ++i) { + if (!is_cpu_in_power_domain(i, id)) + continue; + if (CPU_ISSET_S(i, ctdp_level.core_cpumask_size, ctdp_level.core_cpumask)) { + fprintf(stderr, "online cpu %d\n", i); + set_cpu_online_offline(i, 1); + } else { + fprintf(stderr, "offline cpu %d\n", i); + set_cpu_online_offline(i, 0); + } + } + } + + free_cpu_set(ctdp_level.core_cpumask); +} + +static void _poll_for_config_change(struct isst_id *id, void *arg1, void *arg2, + void *arg3, void *arg4) +{ + process_level_change(id); +} + +static void poll_for_config_change(void) +{ + for_each_online_package_in_set(_poll_for_config_change, NULL, NULL, + NULL, NULL); +} + +static int done = 0; +static int pid_file_handle; + +static void signal_handler(int sig) +{ + switch (sig) { + case SIGINT: + case SIGTERM: + done = 1; + hfi_exit(); + exit(0); + break; + default: + break; + } +} + +static void daemonize(char *rundir, char *pidfile) +{ + int pid, sid, i; + char str[10]; + struct sigaction sig_actions; + sigset_t sig_set; + int ret; + + if (getppid() == 1) + return; + + sigemptyset(&sig_set); + sigaddset(&sig_set, SIGCHLD); + sigaddset(&sig_set, SIGTSTP); + sigaddset(&sig_set, SIGTTOU); + sigaddset(&sig_set, SIGTTIN); + sigprocmask(SIG_BLOCK, &sig_set, NULL); + + sig_actions.sa_handler = signal_handler; + sigemptyset(&sig_actions.sa_mask); + sig_actions.sa_flags = 0; + + sigaction(SIGHUP, &sig_actions, NULL); + sigaction(SIGTERM, &sig_actions, NULL); + sigaction(SIGINT, &sig_actions, NULL); + + pid = fork(); + if (pid < 0) { + /* Could not fork */ + exit(EXIT_FAILURE); + } + if (pid > 0) + exit(EXIT_SUCCESS); + + umask(027); + + sid = setsid(); + if (sid < 0) + exit(EXIT_FAILURE); + + /* close all descriptors */ + for (i = getdtablesize(); i >= 0; --i) + close(i); + + i = open("/dev/null", O_RDWR); + ret = dup(i); + if (ret == -1) + exit(EXIT_FAILURE); + + ret = dup(i); + if (ret == -1) + exit(EXIT_FAILURE); + + ret = chdir(rundir); + if (ret == -1) + exit(EXIT_FAILURE); + + pid_file_handle = open(pidfile, O_RDWR | O_CREAT, 0600); + if (pid_file_handle == -1) { + /* Couldn't open lock file */ + exit(1); + } + /* Try to lock file */ +#ifdef LOCKF_SUPPORT + if (lockf(pid_file_handle, F_TLOCK, 0) == -1) { +#else + if (flock(pid_file_handle, LOCK_EX|LOCK_NB) < 0) { +#endif + /* Couldn't get lock on lock file */ + fprintf(stderr, "Couldn't get lock file %d\n", getpid()); + exit(1); + } + snprintf(str, sizeof(str), "%d\n", getpid()); + ret = write(pid_file_handle, str, strlen(str)); + if (ret == -1) + exit(EXIT_FAILURE); + + close(i); +} + +int isst_daemon(int debug_mode, int poll_interval, int no_daemon) +{ + int ret; + + if (!no_daemon && poll_interval < 0 && !debug_mode) { + fprintf(stderr, "OOB mode is enabled and will run as daemon\n"); + daemonize((char *) "/tmp/", + (char *)"/tmp/hfi-events.pid"); + } else { + signal(SIGINT, signal_handler); + } + + init_levels(); + + if (poll_interval < 0) { + ret = hfi_main(); + if (ret) { + fprintf(stderr, "HFI initialization failed\n"); + } + fprintf(stderr, "Must specify poll-interval\n"); + return ret; + } + + debug_printf("Starting loop\n"); + while (!done) { + sleep(poll_interval); + poll_for_config_change(); + } + + return 0; +} diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c new file mode 100644 index 000000000..b19f57d30 --- /dev/null +++ b/tools/power/x86/intel-speed-select/isst-display.c @@ -0,0 +1,785 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Intel dynamic_speed_select -- Enumerate and control features + * Copyright (c) 2019 Intel Corporation. + */ + +#include "isst.h" + +static void printcpulist(int str_len, char *str, int mask_size, + cpu_set_t *cpu_mask) +{ + int i, first, curr_index, index; + + if (!CPU_COUNT_S(mask_size, cpu_mask)) { + snprintf(str, str_len, "none"); + return; + } + + curr_index = 0; + first = 1; + for (i = 0; i < get_topo_max_cpus(); ++i) { + if (!CPU_ISSET_S(i, mask_size, cpu_mask)) + continue; + if (!first) { + index = snprintf(&str[curr_index], + str_len - curr_index, ","); + curr_index += index; + if (curr_index >= str_len) + break; + } + index = snprintf(&str[curr_index], str_len - curr_index, "%d", + i); + curr_index += index; + if (curr_index >= str_len) + break; + first = 0; + } +} + +static void printcpumask(int str_len, char *str, int mask_size, + cpu_set_t *cpu_mask) +{ + int i, max_cpus = get_topo_max_cpus(); + unsigned int *mask; + int size, index, curr_index; + + size = max_cpus / (sizeof(unsigned int) * 8); + if (max_cpus % (sizeof(unsigned int) * 8)) + size++; + + mask = calloc(size, sizeof(unsigned int)); + if (!mask) + return; + + for (i = 0; i < max_cpus; ++i) { + int mask_index, bit_index; + + if (!CPU_ISSET_S(i, mask_size, cpu_mask)) + continue; + + mask_index = i / (sizeof(unsigned int) * 8); + bit_index = i % (sizeof(unsigned int) * 8); + mask[mask_index] |= BIT(bit_index); + } + + curr_index = 0; + for (i = size - 1; i >= 0; --i) { + index = snprintf(&str[curr_index], str_len - curr_index, "%08x", + mask[i]); + curr_index += index; + if (curr_index >= str_len) + break; + if (i) { + strncat(&str[curr_index], ",", str_len - curr_index); + curr_index++; + } + if (curr_index >= str_len) + break; + } + + free(mask); +} + +static void format_and_print_txt(FILE *outf, int level, char *header, + char *value) +{ + char *spaces = " "; + static char delimiters[256]; + int i, j = 0; + + if (!level) + return; + + if (level == 1) { + strcpy(delimiters, " "); + } else { + for (i = 0; i < level - 1; ++i) + j += snprintf(&delimiters[j], sizeof(delimiters) - j, + "%s", spaces); + } + + if (header && value) { + fprintf(outf, "%s", delimiters); + fprintf(outf, "%s:%s\n", header, value); + } else if (header) { + fprintf(outf, "%s", delimiters); + fprintf(outf, "%s\n", header); + } +} + +static int last_level; +static void format_and_print(FILE *outf, int level, char *header, char *value) +{ + char *spaces = " "; + static char delimiters[256]; + int i; + + if (!out_format_is_json()) { + format_and_print_txt(outf, level, header, value); + return; + } + + if (level == 0) { + if (header) + fprintf(outf, "{"); + else + fprintf(outf, "\n}\n"); + + } else { + int j = 0; + + for (i = 0; i < level; ++i) + j += snprintf(&delimiters[j], sizeof(delimiters) - j, + "%s", spaces); + + if (last_level == level) + fprintf(outf, ",\n"); + + if (value) { + if (last_level != level) + fprintf(outf, "\n"); + + fprintf(outf, "%s\"%s\": ", delimiters, header); + fprintf(outf, "\"%s\"", value); + } else { + for (i = last_level - 1; i >= level; --i) { + int k = 0; + + for (j = i; j > 0; --j) + k += snprintf(&delimiters[k], + sizeof(delimiters) - k, + "%s", spaces); + if (i == level && header) + fprintf(outf, "\n%s},", delimiters); + else + fprintf(outf, "\n%s}", delimiters); + } + if (abs(last_level - level) < 3) + fprintf(outf, "\n"); + if (header) + fprintf(outf, "%s\"%s\": {", delimiters, + header); + } + } + + last_level = level; +} + +static int print_package_info(struct isst_id *id, FILE *outf) +{ + char header[256]; + + if (out_format_is_json()) { + snprintf(header, sizeof(header), "package-%d:die-%d:cpu-%d", + id->pkg, id->die, id->cpu); + format_and_print(outf, 1, header, NULL); + return 1; + } + snprintf(header, sizeof(header), "package-%d", id->pkg); + format_and_print(outf, 1, header, NULL); + snprintf(header, sizeof(header), "die-%d", id->die); + format_and_print(outf, 2, header, NULL); + snprintf(header, sizeof(header), "cpu-%d", id->cpu); + format_and_print(outf, 3, header, NULL); + + return 3; +} + +static void _isst_pbf_display_information(struct isst_id *id, FILE *outf, int level, + struct isst_pbf_info *pbf_info, + int disp_level) +{ + char header[256]; + char value[512]; + + snprintf(header, sizeof(header), "speed-select-base-freq-properties"); + format_and_print(outf, disp_level, header, NULL); + + snprintf(header, sizeof(header), "high-priority-base-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", + pbf_info->p1_high * DISP_FREQ_MULTIPLIER); + format_and_print(outf, disp_level + 1, header, value); + + snprintf(header, sizeof(header), "high-priority-cpu-mask"); + printcpumask(sizeof(value), value, pbf_info->core_cpumask_size, + pbf_info->core_cpumask); + format_and_print(outf, disp_level + 1, header, value); + + snprintf(header, sizeof(header), "high-priority-cpu-list"); + printcpulist(sizeof(value), value, + pbf_info->core_cpumask_size, + pbf_info->core_cpumask); + format_and_print(outf, disp_level + 1, header, value); + + snprintf(header, sizeof(header), "low-priority-base-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", + pbf_info->p1_low * DISP_FREQ_MULTIPLIER); + format_and_print(outf, disp_level + 1, header, value); + + if (is_clx_n_platform()) + return; + + snprintf(header, sizeof(header), "tjunction-temperature(C)"); + snprintf(value, sizeof(value), "%d", pbf_info->t_prochot); + format_and_print(outf, disp_level + 1, header, value); + + snprintf(header, sizeof(header), "thermal-design-power(W)"); + snprintf(value, sizeof(value), "%d", pbf_info->tdp); + format_and_print(outf, disp_level + 1, header, value); +} + +static void _isst_fact_display_information(struct isst_id *id, FILE *outf, int level, + int fact_bucket, int fact_avx, + struct isst_fact_info *fact_info, + int base_level) +{ + struct isst_fact_bucket_info *bucket_info = fact_info->bucket_info; + char header[256]; + char value[256]; + int print = 0, j; + + for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) { + if (fact_bucket != 0xff && fact_bucket != j) + continue; + + if (!bucket_info[j].high_priority_cores_count) + break; + + print = 1; + } + if (!print) { + fprintf(stderr, "Invalid bucket\n"); + return; + } + + snprintf(header, sizeof(header), "speed-select-turbo-freq-properties"); + format_and_print(outf, base_level, header, NULL); + for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) { + if (fact_bucket != 0xff && fact_bucket != j) + continue; + + if (!bucket_info[j].high_priority_cores_count) + break; + + snprintf(header, sizeof(header), "bucket-%d", j); + format_and_print(outf, base_level + 1, header, NULL); + + snprintf(header, sizeof(header), "high-priority-cores-count"); + snprintf(value, sizeof(value), "%d", + bucket_info[j].high_priority_cores_count); + format_and_print(outf, base_level + 2, header, value); + + if (fact_avx & 0x01) { + snprintf(header, sizeof(header), + "high-priority-max-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", + bucket_info[j].sse_trl * DISP_FREQ_MULTIPLIER); + format_and_print(outf, base_level + 2, header, value); + } + + if (fact_avx & 0x02) { + snprintf(header, sizeof(header), + "high-priority-max-avx2-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", + bucket_info[j].avx_trl * DISP_FREQ_MULTIPLIER); + format_and_print(outf, base_level + 2, header, value); + } + + if (fact_avx & 0x04) { + snprintf(header, sizeof(header), + "high-priority-max-avx512-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", + bucket_info[j].avx512_trl * + DISP_FREQ_MULTIPLIER); + format_and_print(outf, base_level + 2, header, value); + } + } + snprintf(header, sizeof(header), + "speed-select-turbo-freq-clip-frequencies"); + format_and_print(outf, base_level + 1, header, NULL); + snprintf(header, sizeof(header), "low-priority-max-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", + fact_info->lp_clipping_ratio_license_sse * + DISP_FREQ_MULTIPLIER); + format_and_print(outf, base_level + 2, header, value); + snprintf(header, sizeof(header), + "low-priority-max-avx2-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", + fact_info->lp_clipping_ratio_license_avx2 * + DISP_FREQ_MULTIPLIER); + format_and_print(outf, base_level + 2, header, value); + snprintf(header, sizeof(header), + "low-priority-max-avx512-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", + fact_info->lp_clipping_ratio_license_avx512 * + DISP_FREQ_MULTIPLIER); + format_and_print(outf, base_level + 2, header, value); +} + +void isst_ctdp_display_core_info(struct isst_id *id, FILE *outf, char *prefix, + unsigned int val, char *str0, char *str1) +{ + char header[256]; + char value[256]; + int level = 1; + + if (out_format_is_json()) { + snprintf(header, sizeof(header), "package-%d:die-%d:cpu-%d", + id->pkg, id->die, id->cpu); + format_and_print(outf, level++, header, NULL); + } else { + snprintf(header, sizeof(header), "package-%d", id->pkg); + format_and_print(outf, level++, header, NULL); + snprintf(header, sizeof(header), "die-%d", id->die); + format_and_print(outf, level++, header, NULL); + snprintf(header, sizeof(header), "cpu-%d", id->cpu); + format_and_print(outf, level++, header, NULL); + } + + if (str0 && !val) + snprintf(value, sizeof(value), "%s", str0); + else if (str1 && val) + snprintf(value, sizeof(value), "%s", str1); + else + snprintf(value, sizeof(value), "%u", val); + format_and_print(outf, level, prefix, value); + + format_and_print(outf, 1, NULL, NULL); +} + +void isst_ctdp_display_information(struct isst_id *id, FILE *outf, int tdp_level, + struct isst_pkg_ctdp *pkg_dev) +{ + char header[256]; + char value[512]; + static int level; + int i; + + if (pkg_dev->processed) + level = print_package_info(id, outf); + + for (i = 0; i <= pkg_dev->levels; ++i) { + struct isst_pkg_ctdp_level_info *ctdp_level; + int j; + + ctdp_level = &pkg_dev->ctdp_level[i]; + if (!ctdp_level->processed) + continue; + + snprintf(header, sizeof(header), "perf-profile-level-%d", + ctdp_level->level); + format_and_print(outf, level + 1, header, NULL); + + snprintf(header, sizeof(header), "cpu-count"); + j = get_cpu_count(id); + snprintf(value, sizeof(value), "%d", j); + format_and_print(outf, level + 2, header, value); + + j = CPU_COUNT_S(ctdp_level->core_cpumask_size, + ctdp_level->core_cpumask); + if (j) { + snprintf(header, sizeof(header), "enable-cpu-count"); + snprintf(value, sizeof(value), "%d", j); + format_and_print(outf, level + 2, header, value); + } + + if (ctdp_level->core_cpumask_size) { + snprintf(header, sizeof(header), "enable-cpu-mask"); + printcpumask(sizeof(value), value, + ctdp_level->core_cpumask_size, + ctdp_level->core_cpumask); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), "enable-cpu-list"); + printcpulist(sizeof(value), value, + ctdp_level->core_cpumask_size, + ctdp_level->core_cpumask); + format_and_print(outf, level + 2, header, value); + } + + snprintf(header, sizeof(header), "thermal-design-power-ratio"); + snprintf(value, sizeof(value), "%d", ctdp_level->tdp_ratio); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), "base-frequency(MHz)"); + if (!ctdp_level->sse_p1) + ctdp_level->sse_p1 = ctdp_level->tdp_ratio; + snprintf(value, sizeof(value), "%d", + ctdp_level->sse_p1 * DISP_FREQ_MULTIPLIER); + format_and_print(outf, level + 2, header, value); + + if (ctdp_level->avx2_p1) { + snprintf(header, sizeof(header), "base-frequency-avx2(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->avx2_p1 * DISP_FREQ_MULTIPLIER); + format_and_print(outf, level + 2, header, value); + } + + if (ctdp_level->avx512_p1) { + snprintf(header, sizeof(header), "base-frequency-avx512(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->avx512_p1 * DISP_FREQ_MULTIPLIER); + format_and_print(outf, level + 2, header, value); + } + + if (ctdp_level->uncore_p1) { + snprintf(header, sizeof(header), "uncore-frequency-min(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->uncore_p1 * DISP_FREQ_MULTIPLIER); + format_and_print(outf, level + 2, header, value); + } + + if (ctdp_level->uncore_p0) { + snprintf(header, sizeof(header), "uncore-frequency-max(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->uncore_p0 * DISP_FREQ_MULTIPLIER); + format_and_print(outf, level + 2, header, value); + } + + if (ctdp_level->mem_freq) { + snprintf(header, sizeof(header), "mem-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->mem_freq); + format_and_print(outf, level + 2, header, value); + } + + snprintf(header, sizeof(header), + "speed-select-turbo-freq"); + if (ctdp_level->fact_support) { + if (ctdp_level->fact_enabled) + snprintf(value, sizeof(value), "enabled"); + else + snprintf(value, sizeof(value), "disabled"); + } else + snprintf(value, sizeof(value), "unsupported"); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), + "speed-select-base-freq"); + if (ctdp_level->pbf_support) { + if (ctdp_level->pbf_enabled) + snprintf(value, sizeof(value), "enabled"); + else + snprintf(value, sizeof(value), "disabled"); + } else + snprintf(value, sizeof(value), "unsupported"); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), + "speed-select-core-power"); + if (ctdp_level->sst_cp_support) { + if (ctdp_level->sst_cp_enabled) + snprintf(value, sizeof(value), "enabled"); + else + snprintf(value, sizeof(value), "disabled"); + } else + snprintf(value, sizeof(value), "unsupported"); + format_and_print(outf, level + 2, header, value); + + if (is_clx_n_platform()) { + if (ctdp_level->pbf_support) + _isst_pbf_display_information(id, outf, + tdp_level, + &ctdp_level->pbf_info, + level + 2); + continue; + } + + if (ctdp_level->pkg_tdp) { + snprintf(header, sizeof(header), "thermal-design-power(W)"); + snprintf(value, sizeof(value), "%d", ctdp_level->pkg_tdp); + format_and_print(outf, level + 2, header, value); + } + + if (ctdp_level->t_proc_hot) { + snprintf(header, sizeof(header), "tjunction-max(C)"); + snprintf(value, sizeof(value), "%d", ctdp_level->t_proc_hot); + format_and_print(outf, level + 2, header, value); + } + + snprintf(header, sizeof(header), "turbo-ratio-limits-sse"); + format_and_print(outf, level + 2, header, NULL); + for (j = 0; j < 8; ++j) { + snprintf(header, sizeof(header), "bucket-%d", j); + format_and_print(outf, level + 3, header, NULL); + + snprintf(header, sizeof(header), "core-count"); + snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff); + format_and_print(outf, level + 4, header, value); + + snprintf(header, sizeof(header), + "max-turbo-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", + ctdp_level->trl_sse_active_cores[j] * + DISP_FREQ_MULTIPLIER); + format_and_print(outf, level + 4, header, value); + } + + if (ctdp_level->trl_avx_active_cores[0]) { + snprintf(header, sizeof(header), "turbo-ratio-limits-avx2"); + format_and_print(outf, level + 2, header, NULL); + for (j = 0; j < 8; ++j) { + snprintf(header, sizeof(header), "bucket-%d", j); + format_and_print(outf, level + 3, header, NULL); + + snprintf(header, sizeof(header), "core-count"); + snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff); + format_and_print(outf, level + 4, header, value); + + snprintf(header, sizeof(header), "max-turbo-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", ctdp_level->trl_avx_active_cores[j] * DISP_FREQ_MULTIPLIER); + format_and_print(outf, level + 4, header, value); + } + } + + if (ctdp_level->trl_avx_512_active_cores[0]) { + snprintf(header, sizeof(header), "turbo-ratio-limits-avx512"); + format_and_print(outf, level + 2, header, NULL); + for (j = 0; j < 8; ++j) { + snprintf(header, sizeof(header), "bucket-%d", j); + format_and_print(outf, level + 3, header, NULL); + + snprintf(header, sizeof(header), "core-count"); + snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff); + format_and_print(outf, level + 4, header, value); + + snprintf(header, sizeof(header), "max-turbo-frequency(MHz)"); + snprintf(value, sizeof(value), "%d", ctdp_level->trl_avx_512_active_cores[j] * DISP_FREQ_MULTIPLIER); + format_and_print(outf, level + 4, header, value); + } + } + + if (ctdp_level->pbf_support) + _isst_pbf_display_information(id, outf, i, + &ctdp_level->pbf_info, + level + 2); + if (ctdp_level->fact_support) + _isst_fact_display_information(id, outf, i, 0xff, 0xff, + &ctdp_level->fact_info, + level + 2); + } + + format_and_print(outf, 1, NULL, NULL); +} + +static int start; +void isst_ctdp_display_information_start(FILE *outf) +{ + last_level = 0; + format_and_print(outf, 0, "start", NULL); + start = 1; +} + +void isst_ctdp_display_information_end(FILE *outf) +{ + format_and_print(outf, 0, NULL, NULL); + start = 0; +} + +void isst_pbf_display_information(struct isst_id *id, FILE *outf, int level, + struct isst_pbf_info *pbf_info) +{ + int _level; + + _level = print_package_info(id, outf); + _isst_pbf_display_information(id, outf, level, pbf_info, _level + 1); + format_and_print(outf, 1, NULL, NULL); +} + +void isst_fact_display_information(struct isst_id *id, FILE *outf, int level, + int fact_bucket, int fact_avx, + struct isst_fact_info *fact_info) +{ + int _level; + + _level = print_package_info(id, outf); + _isst_fact_display_information(id, outf, level, fact_bucket, fact_avx, + fact_info, _level + 1); + format_and_print(outf, 1, NULL, NULL); +} + +void isst_clos_display_information(struct isst_id *id, FILE *outf, int clos, + struct isst_clos_config *clos_config) +{ + char header[256]; + char value[256]; + int level; + + level = print_package_info(id, outf); + + snprintf(header, sizeof(header), "core-power"); + format_and_print(outf, level + 1, header, NULL); + + snprintf(header, sizeof(header), "clos"); + snprintf(value, sizeof(value), "%d", clos); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), "epp"); + snprintf(value, sizeof(value), "%d", clos_config->epp); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), "clos-proportional-priority"); + snprintf(value, sizeof(value), "%d", clos_config->clos_prop_prio); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), "clos-min"); + snprintf(value, sizeof(value), "%d MHz", clos_config->clos_min * DISP_FREQ_MULTIPLIER); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), "clos-max"); + if (clos_config->clos_max == 0xff) + snprintf(value, sizeof(value), "Max Turbo frequency"); + else + snprintf(value, sizeof(value), "%d MHz", clos_config->clos_max * DISP_FREQ_MULTIPLIER); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), "clos-desired"); + snprintf(value, sizeof(value), "%d MHz", clos_config->clos_desired * DISP_FREQ_MULTIPLIER); + format_and_print(outf, level + 2, header, value); + + format_and_print(outf, level, NULL, NULL); +} + +void isst_clos_display_clos_information(struct isst_id *id, FILE *outf, + int clos_enable, int type, + int state, int cap) +{ + char header[256]; + char value[256]; + int level; + + level = print_package_info(id, outf); + + snprintf(header, sizeof(header), "core-power"); + format_and_print(outf, level + 1, header, NULL); + + snprintf(header, sizeof(header), "support-status"); + if (cap) + snprintf(value, sizeof(value), "supported"); + else + snprintf(value, sizeof(value), "unsupported"); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), "enable-status"); + if (state) + snprintf(value, sizeof(value), "enabled"); + else + snprintf(value, sizeof(value), "disabled"); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), "clos-enable-status"); + if (clos_enable) + snprintf(value, sizeof(value), "enabled"); + else + snprintf(value, sizeof(value), "disabled"); + format_and_print(outf, level + 2, header, value); + + snprintf(header, sizeof(header), "priority-type"); + if (type) + snprintf(value, sizeof(value), "ordered"); + else + snprintf(value, sizeof(value), "proportional"); + format_and_print(outf, level + 2, header, value); + + format_and_print(outf, level, NULL, NULL); +} + +void isst_clos_display_assoc_information(struct isst_id *id, FILE *outf, int clos) +{ + char header[256]; + char value[256]; + int level; + + level = print_package_info(id, outf); + + snprintf(header, sizeof(header), "get-assoc"); + format_and_print(outf, level + 1, header, NULL); + + snprintf(header, sizeof(header), "clos"); + snprintf(value, sizeof(value), "%d", clos); + format_and_print(outf, level + 2, header, value); + + format_and_print(outf, level, NULL, NULL); +} + +void isst_display_result(struct isst_id *id, FILE *outf, char *feature, char *cmd, + int result) +{ + char header[256]; + char value[256]; + int level = 3; + + if (id->cpu >= 0) + level = print_package_info(id, outf); + + snprintf(header, sizeof(header), "%s", feature); + format_and_print(outf, level + 1, header, NULL); + snprintf(header, sizeof(header), "%s", cmd); + if (!result) + snprintf(value, sizeof(value), "success"); + else + snprintf(value, sizeof(value), "failed(error %d)", result); + format_and_print(outf, level + 2, header, value); + + format_and_print(outf, level, NULL, NULL); +} + +void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg) +{ + FILE *outf = get_output_file(); + static int error_index; + char header[256]; + char value[256]; + + if (!out_format_is_json()) { + if (arg_valid) + snprintf(value, sizeof(value), "%s %d", msg, arg); + else + snprintf(value, sizeof(value), "%s", msg); + + if (error) + fprintf(outf, "Error: %s\n", value); + else + fprintf(outf, "Information: %s\n", value); + return; + } + + if (!start) + format_and_print(outf, 0, "start", NULL); + + if (error) + snprintf(header, sizeof(header), "Error%d", error_index++); + else + snprintf(header, sizeof(header), "Information:%d", error_index++); + format_and_print(outf, 1, header, NULL); + + snprintf(header, sizeof(header), "message"); + if (arg_valid) + snprintf(value, sizeof(value), "%s %d", msg, arg); + else + snprintf(value, sizeof(value), "%s", msg); + + format_and_print(outf, 2, header, value); + format_and_print(outf, 1, NULL, NULL); + if (!start) + format_and_print(outf, 0, NULL, NULL); +} + +void isst_trl_display_information(struct isst_id *id, FILE *outf, unsigned long long trl) +{ + char header[256]; + char value[256]; + int level; + + level = print_package_info(id, outf); + + snprintf(header, sizeof(header), "get-trl"); + format_and_print(outf, level + 1, header, NULL); + + snprintf(header, sizeof(header), "trl"); + snprintf(value, sizeof(value), "0x%llx", trl); + format_and_print(outf, level + 2, header, value); + + format_and_print(outf, level, NULL, NULL); +} diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h new file mode 100644 index 000000000..409fcc9c8 --- /dev/null +++ b/tools/power/x86/intel-speed-select/isst.h @@ -0,0 +1,276 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Intel Speed Select -- Enumerate and control features + * Copyright (c) 2019 Intel Corporation. + */ + +#ifndef _ISST_H_ +#define _ISST_H_ + +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sched.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <getopt.h> +#include <err.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/time.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <cpuid.h> +#include <dirent.h> +#include <errno.h> + +#include <stdarg.h> +#include <sys/ioctl.h> + +#define BIT(x) (1 << (x)) +#define BIT_ULL(nr) (1ULL << (nr)) +#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h)))) +#define GENMASK_ULL(h, l) \ + (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h)))) + +#define CONFIG_TDP 0x7f +#define CONFIG_TDP_GET_LEVELS_INFO 0x00 +#define CONFIG_TDP_GET_TDP_CONTROL 0x01 +#define CONFIG_TDP_SET_TDP_CONTROL 0x02 +#define CONFIG_TDP_GET_TDP_INFO 0x03 +#define CONFIG_TDP_GET_PWR_INFO 0x04 +#define CONFIG_TDP_GET_TJMAX_INFO 0x05 +#define CONFIG_TDP_GET_CORE_MASK 0x06 +#define CONFIG_TDP_GET_TURBO_LIMIT_RATIOS 0x07 +#define CONFIG_TDP_SET_LEVEL 0x08 +#define CONFIG_TDP_GET_UNCORE_P0_P1_INFO 0X09 +#define CONFIG_TDP_GET_P1_INFO 0x0a +#define CONFIG_TDP_GET_MEM_FREQ 0x0b + +#define CONFIG_TDP_GET_FACT_HP_TURBO_LIMIT_NUMCORES 0x10 +#define CONFIG_TDP_GET_FACT_HP_TURBO_LIMIT_RATIOS 0x11 +#define CONFIG_TDP_GET_FACT_LP_CLIPPING_RATIO 0x12 + +#define CONFIG_TDP_PBF_GET_CORE_MASK_INFO 0x20 +#define CONFIG_TDP_PBF_GET_P1HI_P1LO_INFO 0x21 +#define CONFIG_TDP_PBF_GET_TJ_MAX_INFO 0x22 +#define CONFIG_TDP_PBF_GET_TDP_INFO 0X23 + +#define CONFIG_CLOS 0xd0 +#define CLOS_PQR_ASSOC 0x00 +#define CLOS_PM_CLOS 0x01 +#define CLOS_PM_QOS_CONFIG 0x02 +#define CLOS_STATUS 0x03 + +#define MBOX_CMD_WRITE_BIT 0x08 + +#define PM_QOS_INFO_OFFSET 0x00 +#define PM_QOS_CONFIG_OFFSET 0x04 +#define PM_CLOS_OFFSET 0x08 +#define PQR_ASSOC_OFFSET 0x20 + +#define READ_PM_CONFIG 0x94 +#define WRITE_PM_CONFIG 0x95 +#define PM_FEATURE 0x03 + +#define DISP_FREQ_MULTIPLIER 100 + +#define MAX_PACKAGE_COUNT 8 +#define MAX_DIE_PER_PACKAGE 2 + +/* Unified structure to specific a CPU or a Power Domain */ +struct isst_id { + int cpu; + int pkg; + int die; +}; + +struct isst_clos_config { + unsigned char epp; + unsigned char clos_prop_prio; + unsigned char clos_min; + unsigned char clos_max; + unsigned char clos_desired; +}; + +struct isst_fact_bucket_info { + int high_priority_cores_count; + int sse_trl; + int avx_trl; + int avx512_trl; +}; + +struct isst_pbf_info { + int pbf_acticated; + int pbf_available; + size_t core_cpumask_size; + cpu_set_t *core_cpumask; + int p1_high; + int p1_low; + int t_control; + int t_prochot; + int tdp; +}; + +#define ISST_TRL_MAX_ACTIVE_CORES 8 +#define ISST_FACT_MAX_BUCKETS 8 +struct isst_fact_info { + int lp_clipping_ratio_license_sse; + int lp_clipping_ratio_license_avx2; + int lp_clipping_ratio_license_avx512; + struct isst_fact_bucket_info bucket_info[ISST_FACT_MAX_BUCKETS]; +}; + +struct isst_pkg_ctdp_level_info { + int processed; + int control_cpu; + int pkg_id; + int die_id; + int level; + int fact_support; + int pbf_support; + int fact_enabled; + int pbf_enabled; + int sst_cp_support; + int sst_cp_enabled; + int tdp_ratio; + int active; + int tdp_control; + int pkg_tdp; + int pkg_min_power; + int pkg_max_power; + int fact; + int t_proc_hot; + int uncore_p0; + int uncore_p1; + int sse_p1; + int avx2_p1; + int avx512_p1; + int mem_freq; + size_t core_cpumask_size; + cpu_set_t *core_cpumask; + int cpu_count; + unsigned long long buckets_info; + int trl_sse_active_cores[ISST_TRL_MAX_ACTIVE_CORES]; + int trl_avx_active_cores[ISST_TRL_MAX_ACTIVE_CORES]; + int trl_avx_512_active_cores[ISST_TRL_MAX_ACTIVE_CORES]; + int kobj_bucket_index; + int active_bucket; + int fact_max_index; + int fact_max_config; + int pbf_found; + int pbf_active; + struct isst_pbf_info pbf_info; + struct isst_fact_info fact_info; +}; + +#define ISST_MAX_TDP_LEVELS (4 + 1) /* +1 for base config */ +struct isst_pkg_ctdp { + int locked; + int version; + int processed; + int levels; + int current_level; + int enabled; + struct isst_pkg_ctdp_level_info ctdp_level[ISST_MAX_TDP_LEVELS]; +}; + +extern int is_cpu_in_power_domain(int cpu, struct isst_id *id); +extern int get_topo_max_cpus(void); +extern int get_cpu_count(struct isst_id *id); +extern int get_max_punit_core_id(struct isst_id *id); + +/* Common interfaces */ +FILE *get_output_file(void); +extern void debug_printf(const char *format, ...); +extern int out_format_is_json(void); +extern void set_isst_id(struct isst_id *id, int cpu); +extern size_t alloc_cpu_set(cpu_set_t **cpu_set); +extern void free_cpu_set(cpu_set_t *cpu_set); +extern int find_phy_core_num(int logical_cpu); +extern void set_cpu_mask_from_punit_coremask(struct isst_id *id, + unsigned long long core_mask, + size_t core_cpumask_size, + cpu_set_t *core_cpumask, + int *cpu_cnt); + +extern int isst_send_mbox_command(unsigned int cpu, unsigned char command, + unsigned char sub_command, + unsigned int write, + unsigned int req_data, unsigned int *resp); + +extern int isst_send_msr_command(unsigned int cpu, unsigned int command, + int write, unsigned long long *req_resp); + +extern int isst_get_ctdp_levels(struct isst_id *id, struct isst_pkg_ctdp *pkg_dev); +extern int isst_get_ctdp_control(struct isst_id *id, int config_index, + struct isst_pkg_ctdp_level_info *ctdp_level); +extern int isst_get_coremask_info(struct isst_id *id, int config_index, + struct isst_pkg_ctdp_level_info *ctdp_level); +extern int isst_get_process_ctdp(struct isst_id *id, int tdp_level, + struct isst_pkg_ctdp *pkg_dev); +extern void isst_get_process_ctdp_complete(struct isst_id *id, + struct isst_pkg_ctdp *pkg_dev); +extern void isst_ctdp_display_information(struct isst_id *id, FILE *outf, int tdp_level, + struct isst_pkg_ctdp *pkg_dev); +extern void isst_ctdp_display_core_info(struct isst_id *id, FILE *outf, char *prefix, + unsigned int val, char *str0, char *str1); +extern void isst_ctdp_display_information_start(FILE *outf); +extern void isst_ctdp_display_information_end(FILE *outf); +extern void isst_pbf_display_information(struct isst_id *id, FILE *outf, int level, + struct isst_pbf_info *info); +extern int isst_set_tdp_level(struct isst_id *id, int tdp_level); +extern int isst_set_pbf_fact_status(struct isst_id *id, int pbf, int enable); +extern int isst_get_pbf_info(struct isst_id *id, int level, + struct isst_pbf_info *pbf_info); +extern void isst_get_pbf_info_complete(struct isst_pbf_info *pbf_info); +extern int isst_get_fact_info(struct isst_id *id, int level, int fact_bucket, + struct isst_fact_info *fact_info); +extern int isst_get_fact_bucket_info(struct isst_id *id, int level, + struct isst_fact_bucket_info *bucket_info); +extern void isst_fact_display_information(struct isst_id *id, FILE *outf, int level, + int fact_bucket, int fact_avx, + struct isst_fact_info *fact_info); +extern int isst_set_trl(struct isst_id *id, unsigned long long trl); +extern int isst_get_trl(struct isst_id *id, unsigned long long *trl); +extern int isst_set_trl_from_current_tdp(struct isst_id *id, unsigned long long trl); +extern int isst_get_config_tdp_lock_status(struct isst_id *id); + +extern int isst_pm_qos_config(struct isst_id *id, int enable_clos, int priority_type); +extern int isst_pm_get_clos(struct isst_id *id, int clos, + struct isst_clos_config *clos_config); +extern int isst_set_clos(struct isst_id *id, int clos, + struct isst_clos_config *clos_config); +extern int isst_clos_associate(struct isst_id *id, int clos); +extern int isst_clos_get_assoc_status(struct isst_id *id, int *clos_id); +extern void isst_clos_display_information(struct isst_id *id, FILE *outf, int clos, + struct isst_clos_config *clos_config); +extern void isst_clos_display_assoc_information(struct isst_id *id, FILE *outf, int clos); + +extern void isst_display_result(struct isst_id *id, FILE *outf, char *feature, char *cmd, + int result); + +extern int isst_clos_get_clos_information(struct isst_id *id, int *enable, int *type); +extern void isst_clos_display_clos_information(struct isst_id *id, FILE *outf, + int clos_enable, int type, + int state, int cap); +extern int is_clx_n_platform(void); +extern int get_cpufreq_base_freq(int cpu); +extern int isst_read_pm_config(struct isst_id *id, int *cp_state, int *cp_cap); +extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg); +extern int is_skx_based_platform(void); +extern int is_spr_platform(void); +extern int is_icx_platform(void); +extern void isst_trl_display_information(struct isst_id *id, FILE *outf, unsigned long long trl); + +extern void set_cpu_online_offline(int cpu, int state); +extern void for_each_online_package_in_set(void (*callback)(struct isst_id *, void *, void *, + void *, void *), + void *arg1, void *arg2, void *arg3, + void *arg4); +extern int isst_daemon(int debug_mode, int poll_interval, int no_daemon); +extern void process_level_change(struct isst_id *id); +extern int hfi_main(void); +extern void hfi_exit(void); +#endif diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py new file mode 100755 index 000000000..b46e9eb8f --- /dev/null +++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py @@ -0,0 +1,613 @@ +#!/usr/bin/env python +# SPDX-License-Identifier: GPL-2.0-only +# -*- coding: utf-8 -*- +# +""" This utility can be used to debug and tune the performance of the +intel_pstate driver. This utility can be used in two ways: +- If there is Linux trace file with pstate_sample events enabled, then +this utility can parse the trace file and generate performance plots. +- If user has not specified a trace file as input via command line parameters, +then this utility enables and collects trace data for a user specified interval +and generates performance plots. + +Prerequisites: + Python version 2.7.x or higher + gnuplot 5.0 or higher + gnuplot-py 1.8 or higher + (Most of the distributions have these required packages. They may be called + gnuplot-py, phython-gnuplot or phython3-gnuplot, gnuplot-nox, ... ) + + HWP (Hardware P-States are disabled) + Kernel config for Linux trace is enabled + + see print_help(): for Usage and Output details + +""" +from __future__ import print_function +from datetime import datetime +import subprocess +import os +import time +import re +import signal +import sys +import getopt +import Gnuplot +from numpy import * +from decimal import * + +__author__ = "Srinivas Pandruvada" +__copyright__ = " Copyright (c) 2017, Intel Corporation. " +__license__ = "GPL version 2" + + +MAX_CPUS = 256 + +# Define the csv file columns +C_COMM = 18 +C_GHZ = 17 +C_ELAPSED = 16 +C_SAMPLE = 15 +C_DURATION = 14 +C_LOAD = 13 +C_BOOST = 12 +C_FREQ = 11 +C_TSC = 10 +C_APERF = 9 +C_MPERF = 8 +C_TO = 7 +C_FROM = 6 +C_SCALED = 5 +C_CORE = 4 +C_USEC = 3 +C_SEC = 2 +C_CPU = 1 + +global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname, trace_file + +# 11 digits covers uptime to 115 days +getcontext().prec = 11 + +sample_num =0 +last_sec_cpu = [0] * MAX_CPUS +last_usec_cpu = [0] * MAX_CPUS + +def print_help(driver_name): + print('%s_tracer.py:'%driver_name) + print(' Usage:') + print(' If the trace file is available, then to simply parse and plot, use (sudo not required):') + print(' ./%s_tracer.py [-c cpus] -t <trace_file> -n <test_name>'%driver_name) + print(' Or') + print(' ./%s_tracer.py [--cpu cpus] ---trace_file <trace_file> --name <test_name>'%driver_name) + print(' To generate trace file, parse and plot, use (sudo required):') + print(' sudo ./%s_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>'%driver_name) + print(' Or') + print(' sudo ./%s_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>'%driver_name) + print(' Optional argument:') + print(' cpus: comma separated list of CPUs') + print(' kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240') + print(' Output:') + print(' If not already present, creates a "results/test_name" folder in the current working directory with:') + print(' cpu.csv - comma seperated values file with trace contents and some additional calculations.') + print(' cpu???.csv - comma seperated values file for CPU number ???.') + print(' *.png - a variety of PNG format plot files created from the trace contents and the additional calculations.') + print(' Notes:') + print(' Avoid the use of _ (underscore) in test names, because in gnuplot it is a subscript directive.') + print(' Maximum number of CPUs is {0:d}. If there are more the script will abort with an error.'.format(MAX_CPUS)) + print(' Off-line CPUs cause the script to list some warnings, and create some empty files. Use the CPU mask feature for a clean run.') + print(' Empty y range warnings for autoscaled plots can occur and can be ignored.') + +def plot_perf_busy_with_sample(cpu_index): + """ Plot method to per cpu information """ + + file_name = 'cpu{:0>3}.csv'.format(cpu_index) + if os.path.exists(file_name): + output_png = "cpu%03d_perf_busy_vs_samples.png" % cpu_index + g_plot = common_all_gnuplot_settings(output_png) +# autoscale this one, no set y1 range + g_plot('set y2range [0:200]') + g_plot('set y2tics 0, 10') + g_plot('set title "{} : cpu perf busy vs. sample : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now())) +# Override common + g_plot('set xlabel "Samples"') + g_plot('set ylabel "P-State"') + g_plot('set y2label "Scaled Busy/performance/io-busy(%)"') + set_4_plot_linestyles(g_plot) + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y2 title "performance",\\'.format(C_SAMPLE, C_CORE)) + g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 2 axis x1y2 title "scaled-busy",\\'.format(C_SAMPLE, C_SCALED)) + g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 3 axis x1y2 title "io-boost",\\'.format(C_SAMPLE, C_BOOST)) + g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 4 axis x1y1 title "P-State"'.format(C_SAMPLE, C_TO)) + +def plot_perf_busy(cpu_index): + """ Plot some per cpu information """ + + file_name = 'cpu{:0>3}.csv'.format(cpu_index) + if os.path.exists(file_name): + output_png = "cpu%03d_perf_busy.png" % cpu_index + g_plot = common_all_gnuplot_settings(output_png) +# autoscale this one, no set y1 range + g_plot('set y2range [0:200]') + g_plot('set y2tics 0, 10') + g_plot('set title "{} : perf busy : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now())) + g_plot('set ylabel "P-State"') + g_plot('set y2label "Scaled Busy/performance/io-busy(%)"') + set_4_plot_linestyles(g_plot) + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y2 title "performance",\\'.format(C_ELAPSED, C_CORE)) + g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 2 axis x1y2 title "scaled-busy",\\'.format(C_ELAPSED, C_SCALED)) + g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 3 axis x1y2 title "io-boost",\\'.format(C_ELAPSED, C_BOOST)) + g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 4 axis x1y1 title "P-State"'.format(C_ELAPSED, C_TO)) + +def plot_durations(cpu_index): + """ Plot per cpu durations """ + + file_name = 'cpu{:0>3}.csv'.format(cpu_index) + if os.path.exists(file_name): + output_png = "cpu%03d_durations.png" % cpu_index + g_plot = common_all_gnuplot_settings(output_png) +# autoscale this one, no set y range + g_plot('set title "{} : durations : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now())) + g_plot('set ylabel "Timer Duration (MilliSeconds)"') +# override common + g_plot('set key off') + set_4_plot_linestyles(g_plot) + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_DURATION)) + +def plot_loads(cpu_index): + """ Plot per cpu loads """ + + file_name = 'cpu{:0>3}.csv'.format(cpu_index) + if os.path.exists(file_name): + output_png = "cpu%03d_loads.png" % cpu_index + g_plot = common_all_gnuplot_settings(output_png) + g_plot('set yrange [0:100]') + g_plot('set ytics 0, 10') + g_plot('set title "{} : loads : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now())) + g_plot('set ylabel "CPU load (percent)"') +# override common + g_plot('set key off') + set_4_plot_linestyles(g_plot) + g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_LOAD)) + +def plot_pstate_cpu_with_sample(): + """ Plot all cpu information """ + + if os.path.exists('cpu.csv'): + output_png = 'all_cpu_pstates_vs_samples.png' + g_plot = common_all_gnuplot_settings(output_png) +# autoscale this one, no set y range +# override common + g_plot('set xlabel "Samples"') + g_plot('set ylabel "P-State"') + g_plot('set title "{} : cpu pstate vs. sample : {:%F %H:%M}"'.format(testname, datetime.now())) + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_SAMPLE, C_TO) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def plot_pstate_cpu(): + """ Plot all cpu information from csv files """ + + output_png = 'all_cpu_pstates.png' + g_plot = common_all_gnuplot_settings(output_png) +# autoscale this one, no set y range + g_plot('set ylabel "P-State"') + g_plot('set title "{} : cpu pstates : {:%F %H:%M}"'.format(testname, datetime.now())) + +# the following command is really cool, but doesn't work with the CPU masking option because it aborts on the first missing file. +# plot_str = 'plot for [i=0:*] file=sprintf("cpu%03d.csv",i) title_s=sprintf("cpu%03d",i) file using 16:7 pt 7 ps 1 title title_s' +# + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_TO) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def plot_load_cpu(): + """ Plot all cpu loads """ + + output_png = 'all_cpu_loads.png' + g_plot = common_all_gnuplot_settings(output_png) + g_plot('set yrange [0:100]') + g_plot('set ylabel "CPU load (percent)"') + g_plot('set title "{} : cpu loads : {:%F %H:%M}"'.format(testname, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_LOAD) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def plot_frequency_cpu(): + """ Plot all cpu frequencies """ + + output_png = 'all_cpu_frequencies.png' + g_plot = common_all_gnuplot_settings(output_png) +# autoscale this one, no set y range + g_plot('set ylabel "CPU Frequency (GHz)"') + g_plot('set title "{} : cpu frequencies : {:%F %H:%M}"'.format(testname, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_FREQ) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def plot_duration_cpu(): + """ Plot all cpu durations """ + + output_png = 'all_cpu_durations.png' + g_plot = common_all_gnuplot_settings(output_png) +# autoscale this one, no set y range + g_plot('set ylabel "Timer Duration (MilliSeconds)"') + g_plot('set title "{} : cpu durations : {:%F %H:%M}"'.format(testname, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_DURATION) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def plot_scaled_cpu(): + """ Plot all cpu scaled busy """ + + output_png = 'all_cpu_scaled.png' + g_plot = common_all_gnuplot_settings(output_png) +# autoscale this one, no set y range + g_plot('set ylabel "Scaled Busy (Unitless)"') + g_plot('set title "{} : cpu scaled busy : {:%F %H:%M}"'.format(testname, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_SCALED) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def plot_boost_cpu(): + """ Plot all cpu IO Boosts """ + + output_png = 'all_cpu_boost.png' + g_plot = common_all_gnuplot_settings(output_png) + g_plot('set yrange [0:100]') + g_plot('set ylabel "CPU IO Boost (percent)"') + g_plot('set title "{} : cpu io boost : {:%F %H:%M}"'.format(testname, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_BOOST) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def plot_ghz_cpu(): + """ Plot all cpu tsc ghz """ + + output_png = 'all_cpu_ghz.png' + g_plot = common_all_gnuplot_settings(output_png) +# autoscale this one, no set y range + g_plot('set ylabel "TSC Frequency (GHz)"') + g_plot('set title "{} : cpu TSC Frequencies (Sanity check calculation) : {:%F %H:%M}"'.format(testname, datetime.now())) + + title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ') + plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_GHZ) + g_plot('title_list = "{}"'.format(title_list)) + g_plot(plot_str) + +def common_all_gnuplot_settings(output_png): + """ common gnuplot settings for multiple CPUs one one graph. """ + + g_plot = common_gnuplot_settings() + g_plot('set output "' + output_png + '"') + return(g_plot) + +def common_gnuplot_settings(): + """ common gnuplot settings. """ + + g_plot = Gnuplot.Gnuplot(persist=1) +# The following line is for rigor only. It seems to be assumed for .csv files + g_plot('set datafile separator \",\"') + g_plot('set ytics nomirror') + g_plot('set xtics nomirror') + g_plot('set xtics font ", 10"') + g_plot('set ytics font ", 10"') + g_plot('set tics out scale 1.0') + g_plot('set grid') + g_plot('set key out horiz') + g_plot('set key bot center') + g_plot('set key samplen 2 spacing .8 font ", 9"') + g_plot('set term png size 1200, 600') + g_plot('set title font ", 11"') + g_plot('set ylabel font ", 10"') + g_plot('set xlabel font ", 10"') + g_plot('set xlabel offset 0, 0.5') + g_plot('set xlabel "Elapsed Time (Seconds)"') + return(g_plot) + +def set_4_plot_linestyles(g_plot): + """ set the linestyles used for 4 plots in 1 graphs. """ + + g_plot('set style line 1 linetype 1 linecolor rgb "green" pointtype -1') + g_plot('set style line 2 linetype 1 linecolor rgb "red" pointtype -1') + g_plot('set style line 3 linetype 1 linecolor rgb "purple" pointtype -1') + g_plot('set style line 4 linetype 1 linecolor rgb "blue" pointtype -1') + +def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask): + """ Store master csv file information """ + + global graph_data_present + + if cpu_mask[cpu_int] == 0: + return + + try: + f_handle = open('cpu.csv', 'a') + string_buffer = "CPU_%03u, %05u, %06u, %u, %u, %u, %u, %u, %u, %u, %.4f, %u, %.2f, %.3f, %u, %.3f, %.3f, %s\n" % (cpu_int, int(time_pre_dec), int(time_post_dec), int(core_busy), int(scaled), int(_from), int(_to), int(mperf), int(aperf), int(tsc), freq_ghz, int(io_boost), load, duration_ms, sample_num, elapsed_time, tsc_ghz, common_comm) + f_handle.write(string_buffer); + f_handle.close() + except: + print('IO error cpu.csv') + return + + graph_data_present = True; + +def split_csv(current_max_cpu, cpu_mask): + """ seperate the all csv file into per CPU csv files. """ + + if os.path.exists('cpu.csv'): + for index in range(0, current_max_cpu + 1): + if cpu_mask[int(index)] != 0: + os.system('grep -m 1 common_cpu cpu.csv > cpu{:0>3}.csv'.format(index)) + os.system('grep CPU_{:0>3} cpu.csv >> cpu{:0>3}.csv'.format(index, index)) + +def fix_ownership(path): + """Change the owner of the file to SUDO_UID, if required""" + + uid = os.environ.get('SUDO_UID') + gid = os.environ.get('SUDO_GID') + if uid is not None: + os.chown(path, int(uid), int(gid)) + +def cleanup_data_files(): + """ clean up existing data files """ + + if os.path.exists('cpu.csv'): + os.remove('cpu.csv') + f_handle = open('cpu.csv', 'a') + f_handle.write('common_cpu, common_secs, common_usecs, core_busy, scaled_busy, from, to, mperf, aperf, tsc, freq, boost, load, duration_ms, sample_num, elapsed_time, tsc_ghz, common_comm') + f_handle.write('\n') + f_handle.close() + +def clear_trace_file(): + """ Clear trace file """ + + try: + f_handle = open('/sys/kernel/debug/tracing/trace', 'w') + f_handle.close() + except: + print('IO error clearing trace file ') + sys.exit(2) + +def enable_trace(trace_file): + """ Enable trace """ + + try: + open(trace_file,'w').write("1") + except: + print('IO error enabling trace ') + sys.exit(2) + +def disable_trace(trace_file): + """ Disable trace """ + + try: + open(trace_file, 'w').write("0") + except: + print('IO error disabling trace ') + sys.exit(2) + +def set_trace_buffer_size(memory): + """ Set trace buffer size """ + + try: + with open('/sys/kernel/debug/tracing/buffer_size_kb', 'w') as fp: + fp.write(memory) + except: + print('IO error setting trace buffer size ') + sys.exit(2) + +def free_trace_buffer(): + """ Free the trace buffer memory """ + + try: + open('/sys/kernel/debug/tracing/buffer_size_kb' + , 'w').write("1") + except: + print('IO error freeing trace buffer ') + sys.exit(2) + +def read_trace_data(filename, cpu_mask): + """ Read and parse trace data """ + + global current_max_cpu + global sample_num, last_sec_cpu, last_usec_cpu, start_time + + try: + data = open(filename, 'r').read() + except: + print('Error opening ', filename) + sys.exit(2) + + for line in data.splitlines(): + search_obj = \ + re.search(r'(^(.*?)\[)((\d+)[^\]])(.*?)(\d+)([.])(\d+)(.*?core_busy=)(\d+)(.*?scaled=)(\d+)(.*?from=)(\d+)(.*?to=)(\d+)(.*?mperf=)(\d+)(.*?aperf=)(\d+)(.*?tsc=)(\d+)(.*?freq=)(\d+)' + , line) + + if search_obj: + cpu = search_obj.group(3) + cpu_int = int(cpu) + cpu = str(cpu_int) + + time_pre_dec = search_obj.group(6) + time_post_dec = search_obj.group(8) + core_busy = search_obj.group(10) + scaled = search_obj.group(12) + _from = search_obj.group(14) + _to = search_obj.group(16) + mperf = search_obj.group(18) + aperf = search_obj.group(20) + tsc = search_obj.group(22) + freq = search_obj.group(24) + common_comm = search_obj.group(2).replace(' ', '') + + # Not all kernel versions have io_boost field + io_boost = '0' + search_obj = re.search(r'.*?io_boost=(\d+)', line) + if search_obj: + io_boost = search_obj.group(1) + + if sample_num == 0 : + start_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) + sample_num += 1 + + if last_sec_cpu[cpu_int] == 0 : + last_sec_cpu[cpu_int] = time_pre_dec + last_usec_cpu[cpu_int] = time_post_dec + else : + duration_us = (int(time_pre_dec) - int(last_sec_cpu[cpu_int])) * 1000000 + (int(time_post_dec) - int(last_usec_cpu[cpu_int])) + duration_ms = Decimal(duration_us) / Decimal(1000) + last_sec_cpu[cpu_int] = time_pre_dec + last_usec_cpu[cpu_int] = time_post_dec + elapsed_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) - start_time + load = Decimal(int(mperf)*100)/ Decimal(tsc) + freq_ghz = Decimal(freq)/Decimal(1000000) +# Sanity check calculation, typically anomalies indicate missed samples +# However, check for 0 (should never occur) + tsc_ghz = Decimal(0) + if duration_ms != Decimal(0) : + tsc_ghz = Decimal(tsc)/duration_ms/Decimal(1000000) + store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz, cpu_mask) + + if cpu_int > current_max_cpu: + current_max_cpu = cpu_int +# End of for each trace line loop +# Now seperate the main overall csv file into per CPU csv files. + split_csv(current_max_cpu, cpu_mask) + +def signal_handler(signal, frame): + print(' SIGINT: Forcing cleanup before exit.') + if interval: + disable_trace(trace_file) + clear_trace_file() + # Free the memory + free_trace_buffer() + sys.exit(0) + +if __name__ == "__main__": + trace_file = "/sys/kernel/debug/tracing/events/power/pstate_sample/enable" + signal.signal(signal.SIGINT, signal_handler) + + interval = "" + filename = "" + cpu_list = "" + testname = "" + memory = "10240" + graph_data_present = False; + + valid1 = False + valid2 = False + + cpu_mask = zeros((MAX_CPUS,), dtype=int) + + try: + opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="]) + except getopt.GetoptError: + print_help('intel_pstate') + sys.exit(2) + for opt, arg in opts: + if opt == '-h': + print_help('intel_pstate') + sys.exit() + elif opt in ("-t", "--trace_file"): + valid1 = True + location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) + filename = os.path.join(location, arg) + elif opt in ("-i", "--interval"): + valid1 = True + interval = arg + elif opt in ("-c", "--cpu"): + cpu_list = arg + elif opt in ("-n", "--name"): + valid2 = True + testname = arg + elif opt in ("-m", "--memory"): + memory = arg + + if not (valid1 and valid2): + print_help('intel_pstate') + sys.exit() + + if cpu_list: + for p in re.split("[,]", cpu_list): + if int(p) < MAX_CPUS : + cpu_mask[int(p)] = 1 + else: + for i in range (0, MAX_CPUS): + cpu_mask[i] = 1 + + if not os.path.exists('results'): + os.mkdir('results') + # The regular user needs to own the directory, not root. + fix_ownership('results') + + os.chdir('results') + if os.path.exists(testname): + print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.') + sys.exit() + os.mkdir(testname) + # The regular user needs to own the directory, not root. + fix_ownership(testname) + os.chdir(testname) + + # Temporary (or perhaps not) + cur_version = sys.version_info + print('python version (should be >= 2.7):') + print(cur_version) + + # Left as "cleanup" for potential future re-run ability. + cleanup_data_files() + + if interval: + filename = "/sys/kernel/debug/tracing/trace" + clear_trace_file() + set_trace_buffer_size(memory) + enable_trace(trace_file) + print('Sleeping for ', interval, 'seconds') + time.sleep(int(interval)) + disable_trace(trace_file) + + current_max_cpu = 0 + + read_trace_data(filename, cpu_mask) + + if interval: + clear_trace_file() + # Free the memory + free_trace_buffer() + + if graph_data_present == False: + print('No valid data to plot') + sys.exit(2) + + for cpu_no in range(0, current_max_cpu + 1): + plot_perf_busy_with_sample(cpu_no) + plot_perf_busy(cpu_no) + plot_durations(cpu_no) + plot_loads(cpu_no) + + plot_pstate_cpu_with_sample() + plot_pstate_cpu() + plot_load_cpu() + plot_frequency_cpu() + plot_duration_cpu() + plot_scaled_cpu() + plot_boost_cpu() + plot_ghz_cpu() + + # It is preferrable, but not necessary, that the regular user owns the files, not root. + for root, dirs, files in os.walk('.'): + for f in files: + fix_ownership(f) + + os.chdir('../../') diff --git a/tools/power/x86/turbostat/.gitignore b/tools/power/x86/turbostat/.gitignore new file mode 100644 index 000000000..e13109b43 --- /dev/null +++ b/tools/power/x86/turbostat/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +turbostat diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile new file mode 100644 index 000000000..92e139b9c --- /dev/null +++ b/tools/power/x86/turbostat/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 +CC = $(CROSS_COMPILE)gcc +BUILD_OUTPUT := $(CURDIR) +PREFIX ?= /usr +DESTDIR ?= + +ifeq ("$(origin O)", "command line") + BUILD_OUTPUT := $(O) +endif + +turbostat : turbostat.c +override CFLAGS += -O2 -Wall -Wextra -I../../../include +override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' +override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' +override CFLAGS += -D_FILE_OFFSET_BITS=64 +override CFLAGS += -D_FORTIFY_SOURCE=2 + +%: %.c + @mkdir -p $(BUILD_OUTPUT) + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap -lrt + +.PHONY : clean +clean : + @rm -f $(BUILD_OUTPUT)/turbostat + +install : turbostat + install -d $(DESTDIR)$(PREFIX)/bin + install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat + install -d $(DESTDIR)$(PREFIX)/share/man/man8 + install -m 644 turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 new file mode 100644 index 000000000..3e1a4c4be --- /dev/null +++ b/tools/power/x86/turbostat/turbostat.8 @@ -0,0 +1,403 @@ +.TH TURBOSTAT 8 +.SH NAME +turbostat \- Report processor frequency and idle statistics +.SH SYNOPSIS +.ft B +.B turbostat +.RB [ Options ] +.RB command +.br +.B turbostat +.RB [ Options ] +.RB [ "\--interval seconds" ] +.SH DESCRIPTION +\fBturbostat \fP reports processor topology, frequency, +idle power-state statistics, temperature and power on X86 processors. +There are two ways to invoke turbostat. +The first method is to supply a +\fBcommand\fP, which is forked and statistics are printed +in one-shot upon its completion. +The second method is to omit the command, +and turbostat displays statistics every 5 seconds interval. +The 5-second interval can be changed using the --interval option. +.PP +Some information is not available on older processors. +.SS Options +Options can be specified with a single or double '-', and only as much of the option +name as necessary to disambiguate it from others is necessary. Note that options are case-sensitive. +.PP +\fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional. +.nf + location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP} + msrDDD is a decimal offset, eg. msr16 + msr0xXXX is a hex offset, eg. msr0x10 + /sys/path... is an absolute path to a sysfs attribute + + scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP} + sample and print the counter for every cpu, core, or package. + default: cpu + + size: {\fBu32\fP | \fBu64\fP } + MSRs are read as 64-bits, u32 truncates the displayed value to 32-bits. + default: u64 + + format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP} + 'raw' shows the MSR contents in hex. + 'delta' shows the difference in values during the measurement interval. + 'percent' shows the delta as a percentage of the cycles elapsed. + default: delta + + name: "name_string" + Any string that does not match a key-word above is used + as the column header. +.fi +.PP +\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 +.PP +\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. +.PP +\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC". +The column name "all" can be used to enable all disabled-by-default built-in counters. +.PP +\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. +.PP +\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other". +.PP +\fB--Dump\fP displays the raw counter values. +.PP +\fB--quiet\fP Do not decode and print the system configuration header information. +.PP +\fB--interval seconds\fP overrides the default 5.0 second measurement interval. +.PP +\fB--num_iterations num\fP number of the measurement iterations. +.PP +\fB--out output_file\fP turbostat output is written to the specified output_file. +The file is truncated if it already exists, and it is created if it does not exist. +.PP +\fB--help\fP displays usage for the most common parameters. +.PP +\fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts. +.PP +\fB--list\fP display column header names available for use by --show and --hide, then exit. +.PP +\fB--Summary\fP limits output to a 1-line System Summary for each interval. +.PP +\fB--TCC temperature\fP sets the Thermal Control Circuit temperature for systems which do not export that value. This is used for making sense of the Digital Thermal Sensor outputs, as they return degrees Celsius below the TCC activation temperature. +.PP +\fB--version\fP displays the version. +.PP +The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit, +displays the statistics gathered since it was forked. +.PP +.SH ROW DESCRIPTIONS +The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depends on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system. +.SH COLUMN DESCRIPTIONS +.PP +\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus. +.PP +\fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU. +.PP +\fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). +.PP +\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. +.PP +\fBPackage\fP processor package number -- not present on systems with a single processor package. +.PP +\fBAvg_MHz\fP number of cycles executed divided by time elapsed. Note that this includes idle-time when 0 instructions are executed. +.PP +\fBBusy%\fP percent of the measurement interval that the CPU executes instructions, aka. % of time in "C0" state. +.PP +\fBBzy_MHz\fP average clock rate while the CPU was not idle (ie. in "c0" state). +.PP +\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. +.PP +\fBIRQ\fP The number of interrupts serviced by that CPU during the measurement interval. The system total line is the sum of interrupts serviced across all CPUs. turbostat parses /proc/interrupts to generate this summary. +.PP +\fBSMI\fP The number of System Management Interrupts serviced CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs. +.PP +\fBC1, C2, C3...\fP The number times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. They the system description section of output shows what MWAIT sub-states they are mapped to on each system. +.PP +\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software, reflecting what was requested. The hardware counters reflect what was actually achieved. +.PP +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters. +.PP +\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. +.PP +\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. +.PP +\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms. +.PP +\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz. +.PP +\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters. +.PP +\fBPkgWatt\fP Watts consumed by the whole package. +.PP +\fBCorWatt\fP Watts consumed by the core part of the package. +.PP +\fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors. +.PP +\fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. +.PP +\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary. +.PP +\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. +.PP +\fBUncMHz\fP uncore MHz, instantaneous sample. +.SH TOO MUCH INFORMATION EXAMPLE +By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters. +This is ideal for remote debugging, use the "--out" option to save everything to a text file, and get that file to the expert helping you debug. +.PP +When you are not interested in all that information, and there are several ways to see only what you want. First the "--quiet" option will skip the configuration information, and turbostat will show only the counter columns. Second, you can reduce the columns with the "--hide" and "--show" options. If you use the "--show" option, then turbostat will show only the columns you list. If you use the "--hide" option, turbostat will show all columns, except the ones you list. +.PP +To find out what columns are available for --show and --hide, the "--list" option is available. Usually, the CATEGORY names above are used to refer to groups of counters. Also, for convenience, the special string "sysfs" can be used to refer to all of the sysfs C-state counters at once: +.PP +.nf +sudo ./turbostat --show sysfs --quiet sleep 10 +10.003837 sec + C1 C1E C3 C6 C7s C1% C1E% C3% C6% C7s% + 4 21 2 2 459 0.14 0.82 0.00 0.00 98.93 + 1 17 2 2 130 0.00 0.02 0.00 0.00 99.80 + 0 0 0 0 31 0.00 0.00 0.00 0.00 99.95 + 2 1 0 0 52 1.14 6.49 0.00 0.00 92.21 + 1 2 0 0 52 0.00 0.08 0.00 0.00 99.86 + 0 0 0 0 71 0.00 0.00 0.00 0.00 99.89 + 0 0 0 0 25 0.00 0.00 0.00 0.00 99.96 + 0 0 0 0 74 0.00 0.00 0.00 0.00 99.94 + 0 1 0 0 24 0.00 0.00 0.00 0.00 99.84 +.fi +.PP +.SH ONE SHOT COMMAND EXAMPLE +If turbostat is invoked with a command, it will fork that command +and output the statistics gathered after the command exits. +In this case, turbostat output goes to stderr, by default. +Output can instead be saved to a file using the --out option. +In this example, the "sleep 10" command is forked, and turbostat waits for it to complete before saving all statistics into "ts.out". Note that "sleep 10" is not part of turbostat, but is simply an example of a command that turbostat can fork. The "ts.out" file is what you want to edit in a very wide window, paste into a spreadsheet, or attach to a bugzilla entry. + +.nf +[root@hsw]# ./turbostat -o ts.out sleep 10 +[root@hsw]# +.fi + +.SH PERIODIC INTERVAL EXAMPLE +Without a command to fork, turbostat displays statistics ever 5 seconds. +Periodic output goes to stdout, by default, unless --out is used to specify an output file. +The 5-second interval can be changed with the "-i sec" option. +.nf +sudo turbostat --quiet --show CPU,frequency + Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c7 UncMhz + - - 524 12.48 4198 3096 74.53 3800 + 0 0 4 0.09 4081 3096 98.88 3800 + 0 4 1 0.02 4063 3096 + 1 1 2 0.06 4063 3096 99.60 + 1 5 2 0.05 4070 3096 + 2 2 4178 99.52 4199 3096 0.00 + 2 6 3 0.08 4159 3096 + 3 3 1 0.04 4046 3096 99.66 + 3 7 0 0.01 3989 3096 + Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c7 UncMhz + - - 525 12.52 4198 3096 74.54 3800 + 0 0 4 0.10 4051 3096 99.49 3800 + 0 4 2 0.04 3993 3096 + 1 1 3 0.07 4054 3096 99.56 + 1 5 4 0.10 4018 3096 + 2 2 4178 99.51 4199 3096 0.00 + 2 6 4 0.09 4143 3096 + 3 3 2 0.06 4026 3096 99.10 + 3 7 7 0.17 4074 3096 +.fi +This example also shows the use of the --show option to show only the desired columns. + +.SH SYSTEM CONFIGURATION INFORMATION EXAMPLE + +By default, turbostat always dumps system configuration information +before taking measurements. In the example above, "--quiet" is used +to suppress that output. Here is an example of the configuration information: +.nf +turbostat version 2022.04.16 - Len Brown <lenb@kernel.org> +Kernel command line: BOOT_IMAGE=/boot/vmlinuz-5.18.0-rc6-00001-ge6891250e3b5 ... +CPUID(0): GenuineIntel 0x16 CPUID levels +CPUID(1): family:model:stepping 0x6:9e:9 (6:158:9) microcode 0xea +CPUID(0x80000000): max_extended_levels: 0x80000008 +CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM HT TM +CPUID(6): APERF, TURBO, DTS, PTM, HWP, HWPnotify, HWPwindow, HWPepp, No-HWPpkg, EPB +cpu7: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST MWAIT PREFETCH TURBO) +CPUID(7): SGX +cpu7: MSR_IA32_FEATURE_CONTROL: 0x00000005 (Locked ) +CPUID(0x15): eax_crystal: 2 ebx_tsc: 258 ecx_crystal_hz: 0 +TSC: 3096 MHz (24000000 Hz * 258 / 2 / 1000000) +CPUID(0x16): base_mhz: 3100 max_mhz: 4200 bus_mhz: 100 +cpu7: MSR_MISC_PWR_MGMT: 0x00401cc0 (ENable-EIST_Coordination DISable-EPB DISable-OOB) +RAPL: 5825 sec. Joule Counter Range, at 45 Watts +cpu7: MSR_PLATFORM_INFO: 0x80839f1011f00 +8 * 100.0 = 800.0 MHz max efficiency frequency +31 * 100.0 = 3100.0 MHz base frequency +cpu7: MSR_IA32_POWER_CTL: 0x002c005d (C1E auto-promotion: DISabled) +cpu7: MSR_TURBO_RATIO_LIMIT: 0x2728292a +39 * 100.0 = 3900.0 MHz max turbo 4 active cores +40 * 100.0 = 4000.0 MHz max turbo 3 active cores +41 * 100.0 = 4100.0 MHz max turbo 2 active cores +42 * 100.0 = 4200.0 MHz max turbo 1 active cores +cpu7: MSR_CONFIG_TDP_NOMINAL: 0x0000001f (base_ratio=31) +cpu7: MSR_CONFIG_TDP_LEVEL_1: 0x00000000 () +cpu7: MSR_CONFIG_TDP_LEVEL_2: 0x00000000 () +cpu7: MSR_CONFIG_TDP_CONTROL: 0x80000000 ( lock=1) +cpu7: MSR_TURBO_ACTIVATION_RATIO: 0x00000000 (MAX_NON_TURBO_RATIO=0 lock=0) +cpu7: MSR_PKG_CST_CONFIG_CONTROL: 0x1e008008 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked, pkg-cstate-limit=8 (unlimited)) +Uncore Frequency pkg0 die0: 800 - 3900 MHz (800 - 3900 MHz) +/dev/cpu_dma_latency: 2000000000 usec (default) +current_driver: intel_idle +current_governor: menu +current_governor_ro: menu +cpu7: POLL: CPUIDLE CORE POLL IDLE +cpu7: C1: MWAIT 0x00 +cpu7: C1E: MWAIT 0x01 +cpu7: C3: MWAIT 0x10 +cpu7: C6: MWAIT 0x20 +cpu7: C7s: MWAIT 0x33 +cpu7: C8: MWAIT 0x40 +cpu7: C9: MWAIT 0x50 +cpu7: C10: MWAIT 0x60 +cpu7: cpufreq driver: intel_pstate +cpu7: cpufreq governor: performance +cpufreq intel_pstate no_turbo: 0 +cpu7: MSR_MISC_FEATURE_CONTROL: 0x00000000 (L2-Prefetch L2-Prefetch-pair L1-Prefetch L1-IP-Prefetch) +cpu0: MSR_PM_ENABLE: 0x00000001 (HWP) +cpu0: MSR_HWP_CAPABILITIES: 0x01101f53 (high 83 guar 31 eff 16 low 1) +cpu0: MSR_HWP_REQUEST: 0x00005353 (min 83 max 83 des 0 epp 0x0 window 0x0 pkg 0x0) +cpu0: MSR_HWP_INTERRUPT: 0x00000001 (EN_Guaranteed_Perf_Change, Dis_Excursion_Min) +cpu0: MSR_HWP_STATUS: 0x00000004 (No-Guaranteed_Perf_Change, No-Excursion_Min) +cpu0: EPB: 6 (balanced) +cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.) +cpu0: MSR_PKG_POWER_INFO: 0x00000168 (45 W TDP, RAPL 0 - 0 W, 0.000000 sec.) +cpu0: MSR_PKG_POWER_LIMIT: 0x42820800218208 (UNlocked) +cpu0: PKG Limit #1: ENabled (65.000 Watts, 64.000000 sec, clamp ENabled) +cpu0: PKG Limit #2: ENabled (65.000 Watts, 0.002441* sec, clamp DISabled) +cpu0: MSR_VR_CURRENT_CONFIG: 0x00000000 +cpu0: PKG Limit #4: 0.000000 Watts (UNlocked) +cpu0: MSR_DRAM_POWER_LIMIT: 0x5400de00000000 (UNlocked) +cpu0: DRAM Limit: DISabled (0.000 Watts, 0.000977 sec, clamp DISabled) +cpu0: MSR_PP0_POLICY: 0 +cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) +cpu0: Cores Limit: DISabled (0.000 Watts, 0.000977 sec, clamp DISabled) +cpu0: MSR_PP1_POLICY: 0 +cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) +cpu0: GFX Limit: DISabled (0.000 Watts, 0.000977 sec, clamp DISabled) +cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00640000 (100 C) (100 default - 0 offset) +cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88200800 (68 C) +cpu0: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x00000003 (100 C, 100 C) +cpu7: MSR_PKGC3_IRTL: 0x0000884e (valid, 79872 ns) +cpu7: MSR_PKGC6_IRTL: 0x00008876 (valid, 120832 ns) +cpu7: MSR_PKGC7_IRTL: 0x00008894 (valid, 151552 ns) +cpu7: MSR_PKGC8_IRTL: 0x000088fa (valid, 256000 ns) +cpu7: MSR_PKGC9_IRTL: 0x0000894c (valid, 339968 ns) +cpu7: MSR_PKGC10_IRTL: 0x00008bf2 (valid, 1034240 ns) +.fi +.PP +The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency +available at the minimum package voltage. The \fBTSC frequency\fP is the base +frequency of the processor -- this should match the brand string +in /proc/cpuinfo. This base frequency +should be sustainable on all CPUs indefinitely, given nominal power and cooling. +The remaining rows show what maximum turbo frequency is possible +depending on the number of idle cores. Note that not all information is +available on all processors. +.SH ADD COUNTER EXAMPLE +Here we limit turbostat to showing just the CPU number for cpu0 - cpu3. +We add a counter showing the 32-bit raw value of MSR 0x199 (MSR_IA32_PERF_CTL), +labeling it with the column header, "PRF_CTRL", and display it only once, +afte the conclusion of a 0.1 second sleep. +.nf +sudo ./turbostat --quiet --cpu 0-3 --show CPU --add msr0x199,u32,raw,PRF_CTRL sleep .1 +0.101604 sec +CPU PRF_CTRL +- 0x00000000 +0 0x00000c00 +1 0x00000800 +2 0x00000a00 +3 0x00000800 + +.fi + +.SH INPUT + +For interval-mode, turbostat will immediately end the current interval +when it sees a newline on standard input. +turbostat will then start the next interval. +Control-C will be send a SIGINT to turbostat, +which will immediately abort the program with no further processing. +.SH SIGNALS + +SIGINT will interrupt interval-mode. +The end-of-interval data will be collected and displayed before turbostat exits. + +SIGUSR1 will end current interval, +end-of-interval data will be collected and displayed before turbostat +starts a new interval. +.SH NOTES + +.B "turbostat " +must be run as root. +Alternatively, non-root users can be enabled to run turbostat this way: + +# setcap cap_sys_admin,cap_sys_rawio,cap_sys_nice=+ep ./turbostat + +# chmod +r /dev/cpu/*/msr + +# chmod +r /dev/cpu_dma_latency + +.B "turbostat " +reads hardware counters, but doesn't write them. +So it will not interfere with the OS or other programs, including +multiple invocations of itself. + +\fBturbostat \fP +may work poorly on Linux-2.6.20 through 2.6.29, +as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF MSRs +in those kernels. + +AVG_MHz = APERF_delta/measurement_interval. This is the actual +number of elapsed cycles divided by the entire sample interval -- +including idle time. Note that this calculation is resilient +to systems lacking a non-stop TSC. + +TSC_MHz = TSC_delta/measurement_interval. +On a system with an invariant TSC, this value will be constant +and will closely match the base frequency value shown +in the brand string in /proc/cpuinfo. On a system where +the TSC stops in idle, TSC_MHz will drop +below the processor's base frequency. + +Busy% = MPERF_delta/TSC_delta + +Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval + +Note that these calculations depend on TSC_delta, so they +are not reliable during intervals when TSC_MHz is not running at the base frequency. + +Turbostat data collection is not atomic. +Extremely short measurement intervals (much less than 1 second), +or system activity that prevents turbostat from being able +to run on all CPUS to quickly collect data, will result in +inconsistent results. + +The APERF, MPERF MSRs are defined to count non-halted cycles. +Although it is not guaranteed by the architecture, turbostat assumes +that they count at TSC rate, which is true on all processors tested to date. + +.SH REFERENCES +Volume 3B: System Programming Guide" +https://www.intel.com/products/processor/manuals/ + +.SH FILES +.ta +.nf +/dev/cpu/*/msr +.fi + +.SH "SEE ALSO" +msr(4), vmstat(8) +.PP +.SH AUTHOR +.nf +Written by Len Brown <len.brown@intel.com> diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c new file mode 100644 index 000000000..b113900d9 --- /dev/null +++ b/tools/power/x86/turbostat/turbostat.c @@ -0,0 +1,6757 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * turbostat -- show CPU frequency and C-state residency + * on modern Intel and AMD processors. + * + * Copyright (c) 2022 Intel Corporation. + * Len Brown <len.brown@intel.com> + */ + +#define _GNU_SOURCE +#include MSRHEADER +#include INTEL_FAMILY_HEADER +#include <stdarg.h> +#include <stdio.h> +#include <err.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <sys/select.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/time.h> +#include <stdlib.h> +#include <getopt.h> +#include <dirent.h> +#include <string.h> +#include <ctype.h> +#include <sched.h> +#include <time.h> +#include <cpuid.h> +#include <sys/capability.h> +#include <errno.h> +#include <math.h> +#include <linux/perf_event.h> +#include <asm/unistd.h> +#include <stdbool.h> + +#define UNUSED(x) (void)(x) + +/* + * This list matches the column headers, except + * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time + * 2. Core and CPU are moved to the end, we can't have strings that contain them + * matching on them for --show and --hide. + */ + +/* + * buffer size used by sscanf() for added column names + * Usually truncated to 7 characters, but also handles 18 columns for raw 64-bit counters + */ +#define NAME_BYTES 20 +#define PATH_BYTES 128 + +enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; +enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC }; +enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT }; + +struct msr_counter { + unsigned int msr_num; + char name[NAME_BYTES]; + char path[PATH_BYTES]; + unsigned int width; + enum counter_type type; + enum counter_format format; + struct msr_counter *next; + unsigned int flags; +#define FLAGS_HIDE (1 << 0) +#define FLAGS_SHOW (1 << 1) +#define SYSFS_PERCPU (1 << 1) +}; + +struct msr_counter bic[] = { + { 0x0, "usec", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Time_Of_Day_Seconds", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Package", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Node", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Avg_MHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Busy%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Bzy_MHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "TSC_MHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "IRQ", "", 0, 0, 0, NULL, 0 }, + { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL, 0 }, + { 0x0, "sysfs", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c1", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c3", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%c7", "", 0, 0, 0, NULL, 0 }, + { 0x0, "ThreadC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CoreTmp", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CoreCnt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PkgTmp", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFX%rc6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFXMHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc2", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc3", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc7", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc8", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg%pc9", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pk%pc10", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU%LPI", "", 0, 0, 0, NULL, 0 }, + { 0x0, "SYS%LPI", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PkgWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CorWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFXWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PkgCnt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "RAMWatt", "", 0, 0, 0, NULL, 0 }, + { 0x0, "PKG_%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "RAM_%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Pkg_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Cor_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFX_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "RAM_J", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Mod%c6", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Totl%C0", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Any%C0", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFX%C0", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPUGFX%", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Core", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CPU", "", 0, 0, 0, NULL, 0 }, + { 0x0, "APIC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "X2APIC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "Die", "", 0, 0, 0, NULL, 0 }, + { 0x0, "GFXAMHz", "", 0, 0, 0, NULL, 0 }, + { 0x0, "IPC", "", 0, 0, 0, NULL, 0 }, + { 0x0, "CoreThr", "", 0, 0, 0, NULL, 0 }, + { 0x0, "UncMHz", "", 0, 0, 0, NULL, 0 }, +}; + +#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) +#define BIC_USEC (1ULL << 0) +#define BIC_TOD (1ULL << 1) +#define BIC_Package (1ULL << 2) +#define BIC_Node (1ULL << 3) +#define BIC_Avg_MHz (1ULL << 4) +#define BIC_Busy (1ULL << 5) +#define BIC_Bzy_MHz (1ULL << 6) +#define BIC_TSC_MHz (1ULL << 7) +#define BIC_IRQ (1ULL << 8) +#define BIC_SMI (1ULL << 9) +#define BIC_sysfs (1ULL << 10) +#define BIC_CPU_c1 (1ULL << 11) +#define BIC_CPU_c3 (1ULL << 12) +#define BIC_CPU_c6 (1ULL << 13) +#define BIC_CPU_c7 (1ULL << 14) +#define BIC_ThreadC (1ULL << 15) +#define BIC_CoreTmp (1ULL << 16) +#define BIC_CoreCnt (1ULL << 17) +#define BIC_PkgTmp (1ULL << 18) +#define BIC_GFX_rc6 (1ULL << 19) +#define BIC_GFXMHz (1ULL << 20) +#define BIC_Pkgpc2 (1ULL << 21) +#define BIC_Pkgpc3 (1ULL << 22) +#define BIC_Pkgpc6 (1ULL << 23) +#define BIC_Pkgpc7 (1ULL << 24) +#define BIC_Pkgpc8 (1ULL << 25) +#define BIC_Pkgpc9 (1ULL << 26) +#define BIC_Pkgpc10 (1ULL << 27) +#define BIC_CPU_LPI (1ULL << 28) +#define BIC_SYS_LPI (1ULL << 29) +#define BIC_PkgWatt (1ULL << 30) +#define BIC_CorWatt (1ULL << 31) +#define BIC_GFXWatt (1ULL << 32) +#define BIC_PkgCnt (1ULL << 33) +#define BIC_RAMWatt (1ULL << 34) +#define BIC_PKG__ (1ULL << 35) +#define BIC_RAM__ (1ULL << 36) +#define BIC_Pkg_J (1ULL << 37) +#define BIC_Cor_J (1ULL << 38) +#define BIC_GFX_J (1ULL << 39) +#define BIC_RAM_J (1ULL << 40) +#define BIC_Mod_c6 (1ULL << 41) +#define BIC_Totl_c0 (1ULL << 42) +#define BIC_Any_c0 (1ULL << 43) +#define BIC_GFX_c0 (1ULL << 44) +#define BIC_CPUGFX (1ULL << 45) +#define BIC_Core (1ULL << 46) +#define BIC_CPU (1ULL << 47) +#define BIC_APIC (1ULL << 48) +#define BIC_X2APIC (1ULL << 49) +#define BIC_Die (1ULL << 50) +#define BIC_GFXACTMHz (1ULL << 51) +#define BIC_IPC (1ULL << 52) +#define BIC_CORE_THROT_CNT (1ULL << 53) +#define BIC_UNCORE_MHZ (1ULL << 54) + +#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) +#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) +#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_UNCORE_MHZ) +#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX) +#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) + +#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) + +unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT); +unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC; + +#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME) +#define DO_BIC_READ(COUNTER_NAME) (bic_present & COUNTER_NAME) +#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME) +#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT) +#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT) +#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT) + +char *proc_stat = "/proc/stat"; +FILE *outf; +int *fd_percpu; +int *fd_instr_count_percpu; +struct timeval interval_tv = { 5, 0 }; +struct timespec interval_ts = { 5, 0 }; + +/* Save original CPU model */ +unsigned int model_orig; + +unsigned int num_iterations; +unsigned int header_iterations; +unsigned int debug; +unsigned int quiet; +unsigned int shown; +unsigned int sums_need_wide_columns; +unsigned int rapl_joules; +unsigned int summary_only; +unsigned int list_header_only; +unsigned int dump_only; +unsigned int do_snb_cstates; +unsigned int do_knl_cstates; +unsigned int do_slm_cstates; +unsigned int use_c1_residency_msr; +unsigned int has_aperf; +unsigned int has_epb; +unsigned int has_turbo; +unsigned int is_hybrid; +unsigned int do_irtl_snb; +unsigned int do_irtl_hsw; +unsigned int units = 1000000; /* MHz etc */ +unsigned int genuine_intel; +unsigned int authentic_amd; +unsigned int hygon_genuine; +unsigned int max_level, max_extended_level; +unsigned int has_invariant_tsc; +unsigned int do_nhm_platform_info; +unsigned int no_MSR_MISC_PWR_MGMT; +unsigned int aperf_mperf_multiplier = 1; +double bclk; +double base_hz; +unsigned int has_base_hz; +double tsc_tweak = 1.0; +unsigned int show_pkg_only; +unsigned int show_core_only; +char *output_buffer, *outp; +unsigned int do_rapl; +unsigned int do_dts; +unsigned int do_ptm; +unsigned int do_ipc; +unsigned long long gfx_cur_rc6_ms; +unsigned long long cpuidle_cur_cpu_lpi_us; +unsigned long long cpuidle_cur_sys_lpi_us; +unsigned int gfx_cur_mhz; +unsigned int gfx_act_mhz; +unsigned int tj_max; +unsigned int tj_max_override; +int tcc_offset_bits; +double rapl_power_units, rapl_time_units; +double rapl_dram_energy_units, rapl_energy_units; +double rapl_joule_counter_range; +unsigned int do_core_perf_limit_reasons; +unsigned int has_automatic_cstate_conversion; +unsigned int dis_cstate_prewake; +unsigned int do_gfx_perf_limit_reasons; +unsigned int do_ring_perf_limit_reasons; +unsigned int crystal_hz; +unsigned long long tsc_hz; +int base_cpu; +double discover_bclk(unsigned int family, unsigned int model); +unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ + /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ +unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ +unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ +unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ +unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */ +unsigned int has_misc_feature_control; +unsigned int first_counter_read = 1; +int ignore_stdin; + +#define RAPL_PKG (1 << 0) + /* 0x610 MSR_PKG_POWER_LIMIT */ + /* 0x611 MSR_PKG_ENERGY_STATUS */ +#define RAPL_PKG_PERF_STATUS (1 << 1) + /* 0x613 MSR_PKG_PERF_STATUS */ +#define RAPL_PKG_POWER_INFO (1 << 2) + /* 0x614 MSR_PKG_POWER_INFO */ + +#define RAPL_DRAM (1 << 3) + /* 0x618 MSR_DRAM_POWER_LIMIT */ + /* 0x619 MSR_DRAM_ENERGY_STATUS */ +#define RAPL_DRAM_PERF_STATUS (1 << 4) + /* 0x61b MSR_DRAM_PERF_STATUS */ +#define RAPL_DRAM_POWER_INFO (1 << 5) + /* 0x61c MSR_DRAM_POWER_INFO */ + +#define RAPL_CORES_POWER_LIMIT (1 << 6) + /* 0x638 MSR_PP0_POWER_LIMIT */ +#define RAPL_CORE_POLICY (1 << 7) + /* 0x63a MSR_PP0_POLICY */ + +#define RAPL_GFX (1 << 8) + /* 0x640 MSR_PP1_POWER_LIMIT */ + /* 0x641 MSR_PP1_ENERGY_STATUS */ + /* 0x642 MSR_PP1_POLICY */ + +#define RAPL_CORES_ENERGY_STATUS (1 << 9) + /* 0x639 MSR_PP0_ENERGY_STATUS */ +#define RAPL_PER_CORE_ENERGY (1 << 10) + /* Indicates cores energy collection is per-core, + * not per-package. */ +#define RAPL_AMD_F17H (1 << 11) + /* 0xc0010299 MSR_RAPL_PWR_UNIT */ + /* 0xc001029a MSR_CORE_ENERGY_STAT */ + /* 0xc001029b MSR_PKG_ENERGY_STAT */ +#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT) +#define TJMAX_DEFAULT 100 + +/* MSRs that are not yet in the kernel-provided header. */ +#define MSR_RAPL_PWR_UNIT 0xc0010299 +#define MSR_CORE_ENERGY_STAT 0xc001029a +#define MSR_PKG_ENERGY_STAT 0xc001029b + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +int backwards_count; +char *progname; + +#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */ +cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset; +size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size; +#define MAX_ADDED_COUNTERS 8 +#define MAX_ADDED_THREAD_COUNTERS 24 +#define BITMASK_SIZE 32 + +struct thread_data { + struct timeval tv_begin; + struct timeval tv_end; + struct timeval tv_delta; + unsigned long long tsc; + unsigned long long aperf; + unsigned long long mperf; + unsigned long long c1; + unsigned long long instr_count; + unsigned long long irq_count; + unsigned int smi_count; + unsigned int cpu_id; + unsigned int apic_id; + unsigned int x2apic_id; + unsigned int flags; + bool is_atom; +#define CPU_IS_FIRST_THREAD_IN_CORE 0x2 +#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4 + unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; +} *thread_even, *thread_odd; + +struct core_data { + unsigned long long c3; + unsigned long long c6; + unsigned long long c7; + unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ + unsigned int core_temp_c; + unsigned int core_energy; /* MSR_CORE_ENERGY_STAT */ + unsigned int core_id; + unsigned long long core_throt_cnt; + unsigned long long counter[MAX_ADDED_COUNTERS]; +} *core_even, *core_odd; + +struct pkg_data { + unsigned long long pc2; + unsigned long long pc3; + unsigned long long pc6; + unsigned long long pc7; + unsigned long long pc8; + unsigned long long pc9; + unsigned long long pc10; + unsigned long long cpu_lpi; + unsigned long long sys_lpi; + unsigned long long pkg_wtd_core_c0; + unsigned long long pkg_any_core_c0; + unsigned long long pkg_any_gfxe_c0; + unsigned long long pkg_both_core_gfxe_c0; + long long gfx_rc6_ms; + unsigned int gfx_mhz; + unsigned int gfx_act_mhz; + unsigned int package_id; + unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */ + unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */ + unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */ + unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */ + unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ + unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ + unsigned int pkg_temp_c; + unsigned int uncore_mhz; + unsigned long long counter[MAX_ADDED_COUNTERS]; +} *package_even, *package_odd; + +#define ODD_COUNTERS thread_odd, core_odd, package_odd +#define EVEN_COUNTERS thread_even, core_even, package_even + +#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \ + ((thread_base) + \ + ((pkg_no) * \ + topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \ + ((node_no) * topo.cores_per_node * topo.threads_per_core) + \ + ((core_no) * topo.threads_per_core) + \ + (thread_no)) + +#define GET_CORE(core_base, core_no, node_no, pkg_no) \ + ((core_base) + \ + ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \ + ((node_no) * topo.cores_per_node) + \ + (core_no)) + +#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) + +/* + * The accumulated sum of MSR is defined as a monotonic + * increasing MSR, it will be accumulated periodically, + * despite its register's bit width. + */ +enum { + IDX_PKG_ENERGY, + IDX_DRAM_ENERGY, + IDX_PP0_ENERGY, + IDX_PP1_ENERGY, + IDX_PKG_PERF, + IDX_DRAM_PERF, + IDX_COUNT, +}; + +int get_msr_sum(int cpu, off_t offset, unsigned long long *msr); + +struct msr_sum_array { + /* get_msr_sum() = sum + (get_msr() - last) */ + struct { + /*The accumulated MSR value is updated by the timer */ + unsigned long long sum; + /*The MSR footprint recorded in last timer */ + unsigned long long last; + } entries[IDX_COUNT]; +}; + +/* The percpu MSR sum array.*/ +struct msr_sum_array *per_cpu_msr_sum; + +off_t idx_to_offset(int idx) +{ + off_t offset; + + switch (idx) { + case IDX_PKG_ENERGY: + if (do_rapl & RAPL_AMD_F17H) + offset = MSR_PKG_ENERGY_STAT; + else + offset = MSR_PKG_ENERGY_STATUS; + break; + case IDX_DRAM_ENERGY: + offset = MSR_DRAM_ENERGY_STATUS; + break; + case IDX_PP0_ENERGY: + offset = MSR_PP0_ENERGY_STATUS; + break; + case IDX_PP1_ENERGY: + offset = MSR_PP1_ENERGY_STATUS; + break; + case IDX_PKG_PERF: + offset = MSR_PKG_PERF_STATUS; + break; + case IDX_DRAM_PERF: + offset = MSR_DRAM_PERF_STATUS; + break; + default: + offset = -1; + } + return offset; +} + +int offset_to_idx(off_t offset) +{ + int idx; + + switch (offset) { + case MSR_PKG_ENERGY_STATUS: + case MSR_PKG_ENERGY_STAT: + idx = IDX_PKG_ENERGY; + break; + case MSR_DRAM_ENERGY_STATUS: + idx = IDX_DRAM_ENERGY; + break; + case MSR_PP0_ENERGY_STATUS: + idx = IDX_PP0_ENERGY; + break; + case MSR_PP1_ENERGY_STATUS: + idx = IDX_PP1_ENERGY; + break; + case MSR_PKG_PERF_STATUS: + idx = IDX_PKG_PERF; + break; + case MSR_DRAM_PERF_STATUS: + idx = IDX_DRAM_PERF; + break; + default: + idx = -1; + } + return idx; +} + +int idx_valid(int idx) +{ + switch (idx) { + case IDX_PKG_ENERGY: + return do_rapl & (RAPL_PKG | RAPL_AMD_F17H); + case IDX_DRAM_ENERGY: + return do_rapl & RAPL_DRAM; + case IDX_PP0_ENERGY: + return do_rapl & RAPL_CORES_ENERGY_STATUS; + case IDX_PP1_ENERGY: + return do_rapl & RAPL_GFX; + case IDX_PKG_PERF: + return do_rapl & RAPL_PKG_PERF_STATUS; + case IDX_DRAM_PERF: + return do_rapl & RAPL_DRAM_PERF_STATUS; + default: + return 0; + } +} + +struct sys_counters { + unsigned int added_thread_counters; + unsigned int added_core_counters; + unsigned int added_package_counters; + struct msr_counter *tp; + struct msr_counter *cp; + struct msr_counter *pp; +} sys; + +struct system_summary { + struct thread_data threads; + struct core_data cores; + struct pkg_data packages; +} average; + +struct cpu_topology { + int physical_package_id; + int die_id; + int logical_cpu_id; + int physical_node_id; + int logical_node_id; /* 0-based count within the package */ + int physical_core_id; + int thread_id; + cpu_set_t *put_ids; /* Processing Unit/Thread IDs */ +} *cpus; + +struct topo_params { + int num_packages; + int num_die; + int num_cpus; + int num_cores; + int max_cpu_num; + int max_node_num; + int nodes_per_pkg; + int cores_per_node; + int threads_per_core; +} topo; + +struct timeval tv_even, tv_odd, tv_delta; + +int *irq_column_2_cpu; /* /proc/interrupts column numbers */ +int *irqs_per_cpu; /* indexed by cpu_num */ + +void setup_all_buffers(void); + +char *sys_lpi_file; +char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us"; +char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec"; + +int cpu_is_not_present(int cpu) +{ + return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); +} + +/* + * run func(thread, core, package) in topology order + * skip non-present cpus + */ + +int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *), + struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) +{ + int retval, pkg_no, core_no, thread_no, node_no; + + for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { + for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) { + for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { + for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { + struct thread_data *t; + struct core_data *c; + struct pkg_data *p; + + t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); + + if (cpu_is_not_present(t->cpu_id)) + continue; + + c = GET_CORE(core_base, core_no, node_no, pkg_no); + p = GET_PKG(pkg_base, pkg_no); + + retval = func(t, c, p); + if (retval) + return retval; + } + } + } + } + return 0; +} + +int cpu_migrate(int cpu) +{ + CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); + CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); + if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) + return -1; + else + return 0; +} + +int get_msr_fd(int cpu) +{ + char pathname[32]; + int fd; + + fd = fd_percpu[cpu]; + + if (fd) + return fd; + + sprintf(pathname, "/dev/cpu/%d/msr", cpu); + fd = open(pathname, O_RDONLY); + if (fd < 0) + err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); + + fd_percpu[cpu] = fd; + + return fd; +} + +static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) +{ + return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags); +} + +static int perf_instr_count_open(int cpu_num) +{ + struct perf_event_attr pea; + int fd; + + memset(&pea, 0, sizeof(struct perf_event_attr)); + pea.type = PERF_TYPE_HARDWARE; + pea.size = sizeof(struct perf_event_attr); + pea.config = PERF_COUNT_HW_INSTRUCTIONS; + + /* counter for cpu_num, including user + kernel and all processes */ + fd = perf_event_open(&pea, -1, cpu_num, -1, 0); + if (fd == -1) { + warn("cpu%d: perf instruction counter", cpu_num); + BIC_NOT_PRESENT(BIC_IPC); + } + + return fd; +} + +int get_instr_count_fd(int cpu) +{ + if (fd_instr_count_percpu[cpu]) + return fd_instr_count_percpu[cpu]; + + fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu); + + return fd_instr_count_percpu[cpu]; +} + +int get_msr(int cpu, off_t offset, unsigned long long *msr) +{ + ssize_t retval; + + retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset); + + if (retval != sizeof *msr) + err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset); + + return 0; +} + +#define MAX_DEFERRED 16 +char *deferred_add_names[MAX_DEFERRED]; +char *deferred_skip_names[MAX_DEFERRED]; +int deferred_add_index; +int deferred_skip_index; + +/* + * HIDE_LIST - hide this list of counters, show the rest [default] + * SHOW_LIST - show this list of counters, hide the rest + */ +enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST; + +void help(void) +{ + fprintf(outf, + "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n" + "\n" + "Turbostat forks the specified COMMAND and prints statistics\n" + "when COMMAND completes.\n" + "If no COMMAND is specified, turbostat wakes every 5-seconds\n" + "to print statistics, until interrupted.\n" + " -a, --add add a counter\n" + " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" + " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" + " {core | package | j,k,l..m,n-p }\n" + " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" + " -D, --Dump displays the raw counter values\n" + " -e, --enable [all | column]\n" + " shows all or the specified disabled column\n" + " -H, --hide [column|column,column,...]\n" + " hide the specified column(s)\n" + " -i, --interval sec.subsec\n" + " Override default 5-second measurement interval\n" + " -J, --Joules displays energy in Joules instead of Watts\n" + " -l, --list list column headers only\n" + " -n, --num_iterations num\n" + " number of the measurement iterations\n" + " -N, --header_iterations num\n" + " print header every num iterations\n" + " -o, --out file\n" + " create or truncate \"file\" for all output\n" + " -q, --quiet skip decoding system configuration header\n" + " -s, --show [column|column,column,...]\n" + " show only the specified column(s)\n" + " -S, --Summary\n" + " limits output to 1-line system summary per interval\n" + " -T, --TCC temperature\n" + " sets the Thermal Control Circuit temperature in\n" + " degrees Celsius\n" + " -h, --help print this help message\n" + " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n"); +} + +/* + * bic_lookup + * for all the strings in comma separate name_list, + * set the approprate bit in return value. + */ +unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) +{ + unsigned int i; + unsigned long long retval = 0; + + while (name_list) { + char *comma; + + comma = strchr(name_list, ','); + + if (comma) + *comma = '\0'; + + for (i = 0; i < MAX_BIC; ++i) { + if (!strcmp(name_list, bic[i].name)) { + retval |= (1ULL << i); + break; + } + if (!strcmp(name_list, "all")) { + retval |= ~0; + break; + } else if (!strcmp(name_list, "topology")) { + retval |= BIC_TOPOLOGY; + break; + } else if (!strcmp(name_list, "power")) { + retval |= BIC_THERMAL_PWR; + break; + } else if (!strcmp(name_list, "idle")) { + retval |= BIC_IDLE; + break; + } else if (!strcmp(name_list, "frequency")) { + retval |= BIC_FREQUENCY; + break; + } else if (!strcmp(name_list, "other")) { + retval |= BIC_OTHER; + break; + } + + } + if (i == MAX_BIC) { + if (mode == SHOW_LIST) { + deferred_add_names[deferred_add_index++] = name_list; + if (deferred_add_index >= MAX_DEFERRED) { + fprintf(stderr, "More than max %d un-recognized --add options '%s'\n", + MAX_DEFERRED, name_list); + help(); + exit(1); + } + } else { + deferred_skip_names[deferred_skip_index++] = name_list; + if (debug) + fprintf(stderr, "deferred \"%s\"\n", name_list); + if (deferred_skip_index >= MAX_DEFERRED) { + fprintf(stderr, "More than max %d un-recognized --skip options '%s'\n", + MAX_DEFERRED, name_list); + help(); + exit(1); + } + } + } + + name_list = comma; + if (name_list) + name_list++; + + } + return retval; +} + +void print_header(char *delim) +{ + struct msr_counter *mp; + int printed = 0; + + if (DO_BIC(BIC_USEC)) + outp += sprintf(outp, "%susec", (printed++ ? delim : "")); + if (DO_BIC(BIC_TOD)) + outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : "")); + if (DO_BIC(BIC_Package)) + outp += sprintf(outp, "%sPackage", (printed++ ? delim : "")); + if (DO_BIC(BIC_Die)) + outp += sprintf(outp, "%sDie", (printed++ ? delim : "")); + if (DO_BIC(BIC_Node)) + outp += sprintf(outp, "%sNode", (printed++ ? delim : "")); + if (DO_BIC(BIC_Core)) + outp += sprintf(outp, "%sCore", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU)) + outp += sprintf(outp, "%sCPU", (printed++ ? delim : "")); + if (DO_BIC(BIC_APIC)) + outp += sprintf(outp, "%sAPIC", (printed++ ? delim : "")); + if (DO_BIC(BIC_X2APIC)) + outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : "")); + if (DO_BIC(BIC_Avg_MHz)) + outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_Busy)) + outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : "")); + if (DO_BIC(BIC_Bzy_MHz)) + outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_TSC_MHz)) + outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : "")); + + if (DO_BIC(BIC_IPC)) + outp += sprintf(outp, "%sIPC", (printed++ ? delim : "")); + + if (DO_BIC(BIC_IRQ)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "%s IRQ", (printed++ ? delim : "")); + else + outp += sprintf(outp, "%sIRQ", (printed++ ? delim : "")); + } + + if (DO_BIC(BIC_SMI)) + outp += sprintf(outp, "%sSMI", (printed++ ? delim : "")); + + for (mp = sys.tp; mp; mp = mp->next) { + + if (mp->format == FORMAT_RAW) { + if (mp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name); + } else { + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name); + } + } + + if (DO_BIC(BIC_CPU_c1)) + outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU_c3)) + outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU_c6)) + outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU_c7)) + outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : "")); + + if (DO_BIC(BIC_Mod_c6)) + outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : "")); + + if (DO_BIC(BIC_CoreTmp)) + outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : "")); + + if (DO_BIC(BIC_CORE_THROT_CNT)) + outp += sprintf(outp, "%sCoreThr", (printed++ ? delim : "")); + + if (do_rapl && !rapl_joules) { + if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); + } else if (do_rapl && rapl_joules) { + if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); + } + + for (mp = sys.cp; mp; mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 64) + outp += sprintf(outp, "%s%18.18s", delim, mp->name); + else + outp += sprintf(outp, "%s%10.10s", delim, mp->name); + } else { + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", delim, mp->name); + else + outp += sprintf(outp, "%s%s", delim, mp->name); + } + } + + if (DO_BIC(BIC_PkgTmp)) + outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); + + if (DO_BIC(BIC_GFX_rc6)) + outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : "")); + + if (DO_BIC(BIC_GFXMHz)) + outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); + + if (DO_BIC(BIC_GFXACTMHz)) + outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : "")); + + if (DO_BIC(BIC_Totl_c0)) + outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); + if (DO_BIC(BIC_Any_c0)) + outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : "")); + if (DO_BIC(BIC_GFX_c0)) + outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPUGFX)) + outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : "")); + + if (DO_BIC(BIC_Pkgpc2)) + outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc3)) + outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc6)) + outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc7)) + outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc8)) + outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc9)) + outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); + if (DO_BIC(BIC_Pkgpc10)) + outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU_LPI)) + outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); + if (DO_BIC(BIC_SYS_LPI)) + outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : "")); + + if (do_rapl && !rapl_joules) { + if (DO_BIC(BIC_PkgWatt)) + outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : "")); + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : "")); + if (DO_BIC(BIC_GFXWatt)) + outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : "")); + if (DO_BIC(BIC_RAMWatt)) + outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : "")); + if (DO_BIC(BIC_PKG__)) + outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); + if (DO_BIC(BIC_RAM__)) + outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); + } else if (do_rapl && rapl_joules) { + if (DO_BIC(BIC_Pkg_J)) + outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : "")); + if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, "%sCor_J", (printed++ ? delim : "")); + if (DO_BIC(BIC_GFX_J)) + outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : "")); + if (DO_BIC(BIC_RAM_J)) + outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : "")); + if (DO_BIC(BIC_PKG__)) + outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : "")); + if (DO_BIC(BIC_RAM__)) + outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : "")); + } + if (DO_BIC(BIC_UNCORE_MHZ)) + outp += sprintf(outp, "%sUncMHz", (printed++ ? delim : "")); + + for (mp = sys.pp; mp; mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 64) + outp += sprintf(outp, "%s%18.18s", delim, mp->name); + else + outp += sprintf(outp, "%s%10.10s", delim, mp->name); + } else { + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", delim, mp->name); + else + outp += sprintf(outp, "%s%s", delim, mp->name); + } + } + + outp += sprintf(outp, "\n"); +} + +int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + int i; + struct msr_counter *mp; + + outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); + + if (t) { + outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags); + outp += sprintf(outp, "TSC: %016llX\n", t->tsc); + outp += sprintf(outp, "aperf: %016llX\n", t->aperf); + outp += sprintf(outp, "mperf: %016llX\n", t->mperf); + outp += sprintf(outp, "c1: %016llX\n", t->c1); + + if (DO_BIC(BIC_IPC)) + outp += sprintf(outp, "IPC: %lld\n", t->instr_count); + + if (DO_BIC(BIC_IRQ)) + outp += sprintf(outp, "IRQ: %lld\n", t->irq_count); + if (DO_BIC(BIC_SMI)) + outp += sprintf(outp, "SMI: %d\n", t->smi_count); + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]); + } + } + + if (c) { + outp += sprintf(outp, "core: %d\n", c->core_id); + outp += sprintf(outp, "c3: %016llX\n", c->c3); + outp += sprintf(outp, "c6: %016llX\n", c->c6); + outp += sprintf(outp, "c7: %016llX\n", c->c7); + outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); + outp += sprintf(outp, "cpu_throt_count: %016llX\n", c->core_throt_cnt); + outp += sprintf(outp, "Joules: %0X\n", c->core_energy); + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]); + } + outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); + } + + if (p) { + outp += sprintf(outp, "package: %d\n", p->package_id); + + outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); + outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0); + outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0); + outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); + + outp += sprintf(outp, "pc2: %016llX\n", p->pc2); + if (DO_BIC(BIC_Pkgpc3)) + outp += sprintf(outp, "pc3: %016llX\n", p->pc3); + if (DO_BIC(BIC_Pkgpc6)) + outp += sprintf(outp, "pc6: %016llX\n", p->pc6); + if (DO_BIC(BIC_Pkgpc7)) + outp += sprintf(outp, "pc7: %016llX\n", p->pc7); + outp += sprintf(outp, "pc8: %016llX\n", p->pc8); + outp += sprintf(outp, "pc9: %016llX\n", p->pc9); + outp += sprintf(outp, "pc10: %016llX\n", p->pc10); + outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); + outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); + outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg); + outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores); + outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx); + outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram); + outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status); + outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status); + outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]); + } + } + + outp += sprintf(outp, "\n"); + + return 0; +} + +/* + * column formatting convention & formats + */ +int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + double interval_float, tsc; + char *fmt8; + int i; + struct msr_counter *mp; + char *delim = "\t"; + int printed = 0; + + /* if showing only 1st thread in core and this isn't one, bail out */ + if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + return 0; + + /* if showing only 1st thread in pkg and this isn't one, bail out */ + if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + /*if not summary line and --cpu is used */ + if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset))) + return 0; + + if (DO_BIC(BIC_USEC)) { + /* on each row, print how many usec each timestamp took to gather */ + struct timeval tv; + + timersub(&t->tv_end, &t->tv_begin, &tv); + outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec); + } + + /* Time_Of_Day_Seconds: on each row, print sec.usec last timestamp taken */ + if (DO_BIC(BIC_TOD)) + outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec); + + interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0; + + tsc = t->tsc * tsc_tweak; + + /* topo columns, print blanks on 1st (average) line */ + if (t == &average.threads) { + if (DO_BIC(BIC_Package)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_Die)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_Node)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_Core)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_CPU)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_APIC)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + if (DO_BIC(BIC_X2APIC)) + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + } else { + if (DO_BIC(BIC_Package)) { + if (p) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id); + else + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + } + if (DO_BIC(BIC_Die)) { + if (c) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), cpus[t->cpu_id].die_id); + else + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + } + if (DO_BIC(BIC_Node)) { + if (t) + outp += sprintf(outp, "%s%d", + (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id); + else + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + } + if (DO_BIC(BIC_Core)) { + if (c) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id); + else + outp += sprintf(outp, "%s-", (printed++ ? delim : "")); + } + if (DO_BIC(BIC_CPU)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id); + if (DO_BIC(BIC_APIC)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id); + if (DO_BIC(BIC_X2APIC)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id); + } + + if (DO_BIC(BIC_Avg_MHz)) + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float); + + if (DO_BIC(BIC_Busy)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc); + + if (DO_BIC(BIC_Bzy_MHz)) { + if (has_base_hz) + outp += + sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf); + else + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), + tsc / units * t->aperf / t->mperf / interval_float); + } + + if (DO_BIC(BIC_TSC_MHz)) + outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float); + + if (DO_BIC(BIC_IPC)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf); + + /* IRQ */ + if (DO_BIC(BIC_IRQ)) { + if (sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count); + } + + /* SMI */ + if (DO_BIC(BIC_SMI)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count); + + /* Added counters */ + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]); + } else if (mp->format == FORMAT_DELTA) { + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]); + } else if (mp->format == FORMAT_PERCENT) { + if (mp->type == COUNTER_USEC) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), + t->counter[i] / interval_float / 10000); + else + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc); + } + } + + /* C1 */ + if (DO_BIC(BIC_CPU_c1)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); + + /* print per-core data only for 1st thread in core */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + goto done; + + if (DO_BIC(BIC_CPU_c3)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc); + if (DO_BIC(BIC_CPU_c6)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc); + if (DO_BIC(BIC_CPU_c7)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc); + + /* Mod%c6 */ + if (DO_BIC(BIC_Mod_c6)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc); + + if (DO_BIC(BIC_CoreTmp)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c); + + /* Core throttle count */ + if (DO_BIC(BIC_CORE_THROT_CNT)) + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->core_throt_cnt); + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]); + } else if (mp->format == FORMAT_DELTA) { + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]); + } else if (mp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc); + } + } + + fmt8 = "%s%.2f"; + + if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); + if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units); + + /* print per-package data only for 1st core in package */ + if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + goto done; + + /* PkgTmp */ + if (DO_BIC(BIC_PkgTmp)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c); + + /* GFXrc6 */ + if (DO_BIC(BIC_GFX_rc6)) { + if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */ + outp += sprintf(outp, "%s**.**", (printed++ ? delim : "")); + } else { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), + p->gfx_rc6_ms / 10.0 / interval_float); + } + } + + /* GFXMHz */ + if (DO_BIC(BIC_GFXMHz)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); + + /* GFXACTMHz */ + if (DO_BIC(BIC_GFXACTMHz)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); + + /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ + if (DO_BIC(BIC_Totl_c0)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc); + if (DO_BIC(BIC_Any_c0)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc); + if (DO_BIC(BIC_GFX_c0)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc); + if (DO_BIC(BIC_CPUGFX)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc); + + if (DO_BIC(BIC_Pkgpc2)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc); + if (DO_BIC(BIC_Pkgpc3)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc); + if (DO_BIC(BIC_Pkgpc6)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc); + if (DO_BIC(BIC_Pkgpc7)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc); + if (DO_BIC(BIC_Pkgpc8)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc); + if (DO_BIC(BIC_Pkgpc9)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc); + if (DO_BIC(BIC_Pkgpc10)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); + + if (DO_BIC(BIC_CPU_LPI)) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float); + if (DO_BIC(BIC_SYS_LPI)) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float); + + if (DO_BIC(BIC_PkgWatt)) + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float); + + if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float); + if (DO_BIC(BIC_GFXWatt)) + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float); + if (DO_BIC(BIC_RAMWatt)) + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), + p->energy_dram * rapl_dram_energy_units / interval_float); + if (DO_BIC(BIC_Pkg_J)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units); + if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units); + if (DO_BIC(BIC_GFX_J)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units); + if (DO_BIC(BIC_RAM_J)) + outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units); + if (DO_BIC(BIC_PKG__)) + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), + 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); + if (DO_BIC(BIC_RAM__)) + outp += + sprintf(outp, fmt8, (printed++ ? delim : ""), + 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); + /* UncMHz */ + if (DO_BIC(BIC_UNCORE_MHZ)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->uncore_mhz); + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) { + if (mp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]); + } else if (mp->format == FORMAT_DELTA) { + if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]); + } else if (mp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc); + } + } + +done: + if (*(outp - 1) != '\n') + outp += sprintf(outp, "\n"); + + return 0; +} + +void flush_output_stdout(void) +{ + FILE *filep; + + if (outf == stderr) + filep = stdout; + else + filep = outf; + + fputs(output_buffer, filep); + fflush(filep); + + outp = output_buffer; +} + +void flush_output_stderr(void) +{ + fputs(output_buffer, outf); + fflush(outf); + outp = output_buffer; +} + +void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + static int count; + + if ((!count || (header_iterations && !(count % header_iterations))) || !summary_only) + print_header("\t"); + + format_counters(&average.threads, &average.cores, &average.packages); + + count++; + + if (summary_only) + return; + + for_all_cpus(format_counters, t, c, p); +} + +#define DELTA_WRAP32(new, old) \ + old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); + +int delta_package(struct pkg_data *new, struct pkg_data *old) +{ + int i; + struct msr_counter *mp; + + if (DO_BIC(BIC_Totl_c0)) + old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; + if (DO_BIC(BIC_Any_c0)) + old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; + if (DO_BIC(BIC_GFX_c0)) + old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; + if (DO_BIC(BIC_CPUGFX)) + old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; + + old->pc2 = new->pc2 - old->pc2; + if (DO_BIC(BIC_Pkgpc3)) + old->pc3 = new->pc3 - old->pc3; + if (DO_BIC(BIC_Pkgpc6)) + old->pc6 = new->pc6 - old->pc6; + if (DO_BIC(BIC_Pkgpc7)) + old->pc7 = new->pc7 - old->pc7; + old->pc8 = new->pc8 - old->pc8; + old->pc9 = new->pc9 - old->pc9; + old->pc10 = new->pc10 - old->pc10; + old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; + old->sys_lpi = new->sys_lpi - old->sys_lpi; + old->pkg_temp_c = new->pkg_temp_c; + + /* flag an error when rc6 counter resets/wraps */ + if (old->gfx_rc6_ms > new->gfx_rc6_ms) + old->gfx_rc6_ms = -1; + else + old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; + + old->uncore_mhz = new->uncore_mhz; + old->gfx_mhz = new->gfx_mhz; + old->gfx_act_mhz = new->gfx_act_mhz; + + old->energy_pkg = new->energy_pkg - old->energy_pkg; + old->energy_cores = new->energy_cores - old->energy_cores; + old->energy_gfx = new->energy_gfx - old->energy_gfx; + old->energy_dram = new->energy_dram - old->energy_dram; + old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status; + old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status; + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + old->counter[i] = new->counter[i]; + else + old->counter[i] = new->counter[i] - old->counter[i]; + } + + return 0; +} + +void delta_core(struct core_data *new, struct core_data *old) +{ + int i; + struct msr_counter *mp; + + old->c3 = new->c3 - old->c3; + old->c6 = new->c6 - old->c6; + old->c7 = new->c7 - old->c7; + old->core_temp_c = new->core_temp_c; + old->core_throt_cnt = new->core_throt_cnt; + old->mc6_us = new->mc6_us - old->mc6_us; + + DELTA_WRAP32(new->core_energy, old->core_energy); + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + old->counter[i] = new->counter[i]; + else + old->counter[i] = new->counter[i] - old->counter[i]; + } +} + +int soft_c1_residency_display(int bic) +{ + if (!DO_BIC(BIC_CPU_c1) || use_c1_residency_msr) + return 0; + + return DO_BIC_READ(bic); +} + +/* + * old = new - old + */ +int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta) +{ + int i; + struct msr_counter *mp; + + /* we run cpuid just the 1st time, copy the results */ + if (DO_BIC(BIC_APIC)) + new->apic_id = old->apic_id; + if (DO_BIC(BIC_X2APIC)) + new->x2apic_id = old->x2apic_id; + + /* + * the timestamps from start of measurement interval are in "old" + * the timestamp from end of measurement interval are in "new" + * over-write old w/ new so we can print end of interval values + */ + + timersub(&new->tv_begin, &old->tv_begin, &old->tv_delta); + old->tv_begin = new->tv_begin; + old->tv_end = new->tv_end; + + old->tsc = new->tsc - old->tsc; + + /* check for TSC < 1 Mcycles over interval */ + if (old->tsc < (1000 * 1000)) + errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" + "You can disable all c-states by booting with \"idle=poll\"\n" + "or just the deep ones with \"processor.max_cstate=1\""); + + old->c1 = new->c1 - old->c1; + + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) { + if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { + old->aperf = new->aperf - old->aperf; + old->mperf = new->mperf - old->mperf; + } else { + return -1; + } + } + + if (use_c1_residency_msr) { + /* + * Some models have a dedicated C1 residency MSR, + * which should be more accurate than the derivation below. + */ + } else { + /* + * As counter collection is not atomic, + * it is possible for mperf's non-halted cycles + idle states + * to exceed TSC's all cycles: show c1 = 0% in that case. + */ + if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak)) + old->c1 = 0; + else { + /* normal case, derive c1 */ + old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3 + - core_delta->c6 - core_delta->c7; + } + } + + if (old->mperf == 0) { + if (debug > 1) + fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id); + old->mperf = 1; /* divide by 0 protection */ + } + + if (DO_BIC(BIC_IPC)) + old->instr_count = new->instr_count - old->instr_count; + + if (DO_BIC(BIC_IRQ)) + old->irq_count = new->irq_count - old->irq_count; + + if (DO_BIC(BIC_SMI)) + old->smi_count = new->smi_count - old->smi_count; + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + old->counter[i] = new->counter[i]; + else + old->counter[i] = new->counter[i] - old->counter[i]; + } + return 0; +} + +int delta_cpu(struct thread_data *t, struct core_data *c, + struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2) +{ + int retval = 0; + + /* calculate core delta only for 1st thread in core */ + if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) + delta_core(c, c2); + + /* always calculate thread delta */ + retval = delta_thread(t, t2, c2); /* c2 is core delta */ + if (retval) + return retval; + + /* calculate package delta only for 1st core in package */ + if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE) + retval = delta_package(p, p2); + + return retval; +} + +void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + int i; + struct msr_counter *mp; + + t->tv_begin.tv_sec = 0; + t->tv_begin.tv_usec = 0; + t->tv_end.tv_sec = 0; + t->tv_end.tv_usec = 0; + t->tv_delta.tv_sec = 0; + t->tv_delta.tv_usec = 0; + + t->tsc = 0; + t->aperf = 0; + t->mperf = 0; + t->c1 = 0; + + t->instr_count = 0; + + t->irq_count = 0; + t->smi_count = 0; + + /* tells format_counters to dump all fields from this set */ + t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; + + c->c3 = 0; + c->c6 = 0; + c->c7 = 0; + c->mc6_us = 0; + c->core_temp_c = 0; + c->core_energy = 0; + c->core_throt_cnt = 0; + + p->pkg_wtd_core_c0 = 0; + p->pkg_any_core_c0 = 0; + p->pkg_any_gfxe_c0 = 0; + p->pkg_both_core_gfxe_c0 = 0; + + p->pc2 = 0; + if (DO_BIC(BIC_Pkgpc3)) + p->pc3 = 0; + if (DO_BIC(BIC_Pkgpc6)) + p->pc6 = 0; + if (DO_BIC(BIC_Pkgpc7)) + p->pc7 = 0; + p->pc8 = 0; + p->pc9 = 0; + p->pc10 = 0; + p->cpu_lpi = 0; + p->sys_lpi = 0; + + p->energy_pkg = 0; + p->energy_dram = 0; + p->energy_cores = 0; + p->energy_gfx = 0; + p->rapl_pkg_perf_status = 0; + p->rapl_dram_perf_status = 0; + p->pkg_temp_c = 0; + + p->gfx_rc6_ms = 0; + p->uncore_mhz = 0; + p->gfx_mhz = 0; + p->gfx_act_mhz = 0; + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) + t->counter[i] = 0; + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) + c->counter[i] = 0; + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) + p->counter[i] = 0; +} + +int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + int i; + struct msr_counter *mp; + + /* copy un-changing apic_id's */ + if (DO_BIC(BIC_APIC)) + average.threads.apic_id = t->apic_id; + if (DO_BIC(BIC_X2APIC)) + average.threads.x2apic_id = t->x2apic_id; + + /* remember first tv_begin */ + if (average.threads.tv_begin.tv_sec == 0) + average.threads.tv_begin = t->tv_begin; + + /* remember last tv_end */ + average.threads.tv_end = t->tv_end; + + average.threads.tsc += t->tsc; + average.threads.aperf += t->aperf; + average.threads.mperf += t->mperf; + average.threads.c1 += t->c1; + + average.threads.instr_count += t->instr_count; + + average.threads.irq_count += t->irq_count; + average.threads.smi_count += t->smi_count; + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.threads.counter[i] += t->counter[i]; + } + + /* sum per-core values only for 1st thread in core */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + return 0; + + average.cores.c3 += c->c3; + average.cores.c6 += c->c6; + average.cores.c7 += c->c7; + average.cores.mc6_us += c->mc6_us; + + average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + average.cores.core_throt_cnt = MAX(average.cores.core_throt_cnt, c->core_throt_cnt); + + average.cores.core_energy += c->core_energy; + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.cores.counter[i] += c->counter[i]; + } + + /* sum per-pkg values only for 1st core in pkg */ + if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + if (DO_BIC(BIC_Totl_c0)) + average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; + if (DO_BIC(BIC_Any_c0)) + average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; + if (DO_BIC(BIC_GFX_c0)) + average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; + if (DO_BIC(BIC_CPUGFX)) + average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; + + average.packages.pc2 += p->pc2; + if (DO_BIC(BIC_Pkgpc3)) + average.packages.pc3 += p->pc3; + if (DO_BIC(BIC_Pkgpc6)) + average.packages.pc6 += p->pc6; + if (DO_BIC(BIC_Pkgpc7)) + average.packages.pc7 += p->pc7; + average.packages.pc8 += p->pc8; + average.packages.pc9 += p->pc9; + average.packages.pc10 += p->pc10; + + average.packages.cpu_lpi = p->cpu_lpi; + average.packages.sys_lpi = p->sys_lpi; + + average.packages.energy_pkg += p->energy_pkg; + average.packages.energy_dram += p->energy_dram; + average.packages.energy_cores += p->energy_cores; + average.packages.energy_gfx += p->energy_gfx; + + average.packages.gfx_rc6_ms = p->gfx_rc6_ms; + average.packages.uncore_mhz = p->uncore_mhz; + average.packages.gfx_mhz = p->gfx_mhz; + average.packages.gfx_act_mhz = p->gfx_act_mhz; + + average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); + + average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; + average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + average.packages.counter[i] += p->counter[i]; + } + return 0; +} + +/* + * sum the counters for all cpus in the system + * compute the weighted average + */ +void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + int i; + struct msr_counter *mp; + + clear_counters(&average.threads, &average.cores, &average.packages); + + for_all_cpus(sum_counters, t, c, p); + + /* Use the global time delta for the average. */ + average.threads.tv_delta = tv_delta; + + average.threads.tsc /= topo.num_cpus; + average.threads.aperf /= topo.num_cpus; + average.threads.mperf /= topo.num_cpus; + average.threads.instr_count /= topo.num_cpus; + average.threads.c1 /= topo.num_cpus; + + if (average.threads.irq_count > 9999999) + sums_need_wide_columns = 1; + + average.cores.c3 /= topo.num_cores; + average.cores.c6 /= topo.num_cores; + average.cores.c7 /= topo.num_cores; + average.cores.mc6_us /= topo.num_cores; + + if (DO_BIC(BIC_Totl_c0)) + average.packages.pkg_wtd_core_c0 /= topo.num_packages; + if (DO_BIC(BIC_Any_c0)) + average.packages.pkg_any_core_c0 /= topo.num_packages; + if (DO_BIC(BIC_GFX_c0)) + average.packages.pkg_any_gfxe_c0 /= topo.num_packages; + if (DO_BIC(BIC_CPUGFX)) + average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages; + + average.packages.pc2 /= topo.num_packages; + if (DO_BIC(BIC_Pkgpc3)) + average.packages.pc3 /= topo.num_packages; + if (DO_BIC(BIC_Pkgpc6)) + average.packages.pc6 /= topo.num_packages; + if (DO_BIC(BIC_Pkgpc7)) + average.packages.pc7 /= topo.num_packages; + + average.packages.pc8 /= topo.num_packages; + average.packages.pc9 /= topo.num_packages; + average.packages.pc10 /= topo.num_packages; + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + if (mp->type == COUNTER_ITEMS) { + if (average.threads.counter[i] > 9999999) + sums_need_wide_columns = 1; + continue; + } + average.threads.counter[i] /= topo.num_cpus; + } + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + if (mp->type == COUNTER_ITEMS) { + if (average.cores.counter[i] > 9999999) + sums_need_wide_columns = 1; + } + average.cores.counter[i] /= topo.num_cores; + } + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (mp->format == FORMAT_RAW) + continue; + if (mp->type == COUNTER_ITEMS) { + if (average.packages.counter[i] > 9999999) + sums_need_wide_columns = 1; + } + average.packages.counter[i] /= topo.num_packages; + } +} + +static unsigned long long rdtsc(void) +{ + unsigned int low, high; + + asm volatile ("rdtsc":"=a" (low), "=d"(high)); + + return low | ((unsigned long long)high) << 32; +} + +/* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ + FILE *filep = fopen(path, mode); + + if (!filep) + err(1, "%s: open failed", path); + return filep; +} + +/* + * snapshot_sysfs_counter() + * + * return snapshot of given counter + */ +unsigned long long snapshot_sysfs_counter(char *path) +{ + FILE *fp; + int retval; + unsigned long long counter; + + fp = fopen_or_die(path, "r"); + + retval = fscanf(fp, "%lld", &counter); + if (retval != 1) + err(1, "snapshot_sysfs_counter(%s)", path); + + fclose(fp); + + return counter; +} + +int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp) +{ + if (mp->msr_num != 0) { + if (get_msr(cpu, mp->msr_num, counterp)) + return -1; + } else { + char path[128 + PATH_BYTES]; + + if (mp->flags & SYSFS_PERCPU) { + sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->path); + + *counterp = snapshot_sysfs_counter(path); + } else { + *counterp = snapshot_sysfs_counter(mp->path); + } + } + + return 0; +} + +unsigned long long get_uncore_mhz(int package, int die) +{ + char path[128]; + + sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/current_freq_khz", package, + die); + + return (snapshot_sysfs_counter(path) / 1000); +} + +int get_epb(int cpu) +{ + char path[128 + PATH_BYTES]; + unsigned long long msr; + int ret, epb = -1; + FILE *fp; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/power/energy_perf_bias", cpu); + + fp = fopen(path, "r"); + if (!fp) + goto msr_fallback; + + ret = fscanf(fp, "%d", &epb); + if (ret != 1) + err(1, "%s(%s)", __func__, path); + + fclose(fp); + + return epb; + +msr_fallback: + get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr); + + return msr & 0xf; +} + +void get_apic_id(struct thread_data *t) +{ + unsigned int eax, ebx, ecx, edx; + + if (DO_BIC(BIC_APIC)) { + eax = ebx = ecx = edx = 0; + __cpuid(1, eax, ebx, ecx, edx); + + t->apic_id = (ebx >> 24) & 0xff; + } + + if (!DO_BIC(BIC_X2APIC)) + return; + + if (authentic_amd || hygon_genuine) { + unsigned int topology_extensions; + + if (max_extended_level < 0x8000001e) + return; + + eax = ebx = ecx = edx = 0; + __cpuid(0x80000001, eax, ebx, ecx, edx); + topology_extensions = ecx & (1 << 22); + + if (topology_extensions == 0) + return; + + eax = ebx = ecx = edx = 0; + __cpuid(0x8000001e, eax, ebx, ecx, edx); + + t->x2apic_id = eax; + return; + } + + if (!genuine_intel) + return; + + if (max_level < 0xb) + return; + + ecx = 0; + __cpuid(0xb, eax, ebx, ecx, edx); + t->x2apic_id = edx; + + if (debug && (t->apic_id != (t->x2apic_id & 0xff))) + fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id); +} + +int get_core_throt_cnt(int cpu, unsigned long long *cnt) +{ + char path[128 + PATH_BYTES]; + unsigned long long tmp; + FILE *fp; + int ret; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/thermal_throttle/core_throttle_count", cpu); + fp = fopen(path, "r"); + if (!fp) + return -1; + ret = fscanf(fp, "%lld", &tmp); + fclose(fp); + if (ret != 1) + return -1; + *cnt = tmp; + + return 0; +} + +/* + * get_counters(...) + * migrate to cpu + * acquire and record local counters for that cpu + */ +int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + int cpu = t->cpu_id; + unsigned long long msr; + int aperf_mperf_retry_count = 0; + struct msr_counter *mp; + int i; + + if (cpu_migrate(cpu)) { + fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu); + return -1; + } + + gettimeofday(&t->tv_begin, (struct timezone *)NULL); + + if (first_counter_read) + get_apic_id(t); +retry: + t->tsc = rdtsc(); /* we are running on local CPU of interest */ + + if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) { + unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; + + /* + * The TSC, APERF and MPERF must be read together for + * APERF/MPERF and MPERF/TSC to give accurate results. + * + * Unfortunately, APERF and MPERF are read by + * individual system call, so delays may occur + * between them. If the time to read them + * varies by a large amount, we re-read them. + */ + + /* + * This initial dummy APERF read has been seen to + * reduce jitter in the subsequent reads. + */ + + if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) + return -3; + + t->tsc = rdtsc(); /* re-read close to APERF */ + + tsc_before = t->tsc; + + if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) + return -3; + + tsc_between = rdtsc(); + + if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) + return -4; + + tsc_after = rdtsc(); + + aperf_time = tsc_between - tsc_before; + mperf_time = tsc_after - tsc_between; + + /* + * If the system call latency to read APERF and MPERF + * differ by more than 2x, then try again. + */ + if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { + aperf_mperf_retry_count++; + if (aperf_mperf_retry_count < 5) + goto retry; + else + warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time); + } + aperf_mperf_retry_count = 0; + + t->aperf = t->aperf * aperf_mperf_multiplier; + t->mperf = t->mperf * aperf_mperf_multiplier; + } + + if (DO_BIC(BIC_IPC)) + if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) + return -4; + + if (DO_BIC(BIC_IRQ)) + t->irq_count = irqs_per_cpu[cpu]; + if (DO_BIC(BIC_SMI)) { + if (get_msr(cpu, MSR_SMI_COUNT, &msr)) + return -5; + t->smi_count = msr & 0xFFFFFFFF; + } + if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) { + if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) + return -6; + } + + for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { + if (get_mp(cpu, mp, &t->counter[i])) + return -10; + } + + /* collect core counters only for 1st thread in core */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + goto done; + + if (DO_BIC(BIC_CPU_c3) || soft_c1_residency_display(BIC_CPU_c3)) { + if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) + return -6; + } + + if ((DO_BIC(BIC_CPU_c6) || soft_c1_residency_display(BIC_CPU_c6)) && !do_knl_cstates) { + if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) + return -7; + } else if (do_knl_cstates && soft_c1_residency_display(BIC_CPU_c6)) { + if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) + return -7; + } + + if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) { + if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) + return -8; + else if (t->is_atom) { + /* + * For Atom CPUs that has core cstate deeper than c6, + * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper. + * Minus CC7 (and deeper cstates) residency to get + * accturate cc6 residency. + */ + c->c6 -= c->c7; + } + } + + if (DO_BIC(BIC_Mod_c6)) + if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) + return -8; + + if (DO_BIC(BIC_CoreTmp)) { + if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) + return -9; + c->core_temp_c = tj_max - ((msr >> 16) & 0x7F); + } + + if (DO_BIC(BIC_CORE_THROT_CNT)) + get_core_throt_cnt(cpu, &c->core_throt_cnt); + + if (do_rapl & RAPL_AMD_F17H) { + if (get_msr(cpu, MSR_CORE_ENERGY_STAT, &msr)) + return -14; + c->core_energy = msr & 0xFFFFFFFF; + } + + for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { + if (get_mp(cpu, mp, &c->counter[i])) + return -10; + } + + /* collect package counters only for 1st core in package */ + if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + goto done; + + if (DO_BIC(BIC_Totl_c0)) { + if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) + return -10; + } + if (DO_BIC(BIC_Any_c0)) { + if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) + return -11; + } + if (DO_BIC(BIC_GFX_c0)) { + if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) + return -12; + } + if (DO_BIC(BIC_CPUGFX)) { + if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) + return -13; + } + if (DO_BIC(BIC_Pkgpc3)) + if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) + return -9; + if (DO_BIC(BIC_Pkgpc6)) { + if (do_slm_cstates) { + if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6)) + return -10; + } else { + if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) + return -10; + } + } + + if (DO_BIC(BIC_Pkgpc2)) + if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) + return -11; + if (DO_BIC(BIC_Pkgpc7)) + if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) + return -12; + if (DO_BIC(BIC_Pkgpc8)) + if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8)) + return -13; + if (DO_BIC(BIC_Pkgpc9)) + if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9)) + return -13; + if (DO_BIC(BIC_Pkgpc10)) + if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10)) + return -13; + + if (DO_BIC(BIC_CPU_LPI)) + p->cpu_lpi = cpuidle_cur_cpu_lpi_us; + if (DO_BIC(BIC_SYS_LPI)) + p->sys_lpi = cpuidle_cur_sys_lpi_us; + + if (do_rapl & RAPL_PKG) { + if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr)) + return -13; + p->energy_pkg = msr; + } + if (do_rapl & RAPL_CORES_ENERGY_STATUS) { + if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr)) + return -14; + p->energy_cores = msr; + } + if (do_rapl & RAPL_DRAM) { + if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) + return -15; + p->energy_dram = msr; + } + if (do_rapl & RAPL_GFX) { + if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr)) + return -16; + p->energy_gfx = msr; + } + if (do_rapl & RAPL_PKG_PERF_STATUS) { + if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr)) + return -16; + p->rapl_pkg_perf_status = msr; + } + if (do_rapl & RAPL_DRAM_PERF_STATUS) { + if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr)) + return -16; + p->rapl_dram_perf_status = msr; + } + if (do_rapl & RAPL_AMD_F17H) { + if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr)) + return -13; + p->energy_pkg = msr; + } + if (DO_BIC(BIC_PkgTmp)) { + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) + return -17; + p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F); + } + + if (DO_BIC(BIC_GFX_rc6)) + p->gfx_rc6_ms = gfx_cur_rc6_ms; + + /* n.b. assume die0 uncore frequency applies to whole package */ + if (DO_BIC(BIC_UNCORE_MHZ)) + p->uncore_mhz = get_uncore_mhz(p->package_id, 0); + + if (DO_BIC(BIC_GFXMHz)) + p->gfx_mhz = gfx_cur_mhz; + + if (DO_BIC(BIC_GFXACTMHz)) + p->gfx_act_mhz = gfx_act_mhz; + + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { + if (get_mp(cpu, mp, &p->counter[i])) + return -10; + } +done: + gettimeofday(&t->tv_end, (struct timezone *)NULL); + + return 0; +} + +/* + * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit: + * If you change the values, note they are used both in comparisons + * (>= PCL__7) and to index pkg_cstate_limit_strings[]. + */ + +#define PCLUKN 0 /* Unknown */ +#define PCLRSV 1 /* Reserved */ +#define PCL__0 2 /* PC0 */ +#define PCL__1 3 /* PC1 */ +#define PCL__2 4 /* PC2 */ +#define PCL__3 5 /* PC3 */ +#define PCL__4 6 /* PC4 */ +#define PCL__6 7 /* PC6 */ +#define PCL_6N 8 /* PC6 No Retention */ +#define PCL_6R 9 /* PC6 Retention */ +#define PCL__7 10 /* PC7 */ +#define PCL_7S 11 /* PC7 Shrink */ +#define PCL__8 12 /* PC8 */ +#define PCL__9 13 /* PC9 */ +#define PCL_10 14 /* PC10 */ +#define PCLUNL 15 /* Unlimited */ + +int pkg_cstate_limit = PCLUKN; +char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", + "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited" +}; + +int nhm_pkg_cstate_limits[16] = + { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, + PCLRSV, PCLRSV +}; + +int snb_pkg_cstate_limits[16] = + { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, + PCLRSV, PCLRSV +}; + +int hsw_pkg_cstate_limits[16] = + { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, + PCLRSV, PCLRSV +}; + +int slv_pkg_cstate_limits[16] = + { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, + PCL__6, PCL__7 +}; + +int amt_pkg_cstate_limits[16] = + { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, + PCLRSV, PCLRSV +}; + +int phi_pkg_cstate_limits[16] = + { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, + PCLRSV, PCLRSV +}; + +int glm_pkg_cstate_limits[16] = + { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, + PCLRSV, PCLRSV +}; + +int skx_pkg_cstate_limits[16] = + { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, + PCLRSV, PCLRSV +}; + +int icx_pkg_cstate_limits[16] = + { PCL__0, PCL__2, PCL__6, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, + PCLRSV, PCLRSV +}; + +static void calculate_tsc_tweak() +{ + tsc_tweak = base_hz / tsc_hz; +} + +void prewake_cstate_probe(unsigned int family, unsigned int model); + +static void dump_nhm_platform_info(void) +{ + unsigned long long msr; + unsigned int ratio; + + get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); + + fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr); + + ratio = (msr >> 40) & 0xFF; + fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0xFF; + fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); + + get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); + fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", + base_cpu, msr, msr & 0x2 ? "EN" : "DIS"); + + /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */ + if (dis_cstate_prewake) + fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN"); + + return; +} + +static void dump_hsw_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; + + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); + + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr); + + ratio = (msr >> 8) & 0xFF; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0xFF; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk); + return; +} + +static void dump_ivt_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; + + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); + + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr); + + ratio = (msr >> 56) & 0xFF; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 48) & 0xFF; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 40) & 0xFF; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 32) & 0xFF; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 24) & 0xFF; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0xFF; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0xFF; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0xFF; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); + return; +} + +int has_turbo_ratio_group_limits(int family, int model) +{ + + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_SAPPHIRERAPIDS_X: + case INTEL_FAM6_ATOM_GOLDMONT_D: + case INTEL_FAM6_ATOM_TREMONT_D: + return 1; + default: + return 0; + } +} + +static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model) +{ + unsigned long long msr, core_counts; + int shift; + + get_msr(base_cpu, trl_msr_offset, &msr); + fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n", + base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY" : "", msr); + + if (has_turbo_ratio_group_limits(family, model)) { + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts); + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts); + } else { + core_counts = 0x0807060504030201; + } + + for (shift = 56; shift >= 0; shift -= 8) { + unsigned int ratio, group_size; + + ratio = (msr >> shift) & 0xFF; + group_size = (core_counts >> shift) & 0xFF; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, group_size); + } + + return; +} + +static void dump_atom_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; + + get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr); + fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); + + ratio = (msr >> 0) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk); + + get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr); + fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF); + + ratio = (msr >> 24) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0x3F; + if (ratio) + fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk); +} + +static void dump_knl_turbo_ratio_limits(void) +{ + const unsigned int buckets_no = 7; + + unsigned long long msr; + int delta_cores, delta_ratio; + int i, b_nr; + unsigned int cores[buckets_no]; + unsigned int ratio[buckets_no]; + + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); + + fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr); + + /* + * Turbo encoding in KNL is as follows: + * [0] -- Reserved + * [7:1] -- Base value of number of active cores of bucket 1. + * [15:8] -- Base value of freq ratio of bucket 1. + * [20:16] -- +ve delta of number of active cores of bucket 2. + * i.e. active cores of bucket 2 = + * active cores of bucket 1 + delta + * [23:21] -- Negative delta of freq ratio of bucket 2. + * i.e. freq ratio of bucket 2 = + * freq ratio of bucket 1 - delta + * [28:24]-- +ve delta of number of active cores of bucket 3. + * [31:29]-- -ve delta of freq ratio of bucket 3. + * [36:32]-- +ve delta of number of active cores of bucket 4. + * [39:37]-- -ve delta of freq ratio of bucket 4. + * [44:40]-- +ve delta of number of active cores of bucket 5. + * [47:45]-- -ve delta of freq ratio of bucket 5. + * [52:48]-- +ve delta of number of active cores of bucket 6. + * [55:53]-- -ve delta of freq ratio of bucket 6. + * [60:56]-- +ve delta of number of active cores of bucket 7. + * [63:61]-- -ve delta of freq ratio of bucket 7. + */ + + b_nr = 0; + cores[b_nr] = (msr & 0xFF) >> 1; + ratio[b_nr] = (msr >> 8) & 0xFF; + + for (i = 16; i < 64; i += 8) { + delta_cores = (msr >> i) & 0x1F; + delta_ratio = (msr >> (i + 5)) & 0x7; + + cores[b_nr + 1] = cores[b_nr] + delta_cores; + ratio[b_nr + 1] = ratio[b_nr] - delta_ratio; + b_nr++; + } + + for (i = buckets_no - 1; i >= 0; i--) + if (i > 0 ? ratio[i] != ratio[i - 1] : 1) + fprintf(outf, + "%d * %.1f = %.1f MHz max turbo %d active cores\n", + ratio[i], bclk, ratio[i] * bclk, cores[i]); +} + +static void dump_nhm_cst_cfg(void) +{ + unsigned long long msr; + + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); + + fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr); + + fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)", + (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", + (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", + (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", + (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", + (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]); + +#define AUTOMATIC_CSTATE_CONVERSION (1UL << 16) + if (has_automatic_cstate_conversion) { + fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off"); + } + + fprintf(outf, ")\n"); + + return; +} + +static void dump_config_tdp(void) +{ + unsigned long long msr; + + get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr); + fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF); + + get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr); + if (msr) { + fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF); + fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF); + fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); + fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF); + } + fprintf(outf, ")\n"); + + get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr); + if (msr) { + fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF); + fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF); + fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF); + fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF); + } + fprintf(outf, ")\n"); + + get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr); + fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr); + if ((msr) & 0x3) + fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3); + fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); + fprintf(outf, ")\n"); + + get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr); + fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr); + fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF); + fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1); + fprintf(outf, ")\n"); +} + +unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 }; + +void print_irtl(void) +{ + unsigned long long msr; + + get_msr(base_cpu, MSR_PKGC3_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC6_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC7_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + if (!do_irtl_hsw) + return; + + get_msr(base_cpu, MSR_PKGC8_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC9_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + + get_msr(base_cpu, MSR_PKGC10_IRTL, &msr); + fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr); + fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT", + (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]); + +} + +void free_fd_percpu(void) +{ + int i; + + for (i = 0; i < topo.max_cpu_num + 1; ++i) { + if (fd_percpu[i] != 0) + close(fd_percpu[i]); + } + + free(fd_percpu); +} + +void free_all_buffers(void) +{ + int i; + + CPU_FREE(cpu_present_set); + cpu_present_set = NULL; + cpu_present_setsize = 0; + + CPU_FREE(cpu_affinity_set); + cpu_affinity_set = NULL; + cpu_affinity_setsize = 0; + + free(thread_even); + free(core_even); + free(package_even); + + thread_even = NULL; + core_even = NULL; + package_even = NULL; + + free(thread_odd); + free(core_odd); + free(package_odd); + + thread_odd = NULL; + core_odd = NULL; + package_odd = NULL; + + free(output_buffer); + output_buffer = NULL; + outp = NULL; + + free_fd_percpu(); + + free(irq_column_2_cpu); + free(irqs_per_cpu); + + for (i = 0; i <= topo.max_cpu_num; ++i) { + if (cpus[i].put_ids) + CPU_FREE(cpus[i].put_ids); + } + free(cpus); +} + +/* + * Parse a file containing a single int. + * Return 0 if file can not be opened + * Exit if file can be opened, but can not be parsed + */ +int parse_int_file(const char *fmt, ...) +{ + va_list args; + char path[PATH_MAX]; + FILE *filep; + int value; + + va_start(args, fmt); + vsnprintf(path, sizeof(path), fmt, args); + va_end(args); + filep = fopen(path, "r"); + if (!filep) + return 0; + if (fscanf(filep, "%d", &value) != 1) + err(1, "%s: failed to parse number from file", path); + fclose(filep); + return value; +} + +/* + * cpu_is_first_core_in_package(cpu) + * return 1 if given CPU is 1st core in package + */ +int cpu_is_first_core_in_package(int cpu) +{ + return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); +} + +int get_physical_package_id(int cpu) +{ + return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); +} + +int get_die_id(int cpu) +{ + return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/die_id", cpu); +} + +int get_core_id(int cpu) +{ + return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); +} + +void set_node_data(void) +{ + int pkg, node, lnode, cpu, cpux; + int cpu_count; + + /* initialize logical_node_id */ + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) + cpus[cpu].logical_node_id = -1; + + cpu_count = 0; + for (pkg = 0; pkg < topo.num_packages; pkg++) { + lnode = 0; + for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) { + if (cpus[cpu].physical_package_id != pkg) + continue; + /* find a cpu with an unset logical_node_id */ + if (cpus[cpu].logical_node_id != -1) + continue; + cpus[cpu].logical_node_id = lnode; + node = cpus[cpu].physical_node_id; + cpu_count++; + /* + * find all matching cpus on this pkg and set + * the logical_node_id + */ + for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) { + if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) { + cpus[cpux].logical_node_id = lnode; + cpu_count++; + } + } + lnode++; + if (lnode > topo.nodes_per_pkg) + topo.nodes_per_pkg = lnode; + } + if (cpu_count >= topo.max_cpu_num) + break; + } +} + +int get_physical_node_id(struct cpu_topology *thiscpu) +{ + char path[80]; + FILE *filep; + int i; + int cpu = thiscpu->logical_cpu_id; + + for (i = 0; i <= topo.max_cpu_num; i++) { + sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i); + filep = fopen(path, "r"); + if (!filep) + continue; + fclose(filep); + return i; + } + return -1; +} + +int get_thread_siblings(struct cpu_topology *thiscpu) +{ + char path[80], character; + FILE *filep; + unsigned long map; + int so, shift, sib_core; + int cpu = thiscpu->logical_cpu_id; + int offset = topo.max_cpu_num + 1; + size_t size; + int thread_id = 0; + + thiscpu->put_ids = CPU_ALLOC((topo.max_cpu_num + 1)); + if (thiscpu->thread_id < 0) + thiscpu->thread_id = thread_id++; + if (!thiscpu->put_ids) + return -1; + + size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(size, thiscpu->put_ids); + + sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); + filep = fopen(path, "r"); + + if (!filep) { + warnx("%s: open failed", path); + return -1; + } + do { + offset -= BITMASK_SIZE; + if (fscanf(filep, "%lx%c", &map, &character) != 2) + err(1, "%s: failed to parse file", path); + for (shift = 0; shift < BITMASK_SIZE; shift++) { + if ((map >> shift) & 0x1) { + so = shift + offset; + sib_core = get_core_id(so); + if (sib_core == thiscpu->physical_core_id) { + CPU_SET_S(so, size, thiscpu->put_ids); + if ((so != cpu) && (cpus[so].thread_id < 0)) + cpus[so].thread_id = thread_id++; + } + } + } + } while (character == ','); + fclose(filep); + + return CPU_COUNT_S(size, thiscpu->put_ids); +} + +/* + * run func(thread, core, package) in topology order + * skip non-present cpus + */ + +int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *, + struct pkg_data *, struct thread_data *, struct core_data *, + struct pkg_data *), struct thread_data *thread_base, + struct core_data *core_base, struct pkg_data *pkg_base, + struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2) +{ + int retval, pkg_no, node_no, core_no, thread_no; + + for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { + for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) { + for (core_no = 0; core_no < topo.cores_per_node; ++core_no) { + for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) { + struct thread_data *t, *t2; + struct core_data *c, *c2; + struct pkg_data *p, *p2; + + t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no); + + if (cpu_is_not_present(t->cpu_id)) + continue; + + t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no); + + c = GET_CORE(core_base, core_no, node_no, pkg_no); + c2 = GET_CORE(core_base2, core_no, node_no, pkg_no); + + p = GET_PKG(pkg_base, pkg_no); + p2 = GET_PKG(pkg_base2, pkg_no); + + retval = func(t, c, p, t2, c2, p2); + if (retval) + return retval; + } + } + } + } + return 0; +} + +/* + * run func(cpu) on every cpu in /proc/stat + * return max_cpu number + */ +int for_all_proc_cpus(int (func) (int)) +{ + FILE *fp; + int cpu_num; + int retval; + + fp = fopen_or_die(proc_stat, "r"); + + retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); + if (retval != 0) + err(1, "%s: failed to parse format", proc_stat); + + while (1) { + retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); + if (retval != 1) + break; + + retval = func(cpu_num); + if (retval) { + fclose(fp); + return (retval); + } + } + fclose(fp); + return 0; +} + +void re_initialize(void) +{ + free_all_buffers(); + setup_all_buffers(); + fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); +} + +void set_max_cpu_num(void) +{ + FILE *filep; + int base_cpu; + unsigned long dummy; + char pathname[64]; + + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(1, "cannot find calling cpu ID"); + sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu); + + filep = fopen_or_die(pathname, "r"); + topo.max_cpu_num = 0; + while (fscanf(filep, "%lx,", &dummy) == 1) + topo.max_cpu_num += BITMASK_SIZE; + fclose(filep); + topo.max_cpu_num--; /* 0 based */ +} + +/* + * count_cpus() + * remember the last one seen, it will be the max + */ +int count_cpus(int cpu) +{ + UNUSED(cpu); + + topo.num_cpus++; + return 0; +} + +int mark_cpu_present(int cpu) +{ + CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); + return 0; +} + +int init_thread_id(int cpu) +{ + cpus[cpu].thread_id = -1; + return 0; +} + +/* + * snapshot_proc_interrupts() + * + * read and record summary of /proc/interrupts + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_proc_interrupts(void) +{ + static FILE *fp; + int column, retval; + + if (fp == NULL) + fp = fopen_or_die("/proc/interrupts", "r"); + else + rewind(fp); + + /* read 1st line of /proc/interrupts to get cpu* name for each column */ + for (column = 0; column < topo.num_cpus; ++column) { + int cpu_number; + + retval = fscanf(fp, " CPU%d", &cpu_number); + if (retval != 1) + break; + + if (cpu_number > topo.max_cpu_num) { + warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num); + return 1; + } + + irq_column_2_cpu[column] = cpu_number; + irqs_per_cpu[cpu_number] = 0; + } + + /* read /proc/interrupt count lines and sum up irqs per cpu */ + while (1) { + int column; + char buf[64]; + + retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */ + if (retval != 1) + break; + + /* read the count per cpu */ + for (column = 0; column < topo.num_cpus; ++column) { + + int cpu_number, irq_count; + + retval = fscanf(fp, " %d", &irq_count); + if (retval != 1) + break; + + cpu_number = irq_column_2_cpu[column]; + irqs_per_cpu[cpu_number] += irq_count; + + } + + while (getc(fp) != '\n') ; /* flush interrupt description */ + + } + return 0; +} + +/* + * snapshot_gfx_rc6_ms() + * + * record snapshot of + * /sys/class/drm/card0/power/rc6_residency_ms + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_gfx_rc6_ms(void) +{ + FILE *fp; + int retval; + + fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r"); + + retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms); + if (retval != 1) + err(1, "GFX rc6"); + + fclose(fp); + + return 0; +} + +/* + * snapshot_gfx_mhz() + * + * record snapshot of + * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_gfx_mhz(void) +{ + static FILE *fp; + int retval; + + if (fp == NULL) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r"); + else { + rewind(fp); + fflush(fp); + } + + retval = fscanf(fp, "%d", &gfx_cur_mhz); + if (retval != 1) + err(1, "GFX MHz"); + + return 0; +} + +/* + * snapshot_gfx_cur_mhz() + * + * record snapshot of + * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_gfx_act_mhz(void) +{ + static FILE *fp; + int retval; + + if (fp == NULL) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r"); + else { + rewind(fp); + fflush(fp); + } + + retval = fscanf(fp, "%d", &gfx_act_mhz); + if (retval != 1) + err(1, "GFX ACT MHz"); + + return 0; +} + +/* + * snapshot_cpu_lpi() + * + * record snapshot of + * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us + */ +int snapshot_cpu_lpi_us(void) +{ + FILE *fp; + int retval; + + fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r"); + + retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us); + if (retval != 1) { + fprintf(stderr, "Disabling Low Power Idle CPU output\n"); + BIC_NOT_PRESENT(BIC_CPU_LPI); + fclose(fp); + return -1; + } + + fclose(fp); + + return 0; +} + +/* + * snapshot_sys_lpi() + * + * record snapshot of sys_lpi_file + */ +int snapshot_sys_lpi_us(void) +{ + FILE *fp; + int retval; + + fp = fopen_or_die(sys_lpi_file, "r"); + + retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us); + if (retval != 1) { + fprintf(stderr, "Disabling Low Power Idle System output\n"); + BIC_NOT_PRESENT(BIC_SYS_LPI); + fclose(fp); + return -1; + } + fclose(fp); + + return 0; +} + +/* + * snapshot /proc and /sys files + * + * return 1 if configuration restart needed, else return 0 + */ +int snapshot_proc_sysfs_files(void) +{ + if (DO_BIC(BIC_IRQ)) + if (snapshot_proc_interrupts()) + return 1; + + if (DO_BIC(BIC_GFX_rc6)) + snapshot_gfx_rc6_ms(); + + if (DO_BIC(BIC_GFXMHz)) + snapshot_gfx_mhz(); + + if (DO_BIC(BIC_GFXACTMHz)) + snapshot_gfx_act_mhz(); + + if (DO_BIC(BIC_CPU_LPI)) + snapshot_cpu_lpi_us(); + + if (DO_BIC(BIC_SYS_LPI)) + snapshot_sys_lpi_us(); + + return 0; +} + +int exit_requested; + +static void signal_handler(int signal) +{ + switch (signal) { + case SIGINT: + exit_requested = 1; + if (debug) + fprintf(stderr, " SIGINT\n"); + break; + case SIGUSR1: + if (debug > 1) + fprintf(stderr, "SIGUSR1\n"); + break; + } +} + +void setup_signal_handler(void) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + + sa.sa_handler = &signal_handler; + + if (sigaction(SIGINT, &sa, NULL) < 0) + err(1, "sigaction SIGINT"); + if (sigaction(SIGUSR1, &sa, NULL) < 0) + err(1, "sigaction SIGUSR1"); +} + +void do_sleep(void) +{ + struct timeval tout; + struct timespec rest; + fd_set readfds; + int retval; + + FD_ZERO(&readfds); + FD_SET(0, &readfds); + + if (ignore_stdin) { + nanosleep(&interval_ts, NULL); + return; + } + + tout = interval_tv; + retval = select(1, &readfds, NULL, NULL, &tout); + + if (retval == 1) { + switch (getc(stdin)) { + case 'q': + exit_requested = 1; + break; + case EOF: + /* + * 'stdin' is a pipe closed on the other end. There + * won't be any further input. + */ + ignore_stdin = 1; + /* Sleep the rest of the time */ + rest.tv_sec = (tout.tv_sec + tout.tv_usec / 1000000); + rest.tv_nsec = (tout.tv_usec % 1000000) * 1000; + nanosleep(&rest, NULL); + } + } +} + +int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) +{ + int ret, idx; + unsigned long long msr_cur, msr_last; + + if (!per_cpu_msr_sum) + return 1; + + idx = offset_to_idx(offset); + if (idx < 0) + return idx; + /* get_msr_sum() = sum + (get_msr() - last) */ + ret = get_msr(cpu, offset, &msr_cur); + if (ret) + return ret; + msr_last = per_cpu_msr_sum[cpu].entries[idx].last; + DELTA_WRAP32(msr_cur, msr_last); + *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; + + return 0; +} + +timer_t timerid; + +/* Timer callback, update the sum of MSRs periodically. */ +static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + int i, ret; + int cpu = t->cpu_id; + + UNUSED(c); + UNUSED(p); + + for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { + unsigned long long msr_cur, msr_last; + off_t offset; + + if (!idx_valid(i)) + continue; + offset = idx_to_offset(i); + if (offset < 0) + continue; + ret = get_msr(cpu, offset, &msr_cur); + if (ret) { + fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset); + continue; + } + + msr_last = per_cpu_msr_sum[cpu].entries[i].last; + per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; + + DELTA_WRAP32(msr_cur, msr_last); + per_cpu_msr_sum[cpu].entries[i].sum += msr_last; + } + return 0; +} + +static void msr_record_handler(union sigval v) +{ + UNUSED(v); + + for_all_cpus(update_msr_sum, EVEN_COUNTERS); +} + +void msr_sum_record(void) +{ + struct itimerspec its; + struct sigevent sev; + + per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); + if (!per_cpu_msr_sum) { + fprintf(outf, "Can not allocate memory for long time MSR.\n"); + return; + } + /* + * Signal handler might be restricted, so use thread notifier instead. + */ + memset(&sev, 0, sizeof(struct sigevent)); + sev.sigev_notify = SIGEV_THREAD; + sev.sigev_notify_function = msr_record_handler; + + sev.sigev_value.sival_ptr = &timerid; + if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { + fprintf(outf, "Can not create timer.\n"); + goto release_msr; + } + + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 1; + /* + * A wraparound time has been calculated early. + * Some sources state that the peak power for a + * microprocessor is usually 1.5 times the TDP rating, + * use 2 * TDP for safety. + */ + its.it_interval.tv_sec = rapl_joule_counter_range / 2; + its.it_interval.tv_nsec = 0; + + if (timer_settime(timerid, 0, &its, NULL) == -1) { + fprintf(outf, "Can not set timer.\n"); + goto release_timer; + } + return; + +release_timer: + timer_delete(timerid); +release_msr: + free(per_cpu_msr_sum); +} + +/* + * set_my_sched_priority(pri) + * return previous + * + * if non-root, do this: + * # /sbin/setcap cap_sys_rawio,cap_sys_nice=+ep /usr/bin/turbostat + */ +int set_my_sched_priority(int priority) +{ + int retval; + int original_priority; + + errno = 0; + original_priority = getpriority(PRIO_PROCESS, 0); + if (errno && (original_priority == -1)) + err(errno, "getpriority"); + + retval = setpriority(PRIO_PROCESS, 0, priority); + if (retval) + err(retval, "setpriority(%d)", priority); + + errno = 0; + retval = getpriority(PRIO_PROCESS, 0); + if (retval != priority) + err(retval, "getpriority(%d) != setpriority(%d)", retval, priority); + + return original_priority; +} + +void turbostat_loop() +{ + int retval; + int restarted = 0; + unsigned int done_iters = 0; + + setup_signal_handler(); + + /* + * elevate own priority for interval mode + */ + set_my_sched_priority(-20); + +restart: + restarted++; + + snapshot_proc_sysfs_files(); + retval = for_all_cpus(get_counters, EVEN_COUNTERS); + first_counter_read = 0; + if (retval < -1) { + exit(retval); + } else if (retval == -1) { + if (restarted > 10) { + exit(retval); + } + re_initialize(); + goto restart; + } + restarted = 0; + done_iters = 0; + gettimeofday(&tv_even, (struct timezone *)NULL); + + while (1) { + if (for_all_proc_cpus(cpu_is_not_present)) { + re_initialize(); + goto restart; + } + do_sleep(); + if (snapshot_proc_sysfs_files()) + goto restart; + retval = for_all_cpus(get_counters, ODD_COUNTERS); + if (retval < -1) { + exit(retval); + } else if (retval == -1) { + re_initialize(); + goto restart; + } + gettimeofday(&tv_odd, (struct timezone *)NULL); + timersub(&tv_odd, &tv_even, &tv_delta); + if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) { + re_initialize(); + goto restart; + } + compute_average(EVEN_COUNTERS); + format_all_counters(EVEN_COUNTERS); + flush_output_stdout(); + if (exit_requested) + break; + if (num_iterations && ++done_iters >= num_iterations) + break; + do_sleep(); + if (snapshot_proc_sysfs_files()) + goto restart; + retval = for_all_cpus(get_counters, EVEN_COUNTERS); + if (retval < -1) { + exit(retval); + } else if (retval == -1) { + re_initialize(); + goto restart; + } + gettimeofday(&tv_even, (struct timezone *)NULL); + timersub(&tv_even, &tv_odd, &tv_delta); + if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) { + re_initialize(); + goto restart; + } + compute_average(ODD_COUNTERS); + format_all_counters(ODD_COUNTERS); + flush_output_stdout(); + if (exit_requested) + break; + if (num_iterations && ++done_iters >= num_iterations) + break; + } +} + +void check_dev_msr() +{ + struct stat sb; + char pathname[32]; + + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (stat(pathname, &sb)) + if (system("/sbin/modprobe msr > /dev/null 2>&1")) + err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); +} + +/* + * check for CAP_SYS_RAWIO + * return 0 on success + * return 1 on fail + */ +int check_for_cap_sys_rawio(void) +{ + cap_t caps; + cap_flag_value_t cap_flag_value; + + caps = cap_get_proc(); + if (caps == NULL) + err(-6, "cap_get_proc\n"); + + if (cap_get_flag(caps, CAP_SYS_RAWIO, CAP_EFFECTIVE, &cap_flag_value)) + err(-6, "cap_get\n"); + + if (cap_flag_value != CAP_SET) { + warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname); + return 1; + } + + if (cap_free(caps) == -1) + err(-6, "cap_free\n"); + + return 0; +} + +void check_permissions(void) +{ + int do_exit = 0; + char pathname[32]; + + /* check for CAP_SYS_RAWIO */ + do_exit += check_for_cap_sys_rawio(); + + /* test file permissions */ + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (euidaccess(pathname, R_OK)) { + do_exit++; + warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); + } + + /* if all else fails, thell them to be root */ + if (do_exit) + if (getuid() != 0) + warnx("... or simply run as root"); + + if (do_exit) + exit(-6); +} + +/* + * NHM adds support for additional MSRs: + * + * MSR_SMI_COUNT 0x00000034 + * + * MSR_PLATFORM_INFO 0x000000ce + * MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 + * + * MSR_MISC_PWR_MGMT 0x000001aa + * + * MSR_PKG_C3_RESIDENCY 0x000003f8 + * MSR_PKG_C6_RESIDENCY 0x000003f9 + * MSR_CORE_C3_RESIDENCY 0x000003fc + * MSR_CORE_C6_RESIDENCY 0x000003fd + * + * Side effect: + * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL + * sets has_misc_feature_control + */ +int probe_nhm_msrs(unsigned int family, unsigned int model) +{ + unsigned long long msr; + unsigned int base_ratio; + int *pkg_cstate_limits; + + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + bclk = discover_bclk(family, model); + + switch (model) { + case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ + case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ + pkg_cstate_limits = nhm_pkg_cstate_limits; + break; + case INTEL_FAM6_SANDYBRIDGE: /* SNB */ + case INTEL_FAM6_SANDYBRIDGE_X: /* SNB Xeon */ + case INTEL_FAM6_IVYBRIDGE: /* IVB */ + case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ + pkg_cstate_limits = snb_pkg_cstate_limits; + has_misc_feature_control = 1; + break; + case INTEL_FAM6_HASWELL: /* HSW */ + case INTEL_FAM6_HASWELL_G: /* HSW */ + case INTEL_FAM6_HASWELL_X: /* HSX */ + case INTEL_FAM6_HASWELL_L: /* HSW */ + case INTEL_FAM6_BROADWELL: /* BDW */ + case INTEL_FAM6_BROADWELL_G: /* BDW */ + case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_SKYLAKE_L: /* SKL */ + case INTEL_FAM6_CANNONLAKE_L: /* CNL */ + pkg_cstate_limits = hsw_pkg_cstate_limits; + has_misc_feature_control = 1; + break; + case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ + pkg_cstate_limits = skx_pkg_cstate_limits; + has_misc_feature_control = 1; + break; + case INTEL_FAM6_ICELAKE_X: /* ICX */ + pkg_cstate_limits = icx_pkg_cstate_limits; + has_misc_feature_control = 1; + break; + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ + no_MSR_MISC_PWR_MGMT = 1; + /* FALLTHRU */ + case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ + pkg_cstate_limits = slv_pkg_cstate_limits; + break; + case INTEL_FAM6_ATOM_AIRMONT: /* AMT */ + pkg_cstate_limits = amt_pkg_cstate_limits; + no_MSR_MISC_PWR_MGMT = 1; + break; + case INTEL_FAM6_XEON_PHI_KNL: /* PHI */ + pkg_cstate_limits = phi_pkg_cstate_limits; + break; + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ + pkg_cstate_limits = glm_pkg_cstate_limits; + break; + default: + return 0; + } + get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr); + pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; + + get_msr(base_cpu, MSR_PLATFORM_INFO, &msr); + base_ratio = (msr >> 8) & 0xFF; + + base_hz = base_ratio * bclk * 1000000; + has_base_hz = 1; + return 1; +} + +/* + * SLV client has support for unique MSRs: + * + * MSR_CC6_DEMOTION_POLICY_CONFIG + * MSR_MC6_DEMOTION_POLICY_CONFIG + */ + +int has_slv_msrs(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_SILVERMONT: + case INTEL_FAM6_ATOM_SILVERMONT_MID: + case INTEL_FAM6_ATOM_AIRMONT_MID: + return 1; + } + return 0; +} + +int is_dnv(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_GOLDMONT_D: + return 1; + } + return 0; +} + +int is_bdx(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_BROADWELL_X: + return 1; + } + return 0; +} + +int is_skx(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_SKYLAKE_X: + return 1; + } + return 0; +} + +int is_icx(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_ICELAKE_X: + return 1; + } + return 0; +} + +int is_spr(unsigned int family, unsigned int model) +{ + + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_SAPPHIRERAPIDS_X: + return 1; + } + return 0; +} + +int is_ehl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_TREMONT: + return 1; + } + return 0; +} + +int is_jvl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_TREMONT_D: + return 1; + } + return 0; +} + +int has_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (has_slv_msrs(family, model)) + return 0; + + if (family != 6) + return 0; + + switch (model) { + /* Nehalem compatible, but do not include turbo-ratio limit support */ + case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ + case INTEL_FAM6_XEON_PHI_KNL: /* PHI - Knights Landing (different MSR definition) */ + return 0; + default: + return 1; + } +} + +int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (has_slv_msrs(family, model)) + return 1; + + return 0; +} + +int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ + case INTEL_FAM6_HASWELL_X: /* HSW Xeon */ + return 1; + default: + return 0; + } +} + +int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_HASWELL_X: /* HSW Xeon */ + return 1; + default: + return 0; + } +} + +int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ + return 1; + default: + return 0; + } +} + +int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_GOLDMONT: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_SAPPHIRERAPIDS_X: + return 1; + default: + return 0; + } +} + +int has_config_tdp(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_IVYBRIDGE: /* IVB */ + case INTEL_FAM6_HASWELL: /* HSW */ + case INTEL_FAM6_HASWELL_X: /* HSX */ + case INTEL_FAM6_HASWELL_L: /* HSW */ + case INTEL_FAM6_HASWELL_G: /* HSW */ + case INTEL_FAM6_BROADWELL: /* BDW */ + case INTEL_FAM6_BROADWELL_G: /* BDW */ + case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_SKYLAKE_L: /* SKL */ + case INTEL_FAM6_CANNONLAKE_L: /* CNL */ + case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ + case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ + case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */ + return 1; + default: + return 0; + } +} + +/* + * tcc_offset_bits: + * 0: Tcc Offset not supported (Default) + * 6: Bit 29:24 of MSR_PLATFORM_INFO + * 4: Bit 27:24 of MSR_PLATFORM_INFO + */ +void check_tcc_offset(int model) +{ + unsigned long long msr; + + if (!genuine_intel) + return; + + switch (model) { + case INTEL_FAM6_SKYLAKE_L: + case INTEL_FAM6_SKYLAKE: + case INTEL_FAM6_KABYLAKE_L: + case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_ICELAKE_L: + case INTEL_FAM6_ICELAKE: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: + case INTEL_FAM6_COMETLAKE: + if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) { + msr = (msr >> 30) & 1; + if (msr) + tcc_offset_bits = 6; + } + return; + default: + return; + } +} + +static void remove_underbar(char *s) +{ + char *to = s; + + while (*s) { + if (*s != '_') + *to++ = *s; + s++; + } + + *to = 0; +} + +static void dump_turbo_ratio_info(unsigned int family, unsigned int model) +{ + if (!has_turbo) + return; + + if (has_hsw_turbo_ratio_limit(family, model)) + dump_hsw_turbo_ratio_limits(); + + if (has_ivt_turbo_ratio_limit(family, model)) + dump_ivt_turbo_ratio_limits(); + + if (has_turbo_ratio_limit(family, model)) { + dump_turbo_ratio_limits(MSR_TURBO_RATIO_LIMIT, family, model); + + if (is_hybrid) + dump_turbo_ratio_limits(MSR_SECONDARY_TURBO_RATIO_LIMIT, family, model); + } + + if (has_atom_turbo_ratio_limit(family, model)) + dump_atom_turbo_ratio_limits(); + + if (has_knl_turbo_ratio_limit(family, model)) + dump_knl_turbo_ratio_limits(); + + if (has_config_tdp(family, model)) + dump_config_tdp(); +} + +static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model) +{ + if (!do_nhm_platform_info) + return; + + dump_nhm_platform_info(); + dump_turbo_ratio_info(family, model); + dump_nhm_cst_cfg(); +} + +static int read_sysfs_int(char *path) +{ + FILE *input; + int retval = -1; + + input = fopen(path, "r"); + if (input == NULL) { + if (debug) + fprintf(outf, "NSFOD %s\n", path); + return (-1); + } + if (fscanf(input, "%d", &retval) != 1) + err(1, "%s: failed to read int from file", path); + fclose(input); + + return (retval); +} + +static void dump_sysfs_file(char *path) +{ + FILE *input; + char cpuidle_buf[64]; + + input = fopen(path, "r"); + if (input == NULL) { + if (debug) + fprintf(outf, "NSFOD %s\n", path); + return; + } + if (!fgets(cpuidle_buf, sizeof(cpuidle_buf), input)) + err(1, "%s: failed to read file", path); + fclose(input); + + fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf); +} + +static void intel_uncore_frequency_probe(void) +{ + int i, j; + char path[128]; + + if (!genuine_intel) + return; + + if (access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00", R_OK)) + return; + + if (!access("/sys/devices/system/cpu/intel_uncore_frequency/package_00_die_00/current_freq_khz", R_OK)) + BIC_PRESENT(BIC_UNCORE_MHZ); + + if (quiet) + return; + + for (i = 0; i < topo.num_packages; ++i) { + for (j = 0; j < topo.num_die; ++j) { + int k, l; + + sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/min_freq_khz", + i, j); + k = read_sysfs_int(path); + sprintf(path, "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/max_freq_khz", + i, j); + l = read_sysfs_int(path); + fprintf(outf, "Uncore Frequency pkg%d die%d: %d - %d MHz ", i, j, k / 1000, l / 1000); + + sprintf(path, + "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_min_freq_khz", + i, j); + k = read_sysfs_int(path); + sprintf(path, + "/sys/devices/system/cpu/intel_uncore_frequency/package_0%d_die_0%d/initial_max_freq_khz", + i, j); + l = read_sysfs_int(path); + fprintf(outf, "(%d - %d MHz)\n", k / 1000, l / 1000); + } + } +} + +static void dump_sysfs_cstate_config(void) +{ + char path[64]; + char name_buf[16]; + char desc[64]; + FILE *input; + int state; + char *sp; + + if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { + fprintf(outf, "cpuidle not loaded\n"); + return; + } + + dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_driver"); + dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor"); + dump_sysfs_file("/sys/devices/system/cpu/cpuidle/current_governor_ro"); + + for (state = 0; state < 10; ++state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); + + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '\0'; + fclose(input); + + remove_underbar(name_buf); + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + if (!fgets(desc, sizeof(desc), input)) + err(1, "%s: failed to read file", path); + + fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc); + fclose(input); + } +} + +static void dump_sysfs_pstate_config(void) +{ + char path[64]; + char driver_buf[64]; + char governor_buf[64]; + FILE *input; + int turbo; + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu); + input = fopen(path, "r"); + if (input == NULL) { + fprintf(outf, "NSFOD %s\n", path); + return; + } + if (!fgets(driver_buf, sizeof(driver_buf), input)) + err(1, "%s: failed to read file", path); + fclose(input); + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu); + input = fopen(path, "r"); + if (input == NULL) { + fprintf(outf, "NSFOD %s\n", path); + return; + } + if (!fgets(governor_buf, sizeof(governor_buf), input)) + err(1, "%s: failed to read file", path); + fclose(input); + + fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf); + fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf); + + sprintf(path, "/sys/devices/system/cpu/cpufreq/boost"); + input = fopen(path, "r"); + if (input != NULL) { + if (fscanf(input, "%d", &turbo) != 1) + err(1, "%s: failed to parse number from file", path); + fprintf(outf, "cpufreq boost: %d\n", turbo); + fclose(input); + } + + sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo"); + input = fopen(path, "r"); + if (input != NULL) { + if (fscanf(input, "%d", &turbo) != 1) + err(1, "%s: failed to parse number from file", path); + fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo); + fclose(input); + } +} + +/* + * print_epb() + * Decode the ENERGY_PERF_BIAS MSR + */ +int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + char *epb_string; + int cpu, epb; + + UNUSED(c); + UNUSED(p); + + if (!has_epb) + return 0; + + cpu = t->cpu_id; + + /* EPB is per-package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu); + return -1; + } + + epb = get_epb(cpu); + if (epb < 0) + return 0; + + switch (epb) { + case ENERGY_PERF_BIAS_PERFORMANCE: + epb_string = "performance"; + break; + case ENERGY_PERF_BIAS_NORMAL: + epb_string = "balanced"; + break; + case ENERGY_PERF_BIAS_POWERSAVE: + epb_string = "powersave"; + break; + default: + epb_string = "custom"; + break; + } + fprintf(outf, "cpu%d: EPB: %d (%s)\n", cpu, epb, epb_string); + + return 0; +} + +/* + * print_hwp() + * Decode the MSR_HWP_CAPABILITIES + */ +int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + int cpu; + + UNUSED(c); + UNUSED(p); + + if (!has_hwp) + return 0; + + cpu = t->cpu_id; + + /* MSR_HWP_CAPABILITIES is per-package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (get_msr(cpu, MSR_PM_ENABLE, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-"); + + /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ + if ((msr & (1 << 0)) == 0) + return 0; + + if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx " + "(high %d guar %d eff %d low %d)\n", + cpu, msr, + (unsigned int)HWP_HIGHEST_PERF(msr), + (unsigned int)HWP_GUARANTEED_PERF(msr), + (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr)); + + if (get_msr(cpu, MSR_HWP_REQUEST, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx " + "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n", + cpu, msr, + (unsigned int)(((msr) >> 0) & 0xff), + (unsigned int)(((msr) >> 8) & 0xff), + (unsigned int)(((msr) >> 16) & 0xff), + (unsigned int)(((msr) >> 24) & 0xff), + (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1)); + + if (has_hwp_pkg) { + if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx " + "(min %d max %d des %d epp 0x%x window 0x%x)\n", + cpu, msr, + (unsigned int)(((msr) >> 0) & 0xff), + (unsigned int)(((msr) >> 8) & 0xff), + (unsigned int)(((msr) >> 16) & 0xff), + (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3)); + } + if (has_hwp_notify) { + if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx " + "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n", + cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis"); + } + if (get_msr(cpu, MSR_HWP_STATUS, &msr)) + return 0; + + fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx " + "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n", + cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-"); + + return 0; +} + +/* + * print_perf_limit() + */ +int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + int cpu; + + UNUSED(c); + UNUSED(p); + + cpu = t->cpu_id; + + /* per-package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (do_core_perf_limit_reasons) { + get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr); + fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", + (msr & 1 << 15) ? "bit15, " : "", + (msr & 1 << 14) ? "bit14, " : "", + (msr & 1 << 13) ? "Transitions, " : "", + (msr & 1 << 12) ? "MultiCoreTurbo, " : "", + (msr & 1 << 11) ? "PkgPwrL2, " : "", + (msr & 1 << 10) ? "PkgPwrL1, " : "", + (msr & 1 << 9) ? "CorePwr, " : "", + (msr & 1 << 8) ? "Amps, " : "", + (msr & 1 << 6) ? "VR-Therm, " : "", + (msr & 1 << 5) ? "Auto-HWP, " : "", + (msr & 1 << 4) ? "Graphics, " : "", + (msr & 1 << 2) ? "bit2, " : "", + (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : ""); + fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", + (msr & 1 << 31) ? "bit31, " : "", + (msr & 1 << 30) ? "bit30, " : "", + (msr & 1 << 29) ? "Transitions, " : "", + (msr & 1 << 28) ? "MultiCoreTurbo, " : "", + (msr & 1 << 27) ? "PkgPwrL2, " : "", + (msr & 1 << 26) ? "PkgPwrL1, " : "", + (msr & 1 << 25) ? "CorePwr, " : "", + (msr & 1 << 24) ? "Amps, " : "", + (msr & 1 << 22) ? "VR-Therm, " : "", + (msr & 1 << 21) ? "Auto-HWP, " : "", + (msr & 1 << 20) ? "Graphics, " : "", + (msr & 1 << 18) ? "bit18, " : "", + (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : ""); + + } + if (do_gfx_perf_limit_reasons) { + get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr); + fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)", + (msr & 1 << 0) ? "PROCHOT, " : "", + (msr & 1 << 1) ? "ThermStatus, " : "", + (msr & 1 << 4) ? "Graphics, " : "", + (msr & 1 << 6) ? "VR-Therm, " : "", + (msr & 1 << 8) ? "Amps, " : "", + (msr & 1 << 9) ? "GFXPwr, " : "", + (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); + fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n", + (msr & 1 << 16) ? "PROCHOT, " : "", + (msr & 1 << 17) ? "ThermStatus, " : "", + (msr & 1 << 20) ? "Graphics, " : "", + (msr & 1 << 22) ? "VR-Therm, " : "", + (msr & 1 << 24) ? "Amps, " : "", + (msr & 1 << 25) ? "GFXPwr, " : "", + (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); + } + if (do_ring_perf_limit_reasons) { + get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr); + fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); + fprintf(outf, " (Active: %s%s%s%s%s%s)", + (msr & 1 << 0) ? "PROCHOT, " : "", + (msr & 1 << 1) ? "ThermStatus, " : "", + (msr & 1 << 6) ? "VR-Therm, " : "", + (msr & 1 << 8) ? "Amps, " : "", + (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : ""); + fprintf(outf, " (Logged: %s%s%s%s%s%s)\n", + (msr & 1 << 16) ? "PROCHOT, " : "", + (msr & 1 << 17) ? "ThermStatus, " : "", + (msr & 1 << 22) ? "VR-Therm, " : "", + (msr & 1 << 24) ? "Amps, " : "", + (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : ""); + } + return 0; +} + +#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */ +#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */ + +double get_tdp_intel(unsigned int model) +{ + unsigned long long msr; + + if (do_rapl & RAPL_PKG_POWER_INFO) + if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) + return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; + + switch (model) { + case INTEL_FAM6_ATOM_SILVERMONT: + case INTEL_FAM6_ATOM_SILVERMONT_D: + return 30.0; + default: + return 135.0; + } +} + +double get_tdp_amd(unsigned int family) +{ + UNUSED(family); + + /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ + return 280.0; +} + +/* + * rapl_dram_energy_units_probe() + * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. + */ +static double rapl_dram_energy_units_probe(int model, double rapl_energy_units) +{ + /* only called for genuine_intel, family 6 */ + + switch (model) { + case INTEL_FAM6_HASWELL_X: /* HSX */ + case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ + return (rapl_dram_energy_units = 15.3 / 1000000); + default: + return (rapl_energy_units); + } +} + +void rapl_probe_intel(unsigned int family, unsigned int model) +{ + unsigned long long msr; + unsigned int time_unit; + double tdp; + + if (family != 6) + return; + + switch (model) { + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_IVYBRIDGE: + case INTEL_FAM6_HASWELL: /* HSW */ + case INTEL_FAM6_HASWELL_L: /* HSW */ + case INTEL_FAM6_HASWELL_G: /* HSW */ + case INTEL_FAM6_BROADWELL: /* BDW */ + case INTEL_FAM6_BROADWELL_G: /* BDW */ + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_GFX_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_GFXWatt); + } + break; + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO; + if (rapl_joules) + BIC_PRESENT(BIC_Pkg_J); + else + BIC_PRESENT(BIC_PkgWatt); + break; + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + do_rapl = + RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS + | RAPL_GFX | RAPL_PKG_POWER_INFO; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + BIC_PRESENT(BIC_GFX_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + BIC_PRESENT(BIC_GFXWatt); + } + break; + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ + do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + if (rapl_joules) + BIC_PRESENT(BIC_Pkg_J); + else + BIC_PRESENT(BIC_PkgWatt); + break; + case INTEL_FAM6_SKYLAKE_L: /* SKL */ + case INTEL_FAM6_CANNONLAKE_L: /* CNL */ + do_rapl = + RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS + | RAPL_GFX | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + BIC_PRESENT(BIC_GFX_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + BIC_PRESENT(BIC_GFXWatt); + } + break; + case INTEL_FAM6_HASWELL_X: /* HSX */ + case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ + case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ + case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ + do_rapl = + RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | + RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_RAMWatt); + } + break; + case INTEL_FAM6_SANDYBRIDGE_X: + case INTEL_FAM6_IVYBRIDGE_X: + do_rapl = + RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | + RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + } + break; + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ + case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ + do_rapl = RAPL_PKG | RAPL_CORES; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + } + break; + case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ + do_rapl = + RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | + RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS; + BIC_PRESENT(BIC_PKG__); + BIC_PRESENT(BIC_RAM__); + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + BIC_PRESENT(BIC_RAM_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + BIC_PRESENT(BIC_RAMWatt); + } + break; + default: + return; + } + + /* units on package 0, verify later other packages match */ + if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) + return; + + rapl_power_units = 1.0 / (1 << (msr & 0xF)); + if (model == INTEL_FAM6_ATOM_SILVERMONT) + rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; + else + rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); + + rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units); + + time_unit = msr >> 16 & 0xF; + if (time_unit == 0) + time_unit = 0xA; + + rapl_time_units = 1.0 / (1 << (time_unit)); + + tdp = get_tdp_intel(model); + + rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; + if (!quiet) + fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); +} + +void rapl_probe_amd(unsigned int family, unsigned int model) +{ + unsigned long long msr; + unsigned int eax, ebx, ecx, edx; + unsigned int has_rapl = 0; + double tdp; + + UNUSED(model); + + if (max_extended_level >= 0x80000007) { + __cpuid(0x80000007, eax, ebx, ecx, edx); + /* RAPL (Fam 17h+) */ + has_rapl = edx & (1 << 14); + } + + if (!has_rapl || family < 0x17) + return; + + do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); + } + + if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) + return; + + rapl_time_units = ldexp(1.0, -(msr >> 16 & 0xf)); + rapl_energy_units = ldexp(1.0, -(msr >> 8 & 0x1f)); + rapl_power_units = ldexp(1.0, -(msr & 0xf)); + + tdp = get_tdp_amd(family); + + rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; + if (!quiet) + fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); +} + +/* + * rapl_probe() + * + * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units + */ +void rapl_probe(unsigned int family, unsigned int model) +{ + if (genuine_intel) + rapl_probe_intel(family, model); + if (authentic_amd || hygon_genuine) + rapl_probe_amd(family, model); +} + +void perf_limit_reasons_probe(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return; + + if (family != 6) + return; + + switch (model) { + case INTEL_FAM6_HASWELL: /* HSW */ + case INTEL_FAM6_HASWELL_L: /* HSW */ + case INTEL_FAM6_HASWELL_G: /* HSW */ + do_gfx_perf_limit_reasons = 1; + /* FALLTHRU */ + case INTEL_FAM6_HASWELL_X: /* HSX */ + do_core_perf_limit_reasons = 1; + do_ring_perf_limit_reasons = 1; + default: + return; + } +} + +void automatic_cstate_conversion_probe(unsigned int family, unsigned int model) +{ + if (family != 6) + return; + + switch (model) { + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_SKYLAKE_X: + has_automatic_cstate_conversion = 1; + } +} + +void prewake_cstate_probe(unsigned int family, unsigned int model) +{ + if (is_icx(family, model) || is_spr(family, model)) + dis_cstate_prewake = 1; +} + +int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + unsigned int dts, dts2; + int cpu; + + UNUSED(c); + UNUSED(p); + + if (!(do_dts || do_ptm)) + return 0; + + cpu = t->cpu_id; + + /* DTS is per-core, no need to print for each thread */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) + return 0; + + if (cpu_migrate(cpu)) { + fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) { + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts); + + if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + dts2 = (msr >> 8) & 0x7F; + fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + cpu, msr, tj_max - dts, tj_max - dts2); + } + + if (do_dts && debug) { + unsigned int resolution; + + if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + resolution = (msr >> 27) & 0xF; + fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", + cpu, msr, tj_max - dts, resolution); + + if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) + return 0; + + dts = (msr >> 16) & 0x7F; + dts2 = (msr >> 8) & 0x7F; + fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", + cpu, msr, tj_max - dts, tj_max - dts2); + } + + return 0; +} + +void print_power_limit_msr(int cpu, unsigned long long msr, char *label) +{ + fprintf(outf, "cpu%d: %s: %sabled (%0.3f Watts, %f sec, clamp %sabled)\n", + cpu, label, + ((msr >> 15) & 1) ? "EN" : "DIS", + ((msr >> 0) & 0x7FFF) * rapl_power_units, + (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, + (((msr >> 16) & 1) ? "EN" : "DIS")); + + return; +} + +int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + const char *msr_name; + int cpu; + + UNUSED(c); + UNUSED(p); + + if (!do_rapl) + return 0; + + /* RAPL counters are per package, so print only for 1st thread/package */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + cpu = t->cpu_id; + if (cpu_migrate(cpu)) { + fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (do_rapl & RAPL_AMD_F17H) { + msr_name = "MSR_RAPL_PWR_UNIT"; + if (get_msr(cpu, MSR_RAPL_PWR_UNIT, &msr)) + return -1; + } else { + msr_name = "MSR_RAPL_POWER_UNIT"; + if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) + return -1; + } + + fprintf(outf, "cpu%d: %s: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr_name, msr, + rapl_power_units, rapl_energy_units, rapl_time_units); + + if (do_rapl & RAPL_PKG_POWER_INFO) { + + if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) + return -5; + + fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", + cpu, msr, + ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + + } + if (do_rapl & RAPL_PKG) { + + if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) + return -9; + + fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 63) & 1 ? "" : "UN"); + + print_power_limit_msr(cpu, msr, "PKG Limit #1"); + fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%0.3f Watts, %f* sec, clamp %sabled)\n", + cpu, + ((msr >> 47) & 1) ? "EN" : "DIS", + ((msr >> 32) & 0x7FFF) * rapl_power_units, + (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, + ((msr >> 48) & 1) ? "EN" : "DIS"); + + if (get_msr(cpu, MSR_VR_CURRENT_CONFIG, &msr)) + return -9; + + fprintf(outf, "cpu%d: MSR_VR_CURRENT_CONFIG: 0x%08llx\n", cpu, msr); + fprintf(outf, "cpu%d: PKG Limit #4: %f Watts (%slocked)\n", + cpu, ((msr >> 0) & 0x1FFF) * rapl_power_units, (msr >> 31) & 1 ? "" : "UN"); + } + + if (do_rapl & RAPL_DRAM_POWER_INFO) { + if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) + return -6; + + fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", + cpu, msr, + ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, + ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + } + if (do_rapl & RAPL_DRAM) { + if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) + return -9; + fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + + print_power_limit_msr(cpu, msr, "DRAM Limit"); + } + if (do_rapl & RAPL_CORE_POLICY) { + if (get_msr(cpu, MSR_PP0_POLICY, &msr)) + return -7; + + fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); + } + if (do_rapl & RAPL_CORES_POWER_LIMIT) { + if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) + return -9; + fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + print_power_limit_msr(cpu, msr, "Cores Limit"); + } + if (do_rapl & RAPL_GFX) { + if (get_msr(cpu, MSR_PP1_POLICY, &msr)) + return -8; + + fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); + + if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) + return -9; + fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", + cpu, msr, (msr >> 31) & 1 ? "" : "UN"); + print_power_limit_msr(cpu, msr, "GFX Limit"); + } + return 0; +} + +/* + * SNB adds support for additional MSRs: + * + * MSR_PKG_C7_RESIDENCY 0x000003fa + * MSR_CORE_C7_RESIDENCY 0x000003fe + * MSR_PKG_C2_RESIDENCY 0x0000060d + */ + +int has_snb_msrs(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_SANDYBRIDGE: + case INTEL_FAM6_SANDYBRIDGE_X: + case INTEL_FAM6_IVYBRIDGE: /* IVB */ + case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */ + case INTEL_FAM6_HASWELL: /* HSW */ + case INTEL_FAM6_HASWELL_X: /* HSW */ + case INTEL_FAM6_HASWELL_L: /* HSW */ + case INTEL_FAM6_HASWELL_G: /* HSW */ + case INTEL_FAM6_BROADWELL: /* BDW */ + case INTEL_FAM6_BROADWELL_G: /* BDW */ + case INTEL_FAM6_BROADWELL_X: /* BDX */ + case INTEL_FAM6_SKYLAKE_L: /* SKL */ + case INTEL_FAM6_CANNONLAKE_L: /* CNL */ + case INTEL_FAM6_SKYLAKE_X: /* SKX */ + case INTEL_FAM6_ICELAKE_X: /* ICX */ + case INTEL_FAM6_SAPPHIRERAPIDS_X: /* SPR */ + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ + return 1; + } + return 0; +} + +/* + * HSW ULT added support for C8/C9/C10 MSRs: + * + * MSR_PKG_C8_RESIDENCY 0x00000630 + * MSR_PKG_C9_RESIDENCY 0x00000631 + * MSR_PKG_C10_RESIDENCY 0x00000632 + * + * MSR_PKGC8_IRTL 0x00000633 + * MSR_PKGC9_IRTL 0x00000634 + * MSR_PKGC10_IRTL 0x00000635 + * + */ +int has_c8910_msrs(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_HASWELL_L: /* HSW */ + case INTEL_FAM6_BROADWELL: /* BDW */ + case INTEL_FAM6_SKYLAKE_L: /* SKL */ + case INTEL_FAM6_CANNONLAKE_L: /* CNL */ + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + return 1; + } + return 0; +} + +/* + * SKL adds support for additional MSRS: + * + * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 + * MSR_PKG_ANY_CORE_C0_RES 0x00000659 + * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A + * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B + */ +int has_skl_msrs(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_SKYLAKE_L: /* SKL */ + case INTEL_FAM6_CANNONLAKE_L: /* CNL */ + return 1; + } + return 0; +} + +int is_slm(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */ + case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */ + return 1; + } + return 0; +} + +int is_knl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_XEON_PHI_KNL: /* KNL */ + return 1; + } + return 0; +} + +int is_cnl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case INTEL_FAM6_CANNONLAKE_L: /* CNL */ + return 1; + } + + return 0; +} + +unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model) +{ + if (is_knl(family, model)) + return 1024; + return 1; +} + +#define SLM_BCLK_FREQS 5 +double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 }; + +double slm_bclk(void) +{ + unsigned long long msr = 3; + unsigned int i; + double freq; + + if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) + fprintf(outf, "SLM BCLK: unknown\n"); + + i = msr & 0xf; + if (i >= SLM_BCLK_FREQS) { + fprintf(outf, "SLM BCLK[%d] invalid\n", i); + i = 3; + } + freq = slm_freq_table[i]; + + if (!quiet) + fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq); + + return freq; +} + +double discover_bclk(unsigned int family, unsigned int model) +{ + if (has_snb_msrs(family, model) || is_knl(family, model)) + return 100.00; + else if (is_slm(family, model)) + return slm_bclk(); + else + return 133.33; +} + +int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned int eax, ebx, ecx, edx; + + UNUSED(c); + UNUSED(p); + + if (!genuine_intel) + return 0; + + if (cpu_migrate(t->cpu_id)) { + fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id); + return -1; + } + + if (max_level < 0x1a) + return 0; + + __cpuid(0x1a, eax, ebx, ecx, edx); + eax = (eax >> 24) & 0xFF; + if (eax == 0x20) + t->is_atom = true; + return 0; +} + +/* + * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where + * the Thermal Control Circuit (TCC) activates. + * This is usually equal to tjMax. + * + * Older processors do not have this MSR, so there we guess, + * but also allow cmdline over-ride with -T. + * + * Several MSR temperature values are in units of degrees-C + * below this value, including the Digital Thermal Sensor (DTS), + * Package Thermal Management Sensor (PTM), and thermal event thresholds. + */ +int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + unsigned long long msr; + unsigned int tcc_default, tcc_offset; + int cpu; + + UNUSED(c); + UNUSED(p); + + /* tj_max is used only for dts or ptm */ + if (!(do_dts || do_ptm)) + return 0; + + /* this is a per-package concept */ + if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) + return 0; + + cpu = t->cpu_id; + if (cpu_migrate(cpu)) { + fprintf(outf, "Could not migrate to CPU %d\n", cpu); + return -1; + } + + if (tj_max_override != 0) { + tj_max = tj_max_override; + fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max); + return 0; + } + + /* Temperature Target MSR is Nehalem and newer only */ + if (!do_nhm_platform_info) + goto guess; + + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) + goto guess; + + tcc_default = (msr >> 16) & 0xFF; + + if (!quiet) { + switch (tcc_offset_bits) { + case 4: + tcc_offset = (msr >> 24) & 0xF; + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", + cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); + break; + case 6: + tcc_offset = (msr >> 24) & 0x3F; + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", + cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset); + break; + default: + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default); + break; + } + } + + if (!tcc_default) + goto guess; + + tj_max = tcc_default; + + return 0; + +guess: + tj_max = TJMAX_DEFAULT; + fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max); + + return 0; +} + +void decode_feature_control_msr(void) +{ + unsigned long long msr; + + if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr)) + fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n", + base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : ""); +} + +void decode_misc_enable_msr(void) +{ + unsigned long long msr; + + if (!genuine_intel) + return; + + if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr)) + fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n", + base_cpu, msr, + msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-", + msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-", + msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-", + msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "", + msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : ""); +} + +void decode_misc_feature_control(void) +{ + unsigned long long msr; + + if (!has_misc_feature_control) + return; + + if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr)) + fprintf(outf, + "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n", + base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "", + msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : ""); +} + +/* + * Decode MSR_MISC_PWR_MGMT + * + * Decode the bits according to the Nehalem documentation + * bit[0] seems to continue to have same meaning going forward + * bit[1] less so... + */ +void decode_misc_pwr_mgmt_msr(void) +{ + unsigned long long msr; + + if (!do_nhm_platform_info) + return; + + if (no_MSR_MISC_PWR_MGMT) + return; + + if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr)) + fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n", + base_cpu, msr, + msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS"); +} + +/* + * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG + * + * This MSRs are present on Silvermont processors, + * Intel Atom processor E3000 series (Baytrail), and friends. + */ +void decode_c6_demotion_policy_msr(void) +{ + unsigned long long msr; + + if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr)) + fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n", + base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); + + if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr)) + fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n", + base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS"); +} + +/* + * When models are the same, for the purpose of turbostat, reuse + */ +unsigned int intel_model_duplicates(unsigned int model) +{ + + switch (model) { + case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ + case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ + case 0x1F: /* Core i7 and i5 Processor - Nehalem */ + case INTEL_FAM6_WESTMERE: /* Westmere Client - Clarkdale, Arrandale */ + case INTEL_FAM6_WESTMERE_EP: /* Westmere EP - Gulftown */ + return INTEL_FAM6_NEHALEM; + + case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */ + case INTEL_FAM6_WESTMERE_EX: /* Westmere-EX Xeon - Eagleton */ + return INTEL_FAM6_NEHALEM_EX; + + case INTEL_FAM6_XEON_PHI_KNM: + return INTEL_FAM6_XEON_PHI_KNL; + + case INTEL_FAM6_BROADWELL_X: + case INTEL_FAM6_BROADWELL_D: /* BDX-DE */ + return INTEL_FAM6_BROADWELL_X; + + case INTEL_FAM6_SKYLAKE_L: + case INTEL_FAM6_SKYLAKE: + case INTEL_FAM6_KABYLAKE_L: + case INTEL_FAM6_KABYLAKE: + case INTEL_FAM6_COMETLAKE_L: + case INTEL_FAM6_COMETLAKE: + return INTEL_FAM6_SKYLAKE_L; + + case INTEL_FAM6_ICELAKE_L: + case INTEL_FAM6_ICELAKE_NNPI: + case INTEL_FAM6_TIGERLAKE_L: + case INTEL_FAM6_TIGERLAKE: + case INTEL_FAM6_ROCKETLAKE: + case INTEL_FAM6_LAKEFIELD: + case INTEL_FAM6_ALDERLAKE: + case INTEL_FAM6_ALDERLAKE_L: + case INTEL_FAM6_ALDERLAKE_N: + case INTEL_FAM6_RAPTORLAKE: + case INTEL_FAM6_RAPTORLAKE_P: + case INTEL_FAM6_RAPTORLAKE_S: + case INTEL_FAM6_METEORLAKE: + case INTEL_FAM6_METEORLAKE_L: + return INTEL_FAM6_CANNONLAKE_L; + + case INTEL_FAM6_ATOM_TREMONT_L: + return INTEL_FAM6_ATOM_TREMONT; + + case INTEL_FAM6_ICELAKE_D: + return INTEL_FAM6_ICELAKE_X; + } + return model; +} + +void print_dev_latency(void) +{ + char *path = "/dev/cpu_dma_latency"; + int fd; + int value; + int retval; + + fd = open(path, O_RDONLY); + if (fd < 0) { + warn("fopen %s\n", path); + return; + } + + retval = read(fd, (void *)&value, sizeof(int)); + if (retval != sizeof(int)) { + warn("read failed %s\n", path); + close(fd); + return; + } + fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained"); + + close(fd); +} + +/* + * Linux-perf manages the HW instructions-retired counter + * by enabling when requested, and hiding rollover + */ +void linux_perf_init(void) +{ + if (!BIC_IS_ENABLED(BIC_IPC)) + return; + + if (access("/proc/sys/kernel/perf_event_paranoid", F_OK)) + return; + + fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); + if (fd_instr_count_percpu == NULL) + err(-1, "calloc fd_instr_count_percpu"); + + BIC_PRESENT(BIC_IPC); +} + +void process_cpuid() +{ + unsigned int eax, ebx, ecx, edx; + unsigned int fms, family, model, stepping, ecx_flags, edx_flags; + unsigned long long ucode_patch = 0; + + eax = ebx = ecx = edx = 0; + + __cpuid(0, max_level, ebx, ecx, edx); + + if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69) + genuine_intel = 1; + else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65) + authentic_amd = 1; + else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e) + hygon_genuine = 1; + + if (!quiet) + fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n", + (char *)&ebx, (char *)&edx, (char *)&ecx, max_level); + + __cpuid(1, fms, ebx, ecx, edx); + family = (fms >> 8) & 0xf; + model = (fms >> 4) & 0xf; + stepping = fms & 0xf; + if (family == 0xf) + family += (fms >> 20) & 0xff; + if (family >= 6) + model += ((fms >> 16) & 0xf) << 4; + ecx_flags = ecx; + edx_flags = edx; + + if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch)) + warnx("get_msr(UCODE)\n"); + + /* + * check max extended function levels of CPUID. + * This is needed to check for invariant TSC. + * This check is valid for both Intel and AMD. + */ + ebx = ecx = edx = 0; + __cpuid(0x80000000, max_extended_level, ebx, ecx, edx); + + if (!quiet) { + fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n", + family, model, stepping, family, model, stepping, + (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF)); + fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level); + fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n", + ecx_flags & (1 << 0) ? "SSE3" : "-", + ecx_flags & (1 << 3) ? "MONITOR" : "-", + ecx_flags & (1 << 6) ? "SMX" : "-", + ecx_flags & (1 << 7) ? "EIST" : "-", + ecx_flags & (1 << 8) ? "TM2" : "-", + edx_flags & (1 << 4) ? "TSC" : "-", + edx_flags & (1 << 5) ? "MSR" : "-", + edx_flags & (1 << 22) ? "ACPI-TM" : "-", + edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-"); + } + if (genuine_intel) { + model_orig = model; + model = intel_model_duplicates(model); + } + + if (!(edx_flags & (1 << 5))) + errx(1, "CPUID: no MSR"); + + if (max_extended_level >= 0x80000007) { + + /* + * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 + * this check is valid for both Intel and AMD + */ + __cpuid(0x80000007, eax, ebx, ecx, edx); + has_invariant_tsc = edx & (1 << 8); + } + + /* + * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 + * this check is valid for both Intel and AMD + */ + + __cpuid(0x6, eax, ebx, ecx, edx); + has_aperf = ecx & (1 << 0); + if (has_aperf) { + BIC_PRESENT(BIC_Avg_MHz); + BIC_PRESENT(BIC_Busy); + BIC_PRESENT(BIC_Bzy_MHz); + } + do_dts = eax & (1 << 0); + if (do_dts) + BIC_PRESENT(BIC_CoreTmp); + has_turbo = eax & (1 << 1); + do_ptm = eax & (1 << 6); + if (do_ptm) + BIC_PRESENT(BIC_PkgTmp); + has_hwp = eax & (1 << 7); + has_hwp_notify = eax & (1 << 8); + has_hwp_activity_window = eax & (1 << 9); + has_hwp_epp = eax & (1 << 10); + has_hwp_pkg = eax & (1 << 11); + has_epb = ecx & (1 << 3); + + if (!quiet) + fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, " + "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", + has_aperf ? "" : "No-", + has_turbo ? "" : "No-", + do_dts ? "" : "No-", + do_ptm ? "" : "No-", + has_hwp ? "" : "No-", + has_hwp_notify ? "" : "No-", + has_hwp_activity_window ? "" : "No-", + has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-"); + + if (!quiet) + decode_misc_enable_msr(); + + if (max_level >= 0x7 && !quiet) { + int has_sgx; + + ecx = 0; + + __cpuid_count(0x7, 0, eax, ebx, ecx, edx); + + has_sgx = ebx & (1 << 2); + + is_hybrid = edx & (1 << 15); + + fprintf(outf, "CPUID(7): %sSGX %sHybrid\n", has_sgx ? "" : "No-", is_hybrid ? "" : "No-"); + + if (has_sgx) + decode_feature_control_msr(); + } + + if (max_level >= 0x15) { + unsigned int eax_crystal; + unsigned int ebx_tsc; + + /* + * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz + */ + eax_crystal = ebx_tsc = crystal_hz = edx = 0; + __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx); + + if (ebx_tsc != 0) { + + if (!quiet && (ebx != 0)) + fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", + eax_crystal, ebx_tsc, crystal_hz); + + if (crystal_hz == 0) + switch (model) { + case INTEL_FAM6_SKYLAKE_L: /* SKL */ + crystal_hz = 24000000; /* 24.0 MHz */ + break; + case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ + crystal_hz = 25000000; /* 25.0 MHz */ + break; + case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */ + case INTEL_FAM6_ATOM_GOLDMONT_PLUS: + crystal_hz = 19200000; /* 19.2 MHz */ + break; + default: + crystal_hz = 0; + } + + if (crystal_hz) { + tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal; + if (!quiet) + fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", + tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); + } + } + } + if (max_level >= 0x16) { + unsigned int base_mhz, max_mhz, bus_mhz, edx; + + /* + * CPUID 16H Base MHz, Max MHz, Bus MHz + */ + base_mhz = max_mhz = bus_mhz = edx = 0; + + __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx); + if (!quiet) + fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n", + base_mhz, max_mhz, bus_mhz); + } + + if (has_aperf) + aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model); + + BIC_PRESENT(BIC_IRQ); + BIC_PRESENT(BIC_TSC_MHz); + + if (probe_nhm_msrs(family, model)) { + do_nhm_platform_info = 1; + BIC_PRESENT(BIC_CPU_c1); + BIC_PRESENT(BIC_CPU_c3); + BIC_PRESENT(BIC_CPU_c6); + BIC_PRESENT(BIC_SMI); + } + do_snb_cstates = has_snb_msrs(family, model); + + if (do_snb_cstates) + BIC_PRESENT(BIC_CPU_c7); + + do_irtl_snb = has_snb_msrs(family, model); + if (do_snb_cstates && (pkg_cstate_limit >= PCL__2)) + BIC_PRESENT(BIC_Pkgpc2); + if (pkg_cstate_limit >= PCL__3) + BIC_PRESENT(BIC_Pkgpc3); + if (pkg_cstate_limit >= PCL__6) + BIC_PRESENT(BIC_Pkgpc6); + if (do_snb_cstates && (pkg_cstate_limit >= PCL__7)) + BIC_PRESENT(BIC_Pkgpc7); + if (has_slv_msrs(family, model)) { + BIC_NOT_PRESENT(BIC_Pkgpc2); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_PRESENT(BIC_Pkgpc6); + BIC_NOT_PRESENT(BIC_Pkgpc7); + BIC_PRESENT(BIC_Mod_c6); + use_c1_residency_msr = 1; + } + if (is_jvl(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc2); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_Pkgpc6); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } + if (is_dnv(family, model)) { + BIC_PRESENT(BIC_CPU_c1); + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + use_c1_residency_msr = 1; + } + if (is_skx(family, model) || is_icx(family, model) || is_spr(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } + if (is_bdx(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } + if (has_c8910_msrs(family, model)) { + if (pkg_cstate_limit >= PCL__8) + BIC_PRESENT(BIC_Pkgpc8); + if (pkg_cstate_limit >= PCL__9) + BIC_PRESENT(BIC_Pkgpc9); + if (pkg_cstate_limit >= PCL_10) + BIC_PRESENT(BIC_Pkgpc10); + } + do_irtl_hsw = has_c8910_msrs(family, model); + if (has_skl_msrs(family, model)) { + BIC_PRESENT(BIC_Totl_c0); + BIC_PRESENT(BIC_Any_c0); + BIC_PRESENT(BIC_GFX_c0); + BIC_PRESENT(BIC_CPUGFX); + } + do_slm_cstates = is_slm(family, model); + do_knl_cstates = is_knl(family, model); + + if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model)) + BIC_NOT_PRESENT(BIC_CPU_c3); + + if (!quiet) + decode_misc_pwr_mgmt_msr(); + + if (!quiet && has_slv_msrs(family, model)) + decode_c6_demotion_policy_msr(); + + rapl_probe(family, model); + perf_limit_reasons_probe(family, model); + automatic_cstate_conversion_probe(family, model); + prewake_cstate_probe(family, model); + + check_tcc_offset(model_orig); + + if (!quiet) + dump_cstate_pstate_config_info(family, model); + intel_uncore_frequency_probe(); + + if (!quiet) + print_dev_latency(); + if (!quiet) + dump_sysfs_cstate_config(); + if (!quiet) + dump_sysfs_pstate_config(); + + if (has_skl_msrs(family, model) || is_ehl(family, model)) + calculate_tsc_tweak(); + + if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK)) + BIC_PRESENT(BIC_GFX_rc6); + + if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXMHz); + + if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXACTMHz); + + if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) + BIC_PRESENT(BIC_CPU_LPI); + else + BIC_NOT_PRESENT(BIC_CPU_LPI); + + if (!access("/sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count", R_OK)) + BIC_PRESENT(BIC_CORE_THROT_CNT); + else + BIC_NOT_PRESENT(BIC_CORE_THROT_CNT); + + if (!access(sys_lpi_file_sysfs, R_OK)) { + sys_lpi_file = sys_lpi_file_sysfs; + BIC_PRESENT(BIC_SYS_LPI); + } else if (!access(sys_lpi_file_debugfs, R_OK)) { + sys_lpi_file = sys_lpi_file_debugfs; + BIC_PRESENT(BIC_SYS_LPI); + } else { + sys_lpi_file_sysfs = NULL; + BIC_NOT_PRESENT(BIC_SYS_LPI); + } + + if (!quiet) + decode_misc_feature_control(); + + return; +} + +/* + * in /dev/cpu/ return success for names that are numbers + * ie. filter out ".", "..", "microcode". + */ +int dir_filter(const struct dirent *dirp) +{ + if (isdigit(dirp->d_name[0])) + return 1; + else + return 0; +} + +void topology_probe() +{ + int i; + int max_core_id = 0; + int max_package_id = 0; + int max_die_id = 0; + int max_siblings = 0; + + /* Initialize num_cpus, max_cpu_num */ + set_max_cpu_num(); + topo.num_cpus = 0; + for_all_proc_cpus(count_cpus); + if (!summary_only && topo.num_cpus > 1) + BIC_PRESENT(BIC_CPU); + + if (debug > 1) + fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); + + cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology)); + if (cpus == NULL) + err(1, "calloc cpus"); + + /* + * Allocate and initialize cpu_present_set + */ + cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); + if (cpu_present_set == NULL) + err(3, "CPU_ALLOC"); + cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(cpu_present_setsize, cpu_present_set); + for_all_proc_cpus(mark_cpu_present); + + /* + * Validate that all cpus in cpu_subset are also in cpu_present_set + */ + for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) { + if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset)) + if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set)) + err(1, "cpu%d not present", i); + } + + /* + * Allocate and initialize cpu_affinity_set + */ + cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); + if (cpu_affinity_set == NULL) + err(3, "CPU_ALLOC"); + cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); + CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); + + for_all_proc_cpus(init_thread_id); + + /* + * For online cpus + * find max_core_id, max_package_id + */ + for (i = 0; i <= topo.max_cpu_num; ++i) { + int siblings; + + if (cpu_is_not_present(i)) { + if (debug > 1) + fprintf(outf, "cpu%d NOT PRESENT\n", i); + continue; + } + + cpus[i].logical_cpu_id = i; + + /* get package information */ + cpus[i].physical_package_id = get_physical_package_id(i); + if (cpus[i].physical_package_id > max_package_id) + max_package_id = cpus[i].physical_package_id; + + /* get die information */ + cpus[i].die_id = get_die_id(i); + if (cpus[i].die_id > max_die_id) + max_die_id = cpus[i].die_id; + + /* get numa node information */ + cpus[i].physical_node_id = get_physical_node_id(&cpus[i]); + if (cpus[i].physical_node_id > topo.max_node_num) + topo.max_node_num = cpus[i].physical_node_id; + + /* get core information */ + cpus[i].physical_core_id = get_core_id(i); + if (cpus[i].physical_core_id > max_core_id) + max_core_id = cpus[i].physical_core_id; + + /* get thread information */ + siblings = get_thread_siblings(&cpus[i]); + if (siblings > max_siblings) + max_siblings = siblings; + if (cpus[i].thread_id == 0) + topo.num_cores++; + } + + topo.cores_per_node = max_core_id + 1; + if (debug > 1) + fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node); + if (!summary_only && topo.cores_per_node > 1) + BIC_PRESENT(BIC_Core); + + topo.num_die = max_die_id + 1; + if (debug > 1) + fprintf(outf, "max_die_id %d, sizing for %d die\n", max_die_id, topo.num_die); + if (!summary_only && topo.num_die > 1) + BIC_PRESENT(BIC_Die); + + topo.num_packages = max_package_id + 1; + if (debug > 1) + fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); + if (!summary_only && topo.num_packages > 1) + BIC_PRESENT(BIC_Package); + + set_node_data(); + if (debug > 1) + fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg); + if (!summary_only && topo.nodes_per_pkg > 1) + BIC_PRESENT(BIC_Node); + + topo.threads_per_core = max_siblings; + if (debug > 1) + fprintf(outf, "max_siblings %d\n", max_siblings); + + if (debug < 1) + return; + + for (i = 0; i <= topo.max_cpu_num; ++i) { + if (cpu_is_not_present(i)) + continue; + fprintf(outf, + "cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n", + i, cpus[i].physical_package_id, cpus[i].die_id, + cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id); + } + +} + +void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) +{ + int i; + int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages; + int num_threads = topo.threads_per_core * num_cores; + + *t = calloc(num_threads, sizeof(struct thread_data)); + if (*t == NULL) + goto error; + + for (i = 0; i < num_threads; i++) + (*t)[i].cpu_id = -1; + + *c = calloc(num_cores, sizeof(struct core_data)); + if (*c == NULL) + goto error; + + for (i = 0; i < num_cores; i++) + (*c)[i].core_id = -1; + + *p = calloc(topo.num_packages, sizeof(struct pkg_data)); + if (*p == NULL) + goto error; + + for (i = 0; i < topo.num_packages; i++) + (*p)[i].package_id = i; + + return; +error: + err(1, "calloc counters"); +} + +/* + * init_counter() + * + * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE + */ +void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id) +{ + int pkg_id = cpus[cpu_id].physical_package_id; + int node_id = cpus[cpu_id].logical_node_id; + int core_id = cpus[cpu_id].physical_core_id; + int thread_id = cpus[cpu_id].thread_id; + struct thread_data *t; + struct core_data *c; + struct pkg_data *p; + + /* Workaround for systems where physical_node_id==-1 + * and logical_node_id==(-1 - topo.num_cpus) + */ + if (node_id < 0) + node_id = 0; + + t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id); + c = GET_CORE(core_base, core_id, node_id, pkg_id); + p = GET_PKG(pkg_base, pkg_id); + + t->cpu_id = cpu_id; + if (thread_id == 0) { + t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; + if (cpu_is_first_core_in_package(cpu_id)) + t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; + } + + c->core_id = core_id; + p->package_id = pkg_id; +} + +int initialize_counters(int cpu_id) +{ + init_counter(EVEN_COUNTERS, cpu_id); + init_counter(ODD_COUNTERS, cpu_id); + return 0; +} + +void allocate_output_buffer() +{ + output_buffer = calloc(1, (1 + topo.num_cpus) * 2048); + outp = output_buffer; + if (outp == NULL) + err(-1, "calloc output buffer"); +} + +void allocate_fd_percpu(void) +{ + fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int)); + if (fd_percpu == NULL) + err(-1, "calloc fd_percpu"); +} + +void allocate_irq_buffers(void) +{ + irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int)); + if (irq_column_2_cpu == NULL) + err(-1, "calloc %d", topo.num_cpus); + + irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int)); + if (irqs_per_cpu == NULL) + err(-1, "calloc %d", topo.max_cpu_num + 1); +} + +void setup_all_buffers(void) +{ + topology_probe(); + allocate_irq_buffers(); + allocate_fd_percpu(); + allocate_counters(&thread_even, &core_even, &package_even); + allocate_counters(&thread_odd, &core_odd, &package_odd); + allocate_output_buffer(); + for_all_proc_cpus(initialize_counters); +} + +void set_base_cpu(void) +{ + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(-ENODEV, "No valid cpus found"); + + if (debug > 1) + fprintf(outf, "base_cpu = %d\n", base_cpu); +} + +void turbostat_init() +{ + setup_all_buffers(); + set_base_cpu(); + check_dev_msr(); + check_permissions(); + process_cpuid(); + linux_perf_init(); + + if (!quiet) + for_all_cpus(print_hwp, ODD_COUNTERS); + + if (!quiet) + for_all_cpus(print_epb, ODD_COUNTERS); + + if (!quiet) + for_all_cpus(print_perf_limit, ODD_COUNTERS); + + if (!quiet) + for_all_cpus(print_rapl, ODD_COUNTERS); + + for_all_cpus(set_temperature_target, ODD_COUNTERS); + + for_all_cpus(get_cpu_type, ODD_COUNTERS); + for_all_cpus(get_cpu_type, EVEN_COUNTERS); + + if (!quiet) + for_all_cpus(print_thermal, ODD_COUNTERS); + + if (!quiet && do_irtl_snb) + print_irtl(); + + if (DO_BIC(BIC_IPC)) + (void)get_instr_count_fd(base_cpu); +} + +int fork_it(char **argv) +{ + pid_t child_pid; + int status; + + snapshot_proc_sysfs_files(); + status = for_all_cpus(get_counters, EVEN_COUNTERS); + first_counter_read = 0; + if (status) + exit(status); + /* clear affinity side-effect of get_counters() */ + sched_setaffinity(0, cpu_present_setsize, cpu_present_set); + gettimeofday(&tv_even, (struct timezone *)NULL); + + child_pid = fork(); + if (!child_pid) { + /* child */ + execvp(argv[0], argv); + err(errno, "exec %s", argv[0]); + } else { + + /* parent */ + if (child_pid == -1) + err(1, "fork"); + + signal(SIGINT, SIG_IGN); + signal(SIGQUIT, SIG_IGN); + if (waitpid(child_pid, &status, 0) == -1) + err(status, "waitpid"); + + if (WIFEXITED(status)) + status = WEXITSTATUS(status); + } + /* + * n.b. fork_it() does not check for errors from for_all_cpus() + * because re-starting is problematic when forking + */ + snapshot_proc_sysfs_files(); + for_all_cpus(get_counters, ODD_COUNTERS); + gettimeofday(&tv_odd, (struct timezone *)NULL); + timersub(&tv_odd, &tv_even, &tv_delta); + if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) + fprintf(outf, "%s: Counter reset detected\n", progname); + else { + compute_average(EVEN_COUNTERS); + format_all_counters(EVEN_COUNTERS); + } + + fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0); + + flush_output_stderr(); + + return status; +} + +int get_and_dump_counters(void) +{ + int status; + + snapshot_proc_sysfs_files(); + status = for_all_cpus(get_counters, ODD_COUNTERS); + if (status) + return status; + + status = for_all_cpus(dump_counters, ODD_COUNTERS); + if (status) + return status; + + flush_output_stdout(); + + return status; +} + +void print_version() +{ + fprintf(outf, "turbostat version 2022.10.04 - Len Brown <lenb@kernel.org>\n"); +} + +#define COMMAND_LINE_SIZE 2048 + +void print_bootcmd(void) +{ + char bootcmd[COMMAND_LINE_SIZE]; + FILE *fp; + int ret; + + memset(bootcmd, 0, COMMAND_LINE_SIZE); + fp = fopen("/proc/cmdline", "r"); + if (!fp) + return; + + ret = fread(bootcmd, sizeof(char), COMMAND_LINE_SIZE - 1, fp); + if (ret) { + bootcmd[ret] = '\0'; + /* the last character is already '\n' */ + fprintf(outf, "Kernel command line: %s", bootcmd); + } + + fclose(fp); +} + +int add_counter(unsigned int msr_num, char *path, char *name, + unsigned int width, enum counter_scope scope, + enum counter_type type, enum counter_format format, int flags) +{ + struct msr_counter *msrp; + + msrp = calloc(1, sizeof(struct msr_counter)); + if (msrp == NULL) { + perror("calloc"); + exit(1); + } + + msrp->msr_num = msr_num; + strncpy(msrp->name, name, NAME_BYTES - 1); + if (path) + strncpy(msrp->path, path, PATH_BYTES - 1); + msrp->width = width; + msrp->type = type; + msrp->format = format; + msrp->flags = flags; + + switch (scope) { + + case SCOPE_CPU: + msrp->next = sys.tp; + sys.tp = msrp; + sys.added_thread_counters++; + if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) { + fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS); + exit(-1); + } + break; + + case SCOPE_CORE: + msrp->next = sys.cp; + sys.cp = msrp; + sys.added_core_counters++; + if (sys.added_core_counters > MAX_ADDED_COUNTERS) { + fprintf(stderr, "exceeded max %d added core counters\n", MAX_ADDED_COUNTERS); + exit(-1); + } + break; + + case SCOPE_PACKAGE: + msrp->next = sys.pp; + sys.pp = msrp; + sys.added_package_counters++; + if (sys.added_package_counters > MAX_ADDED_COUNTERS) { + fprintf(stderr, "exceeded max %d added package counters\n", MAX_ADDED_COUNTERS); + exit(-1); + } + break; + } + + return 0; +} + +void parse_add_command(char *add_command) +{ + int msr_num = 0; + char *path = NULL; + char name_buffer[NAME_BYTES] = ""; + int width = 64; + int fail = 0; + enum counter_scope scope = SCOPE_CPU; + enum counter_type type = COUNTER_CYCLES; + enum counter_format format = FORMAT_DELTA; + + while (add_command) { + + if (sscanf(add_command, "msr0x%x", &msr_num) == 1) + goto next; + + if (sscanf(add_command, "msr%d", &msr_num) == 1) + goto next; + + if (*add_command == '/') { + path = add_command; + goto next; + } + + if (sscanf(add_command, "u%d", &width) == 1) { + if ((width == 32) || (width == 64)) + goto next; + width = 64; + } + if (!strncmp(add_command, "cpu", strlen("cpu"))) { + scope = SCOPE_CPU; + goto next; + } + if (!strncmp(add_command, "core", strlen("core"))) { + scope = SCOPE_CORE; + goto next; + } + if (!strncmp(add_command, "package", strlen("package"))) { + scope = SCOPE_PACKAGE; + goto next; + } + if (!strncmp(add_command, "cycles", strlen("cycles"))) { + type = COUNTER_CYCLES; + goto next; + } + if (!strncmp(add_command, "seconds", strlen("seconds"))) { + type = COUNTER_SECONDS; + goto next; + } + if (!strncmp(add_command, "usec", strlen("usec"))) { + type = COUNTER_USEC; + goto next; + } + if (!strncmp(add_command, "raw", strlen("raw"))) { + format = FORMAT_RAW; + goto next; + } + if (!strncmp(add_command, "delta", strlen("delta"))) { + format = FORMAT_DELTA; + goto next; + } + if (!strncmp(add_command, "percent", strlen("percent"))) { + format = FORMAT_PERCENT; + goto next; + } + + if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { /* 18 < NAME_BYTES */ + char *eos; + + eos = strchr(name_buffer, ','); + if (eos) + *eos = '\0'; + goto next; + } + +next: + add_command = strchr(add_command, ','); + if (add_command) { + *add_command = '\0'; + add_command++; + } + + } + if ((msr_num == 0) && (path == NULL)) { + fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n"); + fail++; + } + + /* generate default column header */ + if (*name_buffer == '\0') { + if (width == 32) + sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + else + sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + } + + if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0)) + fail++; + + if (fail) { + help(); + exit(1); + } +} + +int is_deferred_add(char *name) +{ + int i; + + for (i = 0; i < deferred_add_index; ++i) + if (!strcmp(name, deferred_add_names[i])) + return 1; + return 0; +} + +int is_deferred_skip(char *name) +{ + int i; + + for (i = 0; i < deferred_skip_index; ++i) + if (!strcmp(name, deferred_skip_names[i])) + return 1; + return 0; +} + +void probe_sysfs(void) +{ + char path[64]; + char name_buf[16]; + FILE *input; + int state; + char *sp; + + for (state = 10; state >= 0; --state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); + + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '%'; + *(sp + 1) = '\0'; + + remove_underbar(name_buf); + + fclose(input); + + sprintf(path, "cpuidle/state%d/time", state); + + if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + continue; + + if (is_deferred_skip(name_buf)) + continue; + + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU); + } + + for (state = 10; state >= 0; --state) { + + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state); + input = fopen(path, "r"); + if (input == NULL) + continue; + if (!fgets(name_buf, sizeof(name_buf), input)) + err(1, "%s: failed to read file", path); + /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */ + sp = strchr(name_buf, '-'); + if (!sp) + sp = strchrnul(name_buf, '\n'); + *sp = '\0'; + fclose(input); + + remove_underbar(name_buf); + + sprintf(path, "cpuidle/state%d/usage", state); + + if (!DO_BIC(BIC_sysfs) && !is_deferred_add(name_buf)) + continue; + + if (is_deferred_skip(name_buf)) + continue; + + add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU); + } + +} + +/* + * parse cpuset with following syntax + * 1,2,4..6,8-10 and set bits in cpu_subset + */ +void parse_cpu_command(char *optarg) +{ + unsigned int start, end; + char *next; + + if (!strcmp(optarg, "core")) { + if (cpu_subset) + goto error; + show_core_only++; + return; + } + if (!strcmp(optarg, "package")) { + if (cpu_subset) + goto error; + show_pkg_only++; + return; + } + if (show_core_only || show_pkg_only) + goto error; + + cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS); + if (cpu_subset == NULL) + err(3, "CPU_ALLOC"); + cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS); + + CPU_ZERO_S(cpu_subset_size, cpu_subset); + + next = optarg; + + while (next && *next) { + + if (*next == '-') /* no negative cpu numbers */ + goto error; + + start = strtoul(next, &next, 10); + + if (start >= CPU_SUBSET_MAXCPUS) + goto error; + CPU_SET_S(start, cpu_subset_size, cpu_subset); + + if (*next == '\0') + break; + + if (*next == ',') { + next += 1; + continue; + } + + if (*next == '-') { + next += 1; /* start range */ + } else if (*next == '.') { + next += 1; + if (*next == '.') + next += 1; /* start range */ + else + goto error; + } + + end = strtoul(next, &next, 10); + if (end <= start) + goto error; + + while (++start <= end) { + if (start >= CPU_SUBSET_MAXCPUS) + goto error; + CPU_SET_S(start, cpu_subset_size, cpu_subset); + } + + if (*next == ',') + next += 1; + else if (*next != '\0') + goto error; + } + + return; + +error: + fprintf(stderr, "\"--cpu %s\" malformed\n", optarg); + help(); + exit(-1); +} + +void cmdline(int argc, char **argv) +{ + int opt; + int option_index = 0; + static struct option long_options[] = { + { "add", required_argument, 0, 'a' }, + { "cpu", required_argument, 0, 'c' }, + { "Dump", no_argument, 0, 'D' }, + { "debug", no_argument, 0, 'd' }, /* internal, not documented */ + { "enable", required_argument, 0, 'e' }, + { "interval", required_argument, 0, 'i' }, + { "IPC", no_argument, 0, 'I' }, + { "num_iterations", required_argument, 0, 'n' }, + { "header_iterations", required_argument, 0, 'N' }, + { "help", no_argument, 0, 'h' }, + { "hide", required_argument, 0, 'H' }, // meh, -h taken by --help + { "Joules", no_argument, 0, 'J' }, + { "list", no_argument, 0, 'l' }, + { "out", required_argument, 0, 'o' }, + { "quiet", no_argument, 0, 'q' }, + { "show", required_argument, 0, 's' }, + { "Summary", no_argument, 0, 'S' }, + { "TCC", required_argument, 0, 'T' }, + { "version", no_argument, 0, 'v' }, + { 0, 0, 0, 0 } + }; + + progname = argv[0]; + + while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) { + switch (opt) { + case 'a': + parse_add_command(optarg); + break; + case 'c': + parse_cpu_command(optarg); + break; + case 'D': + dump_only++; + break; + case 'e': + /* --enable specified counter */ + bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST); + break; + case 'd': + debug++; + ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); + break; + case 'H': + /* + * --hide: do not show those specified + * multiple invocations simply clear more bits in enabled mask + */ + bic_enabled &= ~bic_lookup(optarg, HIDE_LIST); + break; + case 'h': + default: + help(); + exit(1); + case 'i': + { + double interval = strtod(optarg, NULL); + + if (interval < 0.001) { + fprintf(outf, "interval %f seconds is too small\n", interval); + exit(2); + } + + interval_tv.tv_sec = interval_ts.tv_sec = interval; + interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000; + interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000; + } + break; + case 'J': + rapl_joules++; + break; + case 'l': + ENABLE_BIC(BIC_DISABLED_BY_DEFAULT); + list_header_only++; + quiet++; + break; + case 'o': + outf = fopen_or_die(optarg, "w"); + break; + case 'q': + quiet = 1; + break; + case 'n': + num_iterations = strtod(optarg, NULL); + + if (num_iterations <= 0) { + fprintf(outf, "iterations %d should be positive number\n", num_iterations); + exit(2); + } + break; + case 'N': + header_iterations = strtod(optarg, NULL); + + if (header_iterations <= 0) { + fprintf(outf, "iterations %d should be positive number\n", header_iterations); + exit(2); + } + break; + case 's': + /* + * --show: show only those specified + * The 1st invocation will clear and replace the enabled mask + * subsequent invocations can add to it. + */ + if (shown == 0) + bic_enabled = bic_lookup(optarg, SHOW_LIST); + else + bic_enabled |= bic_lookup(optarg, SHOW_LIST); + shown = 1; + break; + case 'S': + summary_only++; + break; + case 'T': + tj_max_override = atoi(optarg); + break; + case 'v': + print_version(); + exit(0); + break; + } + } +} + +int main(int argc, char **argv) +{ + outf = stderr; + cmdline(argc, argv); + + if (!quiet) { + print_version(); + print_bootcmd(); + } + + probe_sysfs(); + + turbostat_init(); + + msr_sum_record(); + + /* dump counters and exit */ + if (dump_only) + return get_and_dump_counters(); + + /* list header and exit */ + if (list_header_only) { + print_header(","); + flush_output_stdout(); + return 0; + } + + /* + * if any params left, it must be a command to fork + */ + if (argc - optind) + return fork_it(argv + optind); + else + turbostat_loop(); + + return 0; +} diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile new file mode 100644 index 000000000..666b325a6 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +CC = $(CROSS_COMPILE)gcc +BUILD_OUTPUT := $(CURDIR) +PREFIX := /usr +DESTDIR := + +ifeq ("$(origin O)", "command line") + BUILD_OUTPUT := $(O) +endif + +x86_energy_perf_policy : x86_energy_perf_policy.c +override CFLAGS += -O2 -Wall -I../../../include +override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' +override CFLAGS += -D_FORTIFY_SOURCE=2 + +%: %.c + @mkdir -p $(BUILD_OUTPUT) + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) + +.PHONY : clean +clean : + @rm -f $(BUILD_OUTPUT)/x86_energy_perf_policy + +install : x86_energy_perf_policy + install -d $(DESTDIR)$(PREFIX)/bin + install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy + install -d $(DESTDIR)$(PREFIX)/share/man/man8 + install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8 + diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 new file mode 100644 index 000000000..78c636189 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 @@ -0,0 +1,213 @@ +.\" This page Copyright (C) 2010 - 2015 Len Brown <len.brown@intel.com> +.\" Distributed under the GPL, Copyleft 1994. +.TH X86_ENERGY_PERF_POLICY 8 +.SH NAME +x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers +.SH SYNOPSIS +.B x86_energy_perf_policy +.RB "[ options ] [ scope ] [field \ value]" +.br +.RB "scope: \-\-cpu\ cpu-list | \-\-pkg\ pkg-list" +.br +.RB "cpu-list, pkg-list: # | #,# | #-# | all" +.br +.RB "field: \-\-all | \-\-epb | \-\-hwp-epp | \-\-hwp-min | \-\-hwp-max | \-\-hwp-desired" +.br +.RB "other: (\-\-force | \-\-hwp-enable | \-\-turbo-enable) value)" +.br +.RB "value: # | default | performance | balance-performance | balance-power | power" +.SH DESCRIPTION +\fBx86_energy_perf_policy\fP +displays and updates energy-performance policy settings specific to +Intel Architecture Processors. Settings are accessed via Model Specific Register (MSR) +updates, no matter if the Linux cpufreq sub-system is enabled or not. + +Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB) +may affect a wide range of hardware decisions, +such as how aggressively the hardware enters and exits CPU idle states (C-states) +and Processor Performance States (P-states). +This policy hint does not replace explicit OS C-state and P-state selection. +Rather, it tells the hardware how aggressively to implement those selections. +Further, it allows the OS to influence energy/performance trade-offs where there +is no software interface, such as in the opportunistic "turbo-mode" P-state range. +Note that MSR_IA32_ENERGY_PERF_BIAS is defined per CPU, +but some implementations +share a single MSR among all CPUs in each processor package. +On those systems, a write to EPB on one processor will +be visible, and will have an effect, on all CPUs +in the same processor package. + +Hardware P-States (HWP) are effectively an expansion of hardware +P-state control from the opportunistic turbo-mode P-state range +to include the entire range of available P-states. +On Broadwell Xeon, the initial HWP implementation, EPB influenced HWP. +That influence was removed in subsequent generations, +where it was moved to the +Energy_Performance_Preference (EPP) field in +a pair of dedicated MSRs -- MSR_IA32_HWP_REQUEST and MSR_IA32_HWP_REQUEST_PKG. + +EPP is the most commonly managed knob in HWP mode, +but MSR_IA32_HWP_REQUEST also allows the user to specify +minimum-frequency for Quality-of-Service, +and maximum-frequency for power-capping. +MSR_IA32_HWP_REQUEST is defined per-CPU. + +MSR_IA32_HWP_REQUEST_PKG has the same capability as MSR_IA32_HWP_REQUEST, +but it can simultaneously set the default policy for all CPUs within a package. +A bit in per-CPU MSR_IA32_HWP_REQUEST indicates whether it is +over-ruled-by or exempt-from MSR_IA32_HWP_REQUEST_PKG. + +MSR_HWP_CAPABILITIES shows the default values for the fields +in MSR_IA32_HWP_REQUEST. It is displayed when no values +are being written. + +.SS SCOPE OPTIONS +.PP +\fB-c, --cpu\fP Operate on the MSR_IA32_HWP_REQUEST for each CPU in a CPU-list. +The CPU-list may be comma-separated CPU numbers, with dash for range +or the string "all". Eg. '--cpu 1,4,6-8' or '--cpu all'. +When --cpu is used, \fB--hwp-use-pkg\fP is available, which specifies whether the per-cpu +MSR_IA32_HWP_REQUEST should be over-ruled by MSR_IA32_HWP_REQUEST_PKG (1), +or exempt from MSR_IA32_HWP_REQUEST_PKG (0). + +\fB-p, --pkg\fP Operate on the MSR_IA32_HWP_REQUEST_PKG for each package in the package-list. +The list is a string of individual package numbers separated +by commas, and or ranges of package numbers separated by a dash, +or the string "all". +For example '--pkg 1,3' or '--pkg all' + +.SS VALUE OPTIONS +.PP +.I normal | default +Set a policy with a normal balance between performance and energy efficiency. +The processor will tolerate minor performance compromise +for potentially significant energy savings. +This is a reasonable default for most desktops and servers. +"default" is a synonym for "normal". +.PP +.I performance +Set a policy for maximum performance, +accepting no performance sacrifice for the benefit of energy efficiency. +.PP +.I balance-performance +Set a policy with a high priority on performance, +but allowing some performance loss to benefit energy efficiency. +.PP +.I balance-power +Set a policy where the performance and power are balanced. +This is the default. +.PP +.I power +Set a policy where the processor can accept +a measurable performance impact to maximize energy efficiency. + +.PP +The following table shows the mapping from the value strings above to actual MSR values. +This mapping is defined in the Linux-kernel header, msr-index.h. + +.nf +VALUE STRING EPB EPP +performance 0 0 +balance-performance 4 128 +normal, default 6 128 +balance-power 8 192 +power 15 255 +.fi +.PP +For MSR_IA32_HWP_REQUEST performance fields +(--hwp-min, --hwp-max, --hwp-desired), the value option +is in units of 100 MHz, Eg. 12 signifies 1200 MHz. + +.SS FIELD OPTIONS +\fB-a, --all value-string\fP Sets all EPB and EPP and HWP limit fields to the value associated with +the value-string. In addition, enables turbo-mode and HWP-mode, if they were previous disabled. +Thus "--all normal" will set a system without cpufreq into a well known configuration. +.PP +\fB-B, --epb\fP set EPB per-core or per-package. +See value strings in the table above. +.PP +\fB-d, --debug\fP debug increases verbosity. By default +x86_energy_perf_policy is silent for updates, +and verbose for read-only mode. +.PP +\fB-P, --hwp-epp\fP set HWP.EPP per-core or per-package. +See value strings in the table above. +.PP +\fB-m, --hwp-min\fP request HWP to not go below the specified core/bus ratio. +The "default" is the value found in IA32_HWP_CAPABILITIES.min. +.PP +\fB-M, --hwp-max\fP request HWP not exceed a the specified core/bus ratio. +The "default" is the value found in IA32_HWP_CAPABILITIES.max. +.PP +\fB-D, --hwp-desired\fP request HWP 'desired' frequency. +The "normal" setting is 0, which +corresponds to 'full autonomous' HWP control. +Non-zero performance values request a specific performance +level on this processor, specified in multiples of 100 MHz. +.PP +\fB-w, --hwp-window\fP specify integer number of microsec +in the sliding window that HWP uses to maintain average frequency. +This parameter is meaningful only when the "desired" field above is non-zero. +Default is 0, allowing the HW to choose. +.SH OTHER OPTIONS +.PP +\fB-f, --force\fP writes the specified values without bounds checking. +.PP +\fB-U, --hwp-use-pkg\fP (0 | 1), when used in conjunction with --cpu, +indicates whether the per-CPU MSR_IA32_HWP_REQUEST should be overruled (1) +or exempt (0) from per-Package MSR_IA32_HWP_REQUEST_PKG settings. +The default is exempt. +.PP +\fB-H, --hwp-enable\fP enable HardWare-P-state (HWP) mode. Once enabled, system RESET is required to disable HWP mode. +.PP +\fB-t, --turbo-enable\fP enable (1) or disable (0) turbo mode. +.PP +\fB-v, --version\fP print version and exit. +.PP +If no request to change policy is made, +the default behavior is to read +and display the current system state, +including the default capabilities. +.SH WARNING +.PP +This utility writes directly to Model Specific Registers. +There is no locking or coordination should this utility +be used to modify HWP limit fields at the same time that +intel_pstate's sysfs attributes access the same MSRs. +.PP +Note that --hwp-desired and --hwp-window are considered experimental. +Future versions of Linux reserve the right to access these +fields internally -- potentially conflicting with user-space access. +.SH EXAMPLE +.nf +# sudo x86_energy_perf_policy +cpu0: EPB 6 +cpu0: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu0: HWP_CAP: low 1 eff 8 guar 27 high 35 +cpu1: EPB 6 +cpu1: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu1: HWP_CAP: low 1 eff 8 guar 27 high 35 +cpu2: EPB 6 +cpu2: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu2: HWP_CAP: low 1 eff 8 guar 27 high 35 +cpu3: EPB 6 +cpu3: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0 +cpu3: HWP_CAP: low 1 eff 8 guar 27 high 35 +.fi +.SH NOTES +.B "x86_energy_perf_policy" +runs only as root. +.SH FILES +.ta +.nf +/dev/cpu/*/msr +.fi +.SH "SEE ALSO" +.nf +msr(4) +Intel(R) 64 and IA-32 Architectures Software Developer's Manual +.fi +.PP +.SH AUTHORS +.nf +Len Brown diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c new file mode 100644 index 000000000..5fd9e5940 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -0,0 +1,1559 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * x86_energy_perf_policy -- set the energy versus performance + * policy preference bias on recent X86 processors. + */ +/* + * Copyright (c) 2010 - 2017 Intel Corporation. + * Len Brown <len.brown@intel.com> + */ + +#define _GNU_SOURCE +#include MSRHEADER +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sched.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <getopt.h> +#include <err.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/time.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> +#include <cpuid.h> +#include <errno.h> + +#define OPTARG_NORMAL (INT_MAX - 1) +#define OPTARG_POWER (INT_MAX - 2) +#define OPTARG_BALANCE_POWER (INT_MAX - 3) +#define OPTARG_BALANCE_PERFORMANCE (INT_MAX - 4) +#define OPTARG_PERFORMANCE (INT_MAX - 5) + +struct msr_hwp_cap { + unsigned char highest; + unsigned char guaranteed; + unsigned char efficient; + unsigned char lowest; +}; + +struct msr_hwp_request { + unsigned char hwp_min; + unsigned char hwp_max; + unsigned char hwp_desired; + unsigned char hwp_epp; + unsigned int hwp_window; + unsigned char hwp_use_pkg; +} req_update; + +unsigned int debug; +unsigned int verbose; +unsigned int force; +char *progname; +int base_cpu; +unsigned char update_epb; +unsigned long long new_epb; +unsigned char turbo_is_enabled; +unsigned char update_turbo; +unsigned char turbo_update_value; +unsigned char update_hwp_epp; +unsigned char update_hwp_min; +unsigned char update_hwp_max; +unsigned char update_hwp_desired; +unsigned char update_hwp_window; +unsigned char update_hwp_use_pkg; +unsigned char update_hwp_enable; +#define hwp_update_enabled() (update_hwp_enable | update_hwp_epp | update_hwp_max | update_hwp_min | update_hwp_desired | update_hwp_window | update_hwp_use_pkg) +int max_cpu_num; +int max_pkg_num; +#define MAX_PACKAGES 64 +unsigned int first_cpu_in_pkg[MAX_PACKAGES]; +unsigned long long pkg_present_set; +unsigned long long pkg_selected_set; +cpu_set_t *cpu_present_set; +cpu_set_t *cpu_selected_set; +int genuine_intel; + +size_t cpu_setsize; + +char *proc_stat = "/proc/stat"; + +unsigned int has_epb; /* MSR_IA32_ENERGY_PERF_BIAS */ +unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */ + /* IA32_HWP_REQUEST, IA32_HWP_STATUS */ +unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */ +unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */ +unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */ +unsigned int has_hwp_request_pkg; /* IA32_HWP_REQUEST_PKG */ + +unsigned int bdx_highest_ratio; + +#define PATH_TO_CPU "/sys/devices/system/cpu/" +#define SYSFS_PATH_MAX 255 + +/* + * maintain compatibility with original implementation, but don't document it: + */ +void usage(void) +{ + fprintf(stderr, "%s [options] [scope][field value]\n", progname); + fprintf(stderr, "scope: --cpu cpu-list [--hwp-use-pkg #] | --pkg pkg-list\n"); + fprintf(stderr, "field: --all | --epb | --hwp-epp | --hwp-min | --hwp-max | --hwp-desired\n"); + fprintf(stderr, "other: --hwp-enable | --turbo-enable (0 | 1) | --help | --force\n"); + fprintf(stderr, + "value: ( # | \"normal\" | \"performance\" | \"balance-performance\" | \"balance-power\"| \"power\")\n"); + fprintf(stderr, "--hwp-window usec\n"); + + fprintf(stderr, "Specify only Energy Performance BIAS (legacy usage):\n"); + fprintf(stderr, "%s: [-c cpu] [-v] (-r | policy-value )\n", progname); + + exit(1); +} + +/* + * If bdx_highest_ratio is set, + * then we must translate between MSR format and simple ratio + * used on the cmdline. + */ +int ratio_2_msr_perf(int ratio) +{ + int msr_perf; + + if (!bdx_highest_ratio) + return ratio; + + msr_perf = ratio * 255 / bdx_highest_ratio; + + if (debug) + fprintf(stderr, "%d = ratio_to_msr_perf(%d)\n", msr_perf, ratio); + + return msr_perf; +} +int msr_perf_2_ratio(int msr_perf) +{ + int ratio; + double d; + + if (!bdx_highest_ratio) + return msr_perf; + + d = (double)msr_perf * (double) bdx_highest_ratio / 255.0; + d = d + 0.5; /* round */ + ratio = (int)d; + + if (debug) + fprintf(stderr, "%d = msr_perf_ratio(%d) {%f}\n", ratio, msr_perf, d); + + return ratio; +} +int parse_cmdline_epb(int i) +{ + if (!has_epb) + errx(1, "EPB not enabled on this platform"); + + update_epb = 1; + + switch (i) { + case OPTARG_POWER: + return ENERGY_PERF_BIAS_POWERSAVE; + case OPTARG_BALANCE_POWER: + return ENERGY_PERF_BIAS_BALANCE_POWERSAVE; + case OPTARG_NORMAL: + return ENERGY_PERF_BIAS_NORMAL; + case OPTARG_BALANCE_PERFORMANCE: + return ENERGY_PERF_BIAS_BALANCE_PERFORMANCE; + case OPTARG_PERFORMANCE: + return ENERGY_PERF_BIAS_PERFORMANCE; + } + if (i < 0 || i > ENERGY_PERF_BIAS_POWERSAVE) + errx(1, "--epb must be from 0 to 15"); + return i; +} + +#define HWP_CAP_LOWEST 0 +#define HWP_CAP_HIGHEST 255 + +/* + * "performance" changes hwp_min to cap.highest + * All others leave it at cap.lowest + */ +int parse_cmdline_hwp_min(int i) +{ + update_hwp_min = 1; + + switch (i) { + case OPTARG_POWER: + case OPTARG_BALANCE_POWER: + case OPTARG_NORMAL: + case OPTARG_BALANCE_PERFORMANCE: + return HWP_CAP_LOWEST; + case OPTARG_PERFORMANCE: + return HWP_CAP_HIGHEST; + } + return i; +} +/* + * "power" changes hwp_max to cap.lowest + * All others leave it at cap.highest + */ +int parse_cmdline_hwp_max(int i) +{ + update_hwp_max = 1; + + switch (i) { + case OPTARG_POWER: + return HWP_CAP_LOWEST; + case OPTARG_NORMAL: + case OPTARG_BALANCE_POWER: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_PERFORMANCE: + return HWP_CAP_HIGHEST; + } + return i; +} +/* + * for --hwp-des, all strings leave it in autonomous mode + * If you want to change it, you need to explicitly pick a value + */ +int parse_cmdline_hwp_desired(int i) +{ + update_hwp_desired = 1; + + switch (i) { + case OPTARG_POWER: + case OPTARG_BALANCE_POWER: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_NORMAL: + case OPTARG_PERFORMANCE: + return 0; /* autonomous */ + } + return i; +} + +int parse_cmdline_hwp_window(int i) +{ + unsigned int exponent; + + update_hwp_window = 1; + + switch (i) { + case OPTARG_POWER: + case OPTARG_BALANCE_POWER: + case OPTARG_NORMAL: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_PERFORMANCE: + return 0; + } + if (i < 0 || i > 1270000000) { + fprintf(stderr, "--hwp-window: 0 for auto; 1 - 1270000000 usec for window duration\n"); + usage(); + } + for (exponent = 0; ; ++exponent) { + if (debug) + printf("%d 10^%d\n", i, exponent); + + if (i <= 127) + break; + + i = i / 10; + } + if (debug) + fprintf(stderr, "%d*10^%d: 0x%x\n", i, exponent, (exponent << 7) | i); + + return (exponent << 7) | i; +} +int parse_cmdline_hwp_epp(int i) +{ + update_hwp_epp = 1; + + switch (i) { + case OPTARG_POWER: + return HWP_EPP_POWERSAVE; + case OPTARG_BALANCE_POWER: + return HWP_EPP_BALANCE_POWERSAVE; + case OPTARG_NORMAL: + case OPTARG_BALANCE_PERFORMANCE: + return HWP_EPP_BALANCE_PERFORMANCE; + case OPTARG_PERFORMANCE: + return HWP_EPP_PERFORMANCE; + } + if (i < 0 || i > 0xff) { + fprintf(stderr, "--hwp-epp must be from 0 to 0xff\n"); + usage(); + } + return i; +} +int parse_cmdline_turbo(int i) +{ + update_turbo = 1; + + switch (i) { + case OPTARG_POWER: + return 0; + case OPTARG_NORMAL: + case OPTARG_BALANCE_POWER: + case OPTARG_BALANCE_PERFORMANCE: + case OPTARG_PERFORMANCE: + return 1; + } + if (i < 0 || i > 1) { + fprintf(stderr, "--turbo-enable: 1 to enable, 0 to disable\n"); + usage(); + } + return i; +} + +int parse_optarg_string(char *s) +{ + int i; + char *endptr; + + if (!strncmp(s, "default", 7)) + return OPTARG_NORMAL; + + if (!strncmp(s, "normal", 6)) + return OPTARG_NORMAL; + + if (!strncmp(s, "power", 9)) + return OPTARG_POWER; + + if (!strncmp(s, "balance-power", 17)) + return OPTARG_BALANCE_POWER; + + if (!strncmp(s, "balance-performance", 19)) + return OPTARG_BALANCE_PERFORMANCE; + + if (!strncmp(s, "performance", 11)) + return OPTARG_PERFORMANCE; + + i = strtol(s, &endptr, 0); + if (s == endptr) { + fprintf(stderr, "no digits in \"%s\"\n", s); + usage(); + } + if (i == LONG_MIN || i == LONG_MAX) + errx(-1, "%s", s); + + if (i > 0xFF) + errx(-1, "%d (0x%x) must be < 256", i, i); + + if (i < 0) + errx(-1, "%d (0x%x) must be >= 0", i, i); + return i; +} + +void parse_cmdline_all(char *s) +{ + force++; + update_hwp_enable = 1; + req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(s)); + req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(s)); + req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(s)); + if (has_epb) + new_epb = parse_cmdline_epb(parse_optarg_string(s)); + turbo_update_value = parse_cmdline_turbo(parse_optarg_string(s)); + req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(s)); + req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(s)); +} + +void validate_cpu_selected_set(void) +{ + int cpu; + + if (CPU_COUNT_S(cpu_setsize, cpu_selected_set) == 0) + errx(0, "no CPUs requested"); + + for (cpu = 0; cpu <= max_cpu_num; ++cpu) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_selected_set)) + if (!CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + errx(1, "Requested cpu% is not present", cpu); + } +} + +void parse_cmdline_cpu(char *s) +{ + char *startp, *endp; + int cpu = 0; + + if (pkg_selected_set) { + usage(); + errx(1, "--cpu | --pkg"); + } + cpu_selected_set = CPU_ALLOC((max_cpu_num + 1)); + if (cpu_selected_set == NULL) + err(1, "cpu_selected_set"); + CPU_ZERO_S(cpu_setsize, cpu_selected_set); + + for (startp = s; startp && *startp;) { + + if (*startp == ',') { + startp++; + continue; + } + + if (*startp == '-') { + int end_cpu; + + startp++; + end_cpu = strtol(startp, &endp, 10); + if (startp == endp) + continue; + + while (cpu <= end_cpu) { + if (cpu > max_cpu_num) + errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num); + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + cpu++; + } + startp = endp; + continue; + } + + if (strncmp(startp, "all", 3) == 0) { + for (cpu = 0; cpu <= max_cpu_num; cpu += 1) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + } + startp += 3; + if (*startp == 0) + break; + } + /* "--cpu even" is not documented */ + if (strncmp(startp, "even", 4) == 0) { + for (cpu = 0; cpu <= max_cpu_num; cpu += 2) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + } + startp += 4; + if (*startp == 0) + break; + } + + /* "--cpu odd" is not documented */ + if (strncmp(startp, "odd", 3) == 0) { + for (cpu = 1; cpu <= max_cpu_num; cpu += 2) { + if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set)) + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + } + startp += 3; + if (*startp == 0) + break; + } + + cpu = strtol(startp, &endp, 10); + if (startp == endp) + errx(1, "--cpu cpu-set: confused by '%s'", startp); + if (cpu > max_cpu_num) + errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num); + CPU_SET_S(cpu, cpu_setsize, cpu_selected_set); + startp = endp; + } + + validate_cpu_selected_set(); + +} + +void parse_cmdline_pkg(char *s) +{ + char *startp, *endp; + int pkg = 0; + + if (cpu_selected_set) { + usage(); + errx(1, "--pkg | --cpu"); + } + pkg_selected_set = 0; + + for (startp = s; startp && *startp;) { + + if (*startp == ',') { + startp++; + continue; + } + + if (*startp == '-') { + int end_pkg; + + startp++; + end_pkg = strtol(startp, &endp, 10); + if (startp == endp) + continue; + + while (pkg <= end_pkg) { + if (pkg > max_pkg_num) + errx(1, "Requested pkg%d exceeds max pkg%d", pkg, max_pkg_num); + pkg_selected_set |= 1 << pkg; + pkg++; + } + startp = endp; + continue; + } + + if (strncmp(startp, "all", 3) == 0) { + pkg_selected_set = pkg_present_set; + return; + } + + pkg = strtol(startp, &endp, 10); + if (pkg > max_pkg_num) + errx(1, "Requested pkg%d Exceeds max pkg%d", pkg, max_pkg_num); + pkg_selected_set |= 1 << pkg; + startp = endp; + } +} + +void for_packages(unsigned long long pkg_set, int (func)(int)) +{ + int pkg_num; + + for (pkg_num = 0; pkg_num <= max_pkg_num; ++pkg_num) { + if (pkg_set & (1UL << pkg_num)) + func(pkg_num); + } +} + +void print_version(void) +{ + printf("x86_energy_perf_policy 17.05.11 (C) Len Brown <len.brown@intel.com>\n"); +} + +void cmdline(int argc, char **argv) +{ + int opt; + int option_index = 0; + + static struct option long_options[] = { + {"all", required_argument, 0, 'a'}, + {"cpu", required_argument, 0, 'c'}, + {"pkg", required_argument, 0, 'p'}, + {"debug", no_argument, 0, 'd'}, + {"hwp-desired", required_argument, 0, 'D'}, + {"epb", required_argument, 0, 'B'}, + {"force", no_argument, 0, 'f'}, + {"hwp-enable", no_argument, 0, 'e'}, + {"help", no_argument, 0, 'h'}, + {"hwp-epp", required_argument, 0, 'P'}, + {"hwp-min", required_argument, 0, 'm'}, + {"hwp-max", required_argument, 0, 'M'}, + {"read", no_argument, 0, 'r'}, + {"turbo-enable", required_argument, 0, 't'}, + {"hwp-use-pkg", required_argument, 0, 'u'}, + {"version", no_argument, 0, 'v'}, + {"hwp-window", required_argument, 0, 'w'}, + {0, 0, 0, 0 } + }; + + progname = argv[0]; + + while ((opt = getopt_long_only(argc, argv, "+a:c:dD:E:e:f:m:M:rt:u:vw:", + long_options, &option_index)) != -1) { + switch (opt) { + case 'a': + parse_cmdline_all(optarg); + break; + case 'B': + new_epb = parse_cmdline_epb(parse_optarg_string(optarg)); + break; + case 'c': + parse_cmdline_cpu(optarg); + break; + case 'e': + update_hwp_enable = 1; + break; + case 'h': + usage(); + break; + case 'd': + debug++; + verbose++; + break; + case 'f': + force++; + break; + case 'D': + req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(optarg)); + break; + case 'm': + req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(optarg)); + break; + case 'M': + req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(optarg)); + break; + case 'p': + parse_cmdline_pkg(optarg); + break; + case 'P': + req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(optarg)); + break; + case 'r': + /* v1 used -r to specify read-only mode, now the default */ + break; + case 't': + turbo_update_value = parse_cmdline_turbo(parse_optarg_string(optarg)); + break; + case 'u': + update_hwp_use_pkg++; + if (atoi(optarg) == 0) + req_update.hwp_use_pkg = 0; + else + req_update.hwp_use_pkg = 1; + break; + case 'v': + print_version(); + exit(0); + break; + case 'w': + req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(optarg)); + break; + default: + usage(); + } + } + /* + * v1 allowed "performance"|"normal"|"power" with no policy specifier + * to update BIAS. Continue to support that, even though no longer documented. + */ + if (argc == optind + 1) + new_epb = parse_cmdline_epb(parse_optarg_string(argv[optind])); + + if (argc > optind + 1) { + fprintf(stderr, "stray parameter '%s'\n", argv[optind + 1]); + usage(); + } +} + +/* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ + FILE *filep = fopen(path, "r"); + + if (!filep) + err(1, "%s: open failed", path); + return filep; +} + +void err_on_hypervisor(void) +{ + FILE *cpuinfo; + char *flags, *hypervisor; + char *buffer; + + /* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */ + cpuinfo = fopen_or_die("/proc/cpuinfo", "ro"); + + buffer = malloc(4096); + if (!buffer) { + fclose(cpuinfo); + err(-ENOMEM, "buffer malloc fail"); + } + + if (!fread(buffer, 1024, 1, cpuinfo)) { + fclose(cpuinfo); + free(buffer); + err(1, "Reading /proc/cpuinfo failed"); + } + + flags = strstr(buffer, "flags"); + rewind(cpuinfo); + fseek(cpuinfo, flags - buffer, SEEK_SET); + if (!fgets(buffer, 4096, cpuinfo)) { + fclose(cpuinfo); + free(buffer); + err(1, "Reading /proc/cpuinfo failed"); + } + fclose(cpuinfo); + + hypervisor = strstr(buffer, "hypervisor"); + + free(buffer); + + if (hypervisor) + err(-1, + "not supported on this virtual machine"); +} + +int get_msr(int cpu, int offset, unsigned long long *msr) +{ + int retval; + char pathname[32]; + int fd; + + sprintf(pathname, "/dev/cpu/%d/msr", cpu); + fd = open(pathname, O_RDONLY); + if (fd < 0) + err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); + + retval = pread(fd, msr, sizeof(*msr), offset); + if (retval != sizeof(*msr)) { + err_on_hypervisor(); + err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); + } + + if (debug > 1) + fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr); + + close(fd); + return 0; +} + +int put_msr(int cpu, int offset, unsigned long long new_msr) +{ + char pathname[32]; + int retval; + int fd; + + sprintf(pathname, "/dev/cpu/%d/msr", cpu); + fd = open(pathname, O_RDWR); + if (fd < 0) + err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); + + retval = pwrite(fd, &new_msr, sizeof(new_msr), offset); + if (retval != sizeof(new_msr)) + err(-2, "pwrite(cpu%d, offset 0x%x, 0x%llx) = %d", cpu, offset, new_msr, retval); + + close(fd); + + if (debug > 1) + fprintf(stderr, "put_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, new_msr); + + return 0; +} + +static unsigned int read_sysfs(const char *path, char *buf, size_t buflen) +{ + ssize_t numread; + int fd; + + fd = open(path, O_RDONLY); + if (fd == -1) + return 0; + + numread = read(fd, buf, buflen - 1); + if (numread < 1) { + close(fd); + return 0; + } + + buf[numread] = '\0'; + close(fd); + + return (unsigned int) numread; +} + +static unsigned int write_sysfs(const char *path, char *buf, size_t buflen) +{ + ssize_t numwritten; + int fd; + + fd = open(path, O_WRONLY); + if (fd == -1) + return 0; + + numwritten = write(fd, buf, buflen - 1); + if (numwritten < 1) { + perror("write failed\n"); + close(fd); + return -1; + } + + close(fd); + + return (unsigned int) numwritten; +} + +void print_hwp_cap(int cpu, struct msr_hwp_cap *cap, char *str) +{ + if (cpu != -1) + printf("cpu%d: ", cpu); + + printf("HWP_CAP: low %d eff %d guar %d high %d\n", + cap->lowest, cap->efficient, cap->guaranteed, cap->highest); +} +void read_hwp_cap(int cpu, struct msr_hwp_cap *cap, unsigned int msr_offset) +{ + unsigned long long msr; + + get_msr(cpu, msr_offset, &msr); + + cap->highest = msr_perf_2_ratio(HWP_HIGHEST_PERF(msr)); + cap->guaranteed = msr_perf_2_ratio(HWP_GUARANTEED_PERF(msr)); + cap->efficient = msr_perf_2_ratio(HWP_MOSTEFFICIENT_PERF(msr)); + cap->lowest = msr_perf_2_ratio(HWP_LOWEST_PERF(msr)); +} + +void print_hwp_request(int cpu, struct msr_hwp_request *h, char *str) +{ + if (cpu != -1) + printf("cpu%d: ", cpu); + + if (str) + printf("%s", str); + + printf("HWP_REQ: min %d max %d des %d epp %d window 0x%x (%d*10^%dus) use_pkg %d\n", + h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp, + h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7, h->hwp_use_pkg); +} +void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str) +{ + printf("pkg%d: ", pkg); + + if (str) + printf("%s", str); + + printf("HWP_REQ_PKG: min %d max %d des %d epp %d window 0x%x (%d*10^%dus)\n", + h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp, + h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7); +} +void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) +{ + unsigned long long msr; + + get_msr(cpu, msr_offset, &msr); + + hwp_req->hwp_min = msr_perf_2_ratio((((msr) >> 0) & 0xff)); + hwp_req->hwp_max = msr_perf_2_ratio((((msr) >> 8) & 0xff)); + hwp_req->hwp_desired = msr_perf_2_ratio((((msr) >> 16) & 0xff)); + hwp_req->hwp_epp = (((msr) >> 24) & 0xff); + hwp_req->hwp_window = (((msr) >> 32) & 0x3ff); + hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1); +} + +void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset) +{ + unsigned long long msr = 0; + + if (debug > 1) + printf("cpu%d: requesting min %d max %d des %d epp %d window 0x%0x use_pkg %d\n", + cpu, hwp_req->hwp_min, hwp_req->hwp_max, + hwp_req->hwp_desired, hwp_req->hwp_epp, + hwp_req->hwp_window, hwp_req->hwp_use_pkg); + + msr |= HWP_MIN_PERF(ratio_2_msr_perf(hwp_req->hwp_min)); + msr |= HWP_MAX_PERF(ratio_2_msr_perf(hwp_req->hwp_max)); + msr |= HWP_DESIRED_PERF(ratio_2_msr_perf(hwp_req->hwp_desired)); + msr |= HWP_ENERGY_PERF_PREFERENCE(hwp_req->hwp_epp); + msr |= HWP_ACTIVITY_WINDOW(hwp_req->hwp_window); + msr |= HWP_PACKAGE_CONTROL(hwp_req->hwp_use_pkg); + + put_msr(cpu, msr_offset, msr); +} + +static int get_epb(int cpu) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[3]; + char *endp; + long val; + + if (!has_epb) + return -1; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu); + + if (!read_sysfs(path, linebuf, 3)) + return -1; + + val = strtol(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -1; + + return (int)val; +} + +static int set_epb(int cpu, int val) +{ + char path[SYSFS_PATH_MAX]; + char linebuf[3]; + char *endp; + int ret; + + if (!has_epb) + return -1; + + snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/power/energy_perf_bias", cpu); + snprintf(linebuf, sizeof(linebuf), "%d", val); + + ret = write_sysfs(path, linebuf, 3); + if (ret <= 0) + return -1; + + val = strtol(linebuf, &endp, 0); + if (endp == linebuf || errno == ERANGE) + return -1; + + return (int)val; +} + +int print_cpu_msrs(int cpu) +{ + struct msr_hwp_request req; + struct msr_hwp_cap cap; + int epb; + + epb = get_epb(cpu); + if (epb >= 0) + printf("cpu%d: EPB %u\n", cpu, (unsigned int) epb); + + if (!has_hwp) + return 0; + + read_hwp_request(cpu, &req, MSR_HWP_REQUEST); + print_hwp_request(cpu, &req, ""); + + read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES); + print_hwp_cap(cpu, &cap, ""); + + return 0; +} + +int print_pkg_msrs(int pkg) +{ + struct msr_hwp_request req; + unsigned long long msr; + + if (!has_hwp) + return 0; + + read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG); + print_hwp_request_pkg(pkg, &req, ""); + + if (has_hwp_notify) { + get_msr(first_cpu_in_pkg[pkg], MSR_HWP_INTERRUPT, &msr); + fprintf(stderr, + "pkg%d: MSR_HWP_INTERRUPT: 0x%08llx (Excursion_Min-%sabled, Guaranteed_Perf_Change-%sabled)\n", + pkg, msr, + ((msr) & 0x2) ? "EN" : "Dis", + ((msr) & 0x1) ? "EN" : "Dis"); + } + get_msr(first_cpu_in_pkg[pkg], MSR_HWP_STATUS, &msr); + fprintf(stderr, + "pkg%d: MSR_HWP_STATUS: 0x%08llx (%sExcursion_Min, %sGuaranteed_Perf_Change)\n", + pkg, msr, + ((msr) & 0x4) ? "" : "No-", + ((msr) & 0x1) ? "" : "No-"); + + return 0; +} + +/* + * Assumption: All HWP systems have 100 MHz bus clock + */ +int ratio_2_sysfs_khz(int ratio) +{ + int bclk_khz = 100 * 1000; /* 100,000 KHz = 100 MHz */ + + return ratio * bclk_khz; +} +/* + * If HWP is enabled and cpufreq sysfs attribtes are present, + * then update sysfs, so that it will not become + * stale when we write to MSRs. + * (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq, + * so we don't have to touch that.) + */ +void update_cpufreq_scaling_freq(int is_max, int cpu, unsigned int ratio) +{ + char pathname[64]; + FILE *fp; + int retval; + int khz; + + sprintf(pathname, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_%s_freq", + cpu, is_max ? "max" : "min"); + + fp = fopen(pathname, "w"); + if (!fp) { + if (debug) + perror(pathname); + return; + } + + khz = ratio_2_sysfs_khz(ratio); + retval = fprintf(fp, "%d", khz); + if (retval < 0) + if (debug) + perror("fprintf"); + if (debug) + printf("echo %d > %s\n", khz, pathname); + + fclose(fp); +} + +/* + * We update all sysfs before updating any MSRs because of + * bugs in cpufreq/intel_pstate where the sysfs writes + * for a CPU may change the min/max values on other CPUS. + */ + +int update_sysfs(int cpu) +{ + if (!has_hwp) + return 0; + + if (!hwp_update_enabled()) + return 0; + + if (access("/sys/devices/system/cpu/cpu0/cpufreq", F_OK)) + return 0; + + if (update_hwp_min) + update_cpufreq_scaling_freq(0, cpu, req_update.hwp_min); + + if (update_hwp_max) + update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max); + + return 0; +} + +int verify_hwp_req_self_consistency(int cpu, struct msr_hwp_request *req) +{ + /* fail if min > max requested */ + if (req->hwp_min > req->hwp_max) { + errx(1, "cpu%d: requested hwp-min %d > hwp_max %d", + cpu, req->hwp_min, req->hwp_max); + } + + /* fail if desired > max requestd */ + if (req->hwp_desired && (req->hwp_desired > req->hwp_max)) { + errx(1, "cpu%d: requested hwp-desired %d > hwp_max %d", + cpu, req->hwp_desired, req->hwp_max); + } + /* fail if desired < min requestd */ + if (req->hwp_desired && (req->hwp_desired < req->hwp_min)) { + errx(1, "cpu%d: requested hwp-desired %d < requested hwp_min %d", + cpu, req->hwp_desired, req->hwp_min); + } + + return 0; +} + +int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, struct msr_hwp_cap *cap) +{ + if (update_hwp_max) { + if (req->hwp_max > cap->highest) + errx(1, "cpu%d: requested max %d > capabilities highest %d, use --force?", + cpu, req->hwp_max, cap->highest); + if (req->hwp_max < cap->lowest) + errx(1, "cpu%d: requested max %d < capabilities lowest %d, use --force?", + cpu, req->hwp_max, cap->lowest); + } + + if (update_hwp_min) { + if (req->hwp_min > cap->highest) + errx(1, "cpu%d: requested min %d > capabilities highest %d, use --force?", + cpu, req->hwp_min, cap->highest); + if (req->hwp_min < cap->lowest) + errx(1, "cpu%d: requested min %d < capabilities lowest %d, use --force?", + cpu, req->hwp_min, cap->lowest); + } + + if (update_hwp_min && update_hwp_max && (req->hwp_min > req->hwp_max)) + errx(1, "cpu%d: requested min %d > requested max %d", + cpu, req->hwp_min, req->hwp_max); + + if (update_hwp_desired && req->hwp_desired) { + if (req->hwp_desired > req->hwp_max) + errx(1, "cpu%d: requested desired %d > requested max %d, use --force?", + cpu, req->hwp_desired, req->hwp_max); + if (req->hwp_desired < req->hwp_min) + errx(1, "cpu%d: requested desired %d < requested min %d, use --force?", + cpu, req->hwp_desired, req->hwp_min); + if (req->hwp_desired < cap->lowest) + errx(1, "cpu%d: requested desired %d < capabilities lowest %d, use --force?", + cpu, req->hwp_desired, cap->lowest); + if (req->hwp_desired > cap->highest) + errx(1, "cpu%d: requested desired %d > capabilities highest %d, use --force?", + cpu, req->hwp_desired, cap->highest); + } + + return 0; +} + +int update_hwp_request(int cpu) +{ + struct msr_hwp_request req; + struct msr_hwp_cap cap; + + int msr_offset = MSR_HWP_REQUEST; + + read_hwp_request(cpu, &req, msr_offset); + if (debug) + print_hwp_request(cpu, &req, "old: "); + + if (update_hwp_min) + req.hwp_min = req_update.hwp_min; + + if (update_hwp_max) + req.hwp_max = req_update.hwp_max; + + if (update_hwp_desired) + req.hwp_desired = req_update.hwp_desired; + + if (update_hwp_window) + req.hwp_window = req_update.hwp_window; + + if (update_hwp_epp) + req.hwp_epp = req_update.hwp_epp; + + req.hwp_use_pkg = req_update.hwp_use_pkg; + + read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES); + if (debug) + print_hwp_cap(cpu, &cap, ""); + + if (!force) + check_hwp_request_v_hwp_capabilities(cpu, &req, &cap); + + verify_hwp_req_self_consistency(cpu, &req); + + write_hwp_request(cpu, &req, msr_offset); + + if (debug) { + read_hwp_request(cpu, &req, msr_offset); + print_hwp_request(cpu, &req, "new: "); + } + return 0; +} +int update_hwp_request_pkg(int pkg) +{ + struct msr_hwp_request req; + struct msr_hwp_cap cap; + int cpu = first_cpu_in_pkg[pkg]; + + int msr_offset = MSR_HWP_REQUEST_PKG; + + read_hwp_request(cpu, &req, msr_offset); + if (debug) + print_hwp_request_pkg(pkg, &req, "old: "); + + if (update_hwp_min) + req.hwp_min = req_update.hwp_min; + + if (update_hwp_max) + req.hwp_max = req_update.hwp_max; + + if (update_hwp_desired) + req.hwp_desired = req_update.hwp_desired; + + if (update_hwp_window) + req.hwp_window = req_update.hwp_window; + + if (update_hwp_epp) + req.hwp_epp = req_update.hwp_epp; + + read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES); + if (debug) + print_hwp_cap(cpu, &cap, ""); + + if (!force) + check_hwp_request_v_hwp_capabilities(cpu, &req, &cap); + + verify_hwp_req_self_consistency(cpu, &req); + + write_hwp_request(cpu, &req, msr_offset); + + if (debug) { + read_hwp_request(cpu, &req, msr_offset); + print_hwp_request_pkg(pkg, &req, "new: "); + } + return 0; +} + +int enable_hwp_on_cpu(int cpu) +{ + unsigned long long msr; + + get_msr(cpu, MSR_PM_ENABLE, &msr); + put_msr(cpu, MSR_PM_ENABLE, 1); + + if (verbose) + printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1); + + return 0; +} + +int update_cpu_msrs(int cpu) +{ + unsigned long long msr; + int epb; + + if (update_epb) { + epb = get_epb(cpu); + set_epb(cpu, new_epb); + + if (verbose) + printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n", + cpu, epb, (unsigned int) new_epb); + } + + if (update_turbo) { + int turbo_is_present_and_disabled; + + get_msr(cpu, MSR_IA32_MISC_ENABLE, &msr); + + turbo_is_present_and_disabled = ((msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE) != 0); + + if (turbo_update_value == 1) { + if (turbo_is_present_and_disabled) { + msr &= ~MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + put_msr(cpu, MSR_IA32_MISC_ENABLE, msr); + if (verbose) + printf("cpu%d: turbo ENABLE\n", cpu); + } + } else { + /* + * if "turbo_is_enabled" were known to be describe this cpu + * then we could use it here to skip redundant disable requests. + * but cpu may be in a different package, so we always write. + */ + msr |= MSR_IA32_MISC_ENABLE_TURBO_DISABLE; + put_msr(cpu, MSR_IA32_MISC_ENABLE, msr); + if (verbose) + printf("cpu%d: turbo DISABLE\n", cpu); + } + } + + if (!has_hwp) + return 0; + + if (!hwp_update_enabled()) + return 0; + + update_hwp_request(cpu); + return 0; +} + +unsigned int get_pkg_num(int cpu) +{ + FILE *fp; + char pathname[128]; + unsigned int pkg; + int retval; + + sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); + + fp = fopen_or_die(pathname, "r"); + retval = fscanf(fp, "%d\n", &pkg); + if (retval != 1) + errx(1, "%s: failed to parse", pathname); + return pkg; +} + +int set_max_cpu_pkg_num(int cpu) +{ + unsigned int pkg; + + if (max_cpu_num < cpu) + max_cpu_num = cpu; + + pkg = get_pkg_num(cpu); + + if (pkg >= MAX_PACKAGES) + errx(1, "cpu%d: %d >= MAX_PACKAGES (%d)", cpu, pkg, MAX_PACKAGES); + + if (pkg > max_pkg_num) + max_pkg_num = pkg; + + if ((pkg_present_set & (1ULL << pkg)) == 0) { + pkg_present_set |= (1ULL << pkg); + first_cpu_in_pkg[pkg] = cpu; + } + + return 0; +} +int mark_cpu_present(int cpu) +{ + CPU_SET_S(cpu, cpu_setsize, cpu_present_set); + return 0; +} + +/* + * run func(cpu) on every cpu in /proc/stat + * return max_cpu number + */ +int for_all_proc_cpus(int (func)(int)) +{ + FILE *fp; + int cpu_num; + int retval; + + fp = fopen_or_die(proc_stat, "r"); + + retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); + if (retval != 0) + err(1, "%s: failed to parse format", proc_stat); + + while (1) { + retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); + if (retval != 1) + break; + + retval = func(cpu_num); + if (retval) { + fclose(fp); + return retval; + } + } + fclose(fp); + return 0; +} + +void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int)) +{ + int cpu_num; + + for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num) + if (CPU_ISSET_S(cpu_num, set_size, cpu_set)) + func(cpu_num); +} + +void init_data_structures(void) +{ + for_all_proc_cpus(set_max_cpu_pkg_num); + + cpu_setsize = CPU_ALLOC_SIZE((max_cpu_num + 1)); + + cpu_present_set = CPU_ALLOC((max_cpu_num + 1)); + if (cpu_present_set == NULL) + err(3, "CPU_ALLOC"); + CPU_ZERO_S(cpu_setsize, cpu_present_set); + for_all_proc_cpus(mark_cpu_present); +} + +/* clear has_hwp if it is not enable (or being enabled) */ + +void verify_hwp_is_enabled(void) +{ + unsigned long long msr; + + if (!has_hwp) /* set in early_cpuid() */ + return; + + /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */ + get_msr(base_cpu, MSR_PM_ENABLE, &msr); + if ((msr & 1) == 0) { + fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n"); + has_hwp = 0; + return; + } +} + +int req_update_bounds_check(void) +{ + if (!hwp_update_enabled()) + return 0; + + /* fail if min > max requested */ + if ((update_hwp_max && update_hwp_min) && + (req_update.hwp_min > req_update.hwp_max)) { + printf("hwp-min %d > hwp_max %d\n", req_update.hwp_min, req_update.hwp_max); + return -EINVAL; + } + + /* fail if desired > max requestd */ + if (req_update.hwp_desired && update_hwp_max && + (req_update.hwp_desired > req_update.hwp_max)) { + printf("hwp-desired cannot be greater than hwp_max\n"); + return -EINVAL; + } + /* fail if desired < min requestd */ + if (req_update.hwp_desired && update_hwp_min && + (req_update.hwp_desired < req_update.hwp_min)) { + printf("hwp-desired cannot be less than hwp_min\n"); + return -EINVAL; + } + + return 0; +} + +void set_base_cpu(void) +{ + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(-ENODEV, "No valid cpus found"); +} + + +void probe_dev_msr(void) +{ + struct stat sb; + char pathname[32]; + + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (stat(pathname, &sb)) + if (system("/sbin/modprobe msr > /dev/null 2>&1")) + err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); +} + +static void get_cpuid_or_exit(unsigned int leaf, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + if (!__get_cpuid(leaf, eax, ebx, ecx, edx)) + errx(1, "Processor not supported\n"); +} + +/* + * early_cpuid() + * initialize turbo_is_enabled, has_hwp, has_epb + * before cmdline is parsed + */ +void early_cpuid(void) +{ + unsigned int eax, ebx, ecx, edx; + unsigned int fms, family, model; + + get_cpuid_or_exit(1, &fms, &ebx, &ecx, &edx); + family = (fms >> 8) & 0xf; + model = (fms >> 4) & 0xf; + if (family == 6 || family == 0xf) + model += ((fms >> 16) & 0xf) << 4; + + if (model == 0x4F) { + unsigned long long msr; + + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); + + bdx_highest_ratio = msr & 0xFF; + } + + get_cpuid_or_exit(0x6, &eax, &ebx, &ecx, &edx); + turbo_is_enabled = (eax >> 1) & 1; + has_hwp = (eax >> 7) & 1; + has_epb = (ecx >> 3) & 1; +} + +/* + * parse_cpuid() + * set + * has_hwp, has_hwp_notify, has_hwp_activity_window, has_hwp_epp, has_hwp_request_pkg, has_epb + */ +void parse_cpuid(void) +{ + unsigned int eax, ebx, ecx, edx, max_level; + unsigned int fms, family, model, stepping; + + eax = ebx = ecx = edx = 0; + + get_cpuid_or_exit(0, &max_level, &ebx, &ecx, &edx); + + if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) + genuine_intel = 1; + + if (debug) + fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", + (char *)&ebx, (char *)&edx, (char *)&ecx); + + get_cpuid_or_exit(1, &fms, &ebx, &ecx, &edx); + family = (fms >> 8) & 0xf; + model = (fms >> 4) & 0xf; + stepping = fms & 0xf; + if (family == 6 || family == 0xf) + model += ((fms >> 16) & 0xf) << 4; + + if (debug) { + fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", + max_level, family, model, stepping, family, model, stepping); + fprintf(stderr, "CPUID(1): %s %s %s %s %s %s %s %s\n", + ecx & (1 << 0) ? "SSE3" : "-", + ecx & (1 << 3) ? "MONITOR" : "-", + ecx & (1 << 7) ? "EIST" : "-", + ecx & (1 << 8) ? "TM2" : "-", + edx & (1 << 4) ? "TSC" : "-", + edx & (1 << 5) ? "MSR" : "-", + edx & (1 << 22) ? "ACPI-TM" : "-", + edx & (1 << 29) ? "TM" : "-"); + } + + if (!(edx & (1 << 5))) + errx(1, "CPUID: no MSR"); + + + get_cpuid_or_exit(0x6, &eax, &ebx, &ecx, &edx); + /* turbo_is_enabled already set */ + /* has_hwp already set */ + has_hwp_notify = eax & (1 << 8); + has_hwp_activity_window = eax & (1 << 9); + has_hwp_epp = eax & (1 << 10); + has_hwp_request_pkg = eax & (1 << 11); + + if (!has_hwp_request_pkg && update_hwp_use_pkg) + errx(1, "--hwp-use-pkg is not available on this hardware"); + + /* has_epb already set */ + + if (debug) + fprintf(stderr, + "CPUID(6): %sTURBO, %sHWP, %sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n", + turbo_is_enabled ? "" : "No-", + has_hwp ? "" : "No-", + has_hwp_notify ? "" : "No-", + has_hwp_activity_window ? "" : "No-", + has_hwp_epp ? "" : "No-", + has_hwp_request_pkg ? "" : "No-", + has_epb ? "" : "No-"); + + return; /* success */ +} + +int main(int argc, char **argv) +{ + set_base_cpu(); + probe_dev_msr(); + init_data_structures(); + + early_cpuid(); /* initial cpuid parse before cmdline */ + + cmdline(argc, argv); + + if (debug) + print_version(); + + parse_cpuid(); + + /* If CPU-set and PKG-set are not initialized, default to all CPUs */ + if ((cpu_selected_set == 0) && (pkg_selected_set == 0)) + cpu_selected_set = cpu_present_set; + + /* + * If HWP is being enabled, do it now, so that subsequent operations + * that access HWP registers can work. + */ + if (update_hwp_enable) + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, enable_hwp_on_cpu); + + /* If HWP present, but disabled, warn and ignore from here forward */ + verify_hwp_is_enabled(); + + if (req_update_bounds_check()) + return -EINVAL; + + /* display information only, no updates to settings */ + if (!update_epb && !update_turbo && !hwp_update_enabled()) { + if (cpu_selected_set) + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, print_cpu_msrs); + + if (has_hwp_request_pkg) { + if (pkg_selected_set == 0) + pkg_selected_set = pkg_present_set; + + for_packages(pkg_selected_set, print_pkg_msrs); + } + + return 0; + } + + /* update CPU set */ + if (cpu_selected_set) { + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs); + for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs); + } else if (pkg_selected_set) + for_packages(pkg_selected_set, update_hwp_request_pkg); + + return 0; +} |