From 8b0a8165cdad0f4133837d753649ef4682e42c3b Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 7 Aug 2024 15:11:40 +0200 Subject: Merging upstream version 6.9.7. Signed-off-by: Daniel Baumann --- drivers/cpufreq/Kconfig | 29 +++ drivers/cpufreq/Kconfig.arm | 26 --- drivers/cpufreq/amd-pstate-ut.c | 3 +- drivers/cpufreq/amd-pstate.c | 432 ++++++++++++++++++++++++++++------- drivers/cpufreq/amd-pstate.h | 100 ++++++++ drivers/cpufreq/cpufreq-dt-platdev.c | 1 + drivers/cpufreq/cpufreq.c | 15 +- drivers/cpufreq/cpufreq_ondemand.c | 1 - drivers/cpufreq/imx6q-cpufreq.c | 45 ++-- drivers/cpufreq/intel_pstate.c | 46 +++- drivers/cpufreq/scmi-cpufreq.c | 46 +++- 11 files changed, 595 insertions(+), 149 deletions(-) create mode 100644 drivers/cpufreq/amd-pstate.h (limited to 'drivers/cpufreq') diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 35efb53d54..94e55c4097 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -302,4 +302,33 @@ config QORIQ_CPUFREQ which are capable of changing the CPU's frequency dynamically. endif + +config ACPI_CPPC_CPUFREQ + tristate "CPUFreq driver based on the ACPI CPPC spec" + depends on ACPI_PROCESSOR + depends on ARM || ARM64 || RISCV + select ACPI_CPPC_LIB + help + This adds a CPUFreq driver which uses CPPC methods + as described in the ACPIv5.1 spec. CPPC stands for + Collaborative Processor Performance Controls. It + is based on an abstract continuous scale of CPU + performance values which allows the remote power + processor to flexibly optimize for power and + performance. CPPC relies on power management firmware + support for its operation. + + If in doubt, say N. + +config ACPI_CPPC_CPUFREQ_FIE + bool "Frequency Invariance support for CPPC cpufreq driver" + depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY + depends on ARM || ARM64 || RISCV + default y + help + This extends frequency invariance support in the CPPC cpufreq driver, + by using CPPC delivered and reference performance counters. + + If in doubt, say N. + endmenu diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index a0ebad7766..96b404ce82 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -3,32 +3,6 @@ # ARM CPU Frequency scaling drivers # -config ACPI_CPPC_CPUFREQ - tristate "CPUFreq driver based on the ACPI CPPC spec" - depends on ACPI_PROCESSOR - select ACPI_CPPC_LIB - help - This adds a CPUFreq driver which uses CPPC methods - as described in the ACPIv5.1 spec. CPPC stands for - Collaborative Processor Performance Controls. It - is based on an abstract continuous scale of CPU - performance values which allows the remote power - processor to flexibly optimize for power and - performance. CPPC relies on power management firmware - support for its operation. - - If in doubt, say N. - -config ACPI_CPPC_CPUFREQ_FIE - bool "Frequency Invariance support for CPPC cpufreq driver" - depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY - default y - help - This extends frequency invariance support in the CPPC cpufreq driver, - by using CPPC delivered and reference performance counters. - - If in doubt, say N. - config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM tristate "Allwinner nvmem based SUN50I CPUFreq driver" depends on ARCH_SUNXI diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c index f04ae67dda..fc275d41d5 100644 --- a/drivers/cpufreq/amd-pstate-ut.c +++ b/drivers/cpufreq/amd-pstate-ut.c @@ -26,10 +26,11 @@ #include #include #include -#include #include +#include "amd-pstate.h" + /* * Abbreviations: * amd_pstate_ut: used as a shortform for AMD P-State unit test. diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c index 07f3419954..6af175e6c0 100644 --- a/drivers/cpufreq/amd-pstate.c +++ b/drivers/cpufreq/amd-pstate.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include @@ -45,10 +45,45 @@ #include #include #include + +#include "amd-pstate.h" #include "amd-pstate-trace.h" #define AMD_PSTATE_TRANSITION_LATENCY 20000 #define AMD_PSTATE_TRANSITION_DELAY 1000 +#define CPPC_HIGHEST_PERF_PERFORMANCE 196 +#define CPPC_HIGHEST_PERF_DEFAULT 166 + +#define AMD_CPPC_EPP_PERFORMANCE 0x00 +#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80 +#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF +#define AMD_CPPC_EPP_POWERSAVE 0xFF + +/* + * enum amd_pstate_mode - driver working mode of amd pstate + */ +enum amd_pstate_mode { + AMD_PSTATE_UNDEFINED = 0, + AMD_PSTATE_DISABLE, + AMD_PSTATE_PASSIVE, + AMD_PSTATE_ACTIVE, + AMD_PSTATE_GUIDED, + AMD_PSTATE_MAX, +}; + +static const char * const amd_pstate_mode_string[] = { + [AMD_PSTATE_UNDEFINED] = "undefined", + [AMD_PSTATE_DISABLE] = "disable", + [AMD_PSTATE_PASSIVE] = "passive", + [AMD_PSTATE_ACTIVE] = "active", + [AMD_PSTATE_GUIDED] = "guided", + NULL, +}; + +struct quirk_entry { + u32 nominal_freq; + u32 lowest_freq; +}; /* * TODO: We need more time to fine tune processors with shared memory solution @@ -64,6 +99,8 @@ static struct cpufreq_driver amd_pstate_driver; static struct cpufreq_driver amd_pstate_epp_driver; static int cppc_state = AMD_PSTATE_UNDEFINED; static bool cppc_enabled; +static bool amd_pstate_prefcore = true; +static struct quirk_entry *quirks; /* * AMD Energy Preference Performance (EPP) @@ -108,6 +145,41 @@ static unsigned int epp_values[] = { typedef int (*cppc_mode_transition_fn)(int); +static struct quirk_entry quirk_amd_7k62 = { + .nominal_freq = 2600, + .lowest_freq = 550, +}; + +static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi) +{ + /** + * match the broken bios for family 17h processor support CPPC V2 + * broken BIOS lack of nominal_freq and lowest_freq capabilities + * definition in ACPI tables + */ + if (boot_cpu_has(X86_FEATURE_ZEN2)) { + quirks = dmi->driver_data; + pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident); + return 1; + } + + return 0; +} + +static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = { + { + .callback = dmi_matched_7k62_bios_bug, + .ident = "AMD EPYC 7K62", + .matches = { + DMI_MATCH(DMI_BIOS_VERSION, "5.14"), + DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"), + }, + .driver_data = &quirk_amd_7k62, + }, + {} +}; +MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table); + static inline int get_mode_idx_from_str(const char *str, size_t size) { int i; @@ -287,6 +359,21 @@ static inline int amd_pstate_enable(bool enable) return static_call(amd_pstate_enable)(enable); } +static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + /* + * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f, + * the highest performance level is set to 196. + * https://bugzilla.kernel.org/show_bug.cgi?id=218759 + */ + if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f)) + return CPPC_HIGHEST_PERF_PERFORMANCE; + + return CPPC_HIGHEST_PERF_DEFAULT; +} + static int pstate_init_perf(struct amd_cpudata *cpudata) { u64 cap1; @@ -297,13 +384,14 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - /* - * TODO: Introduce AMD specific power feature. - * - * CPPC entry doesn't indicate the highest performance in some ASICs. + /* For platforms that do not support the preferred core feature, the + * highest_pef may be configured with 166 or 255, to avoid max frequency + * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as + * the default max perf. */ - highest_perf = amd_get_highest_perf(); - if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1)) + if (cpudata->hw_prefcore) + highest_perf = amd_pstate_highest_perf_set(cpudata); + else highest_perf = AMD_CPPC_HIGHEST_PERF(cap1); WRITE_ONCE(cpudata->highest_perf, highest_perf); @@ -311,6 +399,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1)); WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1)); WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1)); + WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1)); WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1)); return 0; } @@ -324,8 +413,9 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) if (ret) return ret; - highest_perf = amd_get_highest_perf(); - if (highest_perf > cppc_perf.highest_perf) + if (cpudata->hw_prefcore) + highest_perf = amd_pstate_highest_perf_set(cpudata); + else highest_perf = cppc_perf.highest_perf; WRITE_ONCE(cpudata->highest_perf, highest_perf); @@ -334,6 +424,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata) WRITE_ONCE(cpudata->lowest_nonlinear_perf, cppc_perf.lowest_nonlinear_perf); WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf); + WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf); WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf); if (cppc_state == AMD_PSTATE_ACTIVE) @@ -477,12 +568,19 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy) static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy) { - u32 max_limit_perf, min_limit_perf; + u32 max_limit_perf, min_limit_perf, lowest_perf; struct amd_cpudata *cpudata = policy->driver_data; max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + lowest_perf = READ_ONCE(cpudata->lowest_perf); + if (min_limit_perf < lowest_perf) + min_limit_perf = lowest_perf; + + if (max_limit_perf < min_limit_perf) + max_limit_perf = min_limit_perf; + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); WRITE_ONCE(cpudata->max_limit_freq, policy->max); @@ -592,74 +690,22 @@ static void amd_pstate_adjust_perf(unsigned int cpu, static int amd_get_min_freq(struct amd_cpudata *cpudata) { - struct cppc_perf_caps cppc_perf; - - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; - - /* Switch to khz */ - return cppc_perf.lowest_freq * 1000; + return READ_ONCE(cpudata->min_freq); } static int amd_get_max_freq(struct amd_cpudata *cpudata) { - struct cppc_perf_caps cppc_perf; - u32 max_perf, max_freq, nominal_freq, nominal_perf; - u64 boost_ratio; - - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; - - nominal_freq = cppc_perf.nominal_freq; - nominal_perf = READ_ONCE(cpudata->nominal_perf); - max_perf = READ_ONCE(cpudata->highest_perf); - - boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT, - nominal_perf); - - max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT; - - /* Switch to khz */ - return max_freq * 1000; + return READ_ONCE(cpudata->max_freq); } static int amd_get_nominal_freq(struct amd_cpudata *cpudata) { - struct cppc_perf_caps cppc_perf; - - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; - - /* Switch to khz */ - return cppc_perf.nominal_freq * 1000; + return READ_ONCE(cpudata->nominal_freq); } static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata) { - struct cppc_perf_caps cppc_perf; - u32 lowest_nonlinear_freq, lowest_nonlinear_perf, - nominal_freq, nominal_perf; - u64 lowest_nonlinear_ratio; - - int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); - if (ret) - return ret; - - nominal_freq = cppc_perf.nominal_freq; - nominal_perf = READ_ONCE(cpudata->nominal_perf); - - lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf; - - lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, - nominal_perf); - - lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT; - - /* Switch to khz */ - return lowest_nonlinear_freq * 1000; + return READ_ONCE(cpudata->lowest_nonlinear_freq); } static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) @@ -675,7 +721,7 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state) if (state) policy->cpuinfo.max_freq = cpudata->max_freq; else - policy->cpuinfo.max_freq = cpudata->nominal_freq; + policy->cpuinfo.max_freq = cpudata->nominal_freq * 1000; policy->max = policy->cpuinfo.max_freq; @@ -706,6 +752,169 @@ static void amd_perf_ctl_reset(unsigned int cpu) wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0); } +/* + * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks + * due to locking, so queue the work for later. + */ +static void amd_pstste_sched_prefcore_workfn(struct work_struct *work) +{ + sched_set_itmt_support(); +} +static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn); + +/* + * Get the highest performance register value. + * @cpu: CPU from which to get highest performance. + * @highest_perf: Return address. + * + * Return: 0 for success, -EIO otherwise. + */ +static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf) +{ + int ret; + + if (boot_cpu_has(X86_FEATURE_CPPC)) { + u64 cap1; + + ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1)); + } else { + u64 cppc_highest_perf; + + ret = cppc_get_highest_perf(cpu, &cppc_highest_perf); + if (ret) + return ret; + WRITE_ONCE(*highest_perf, cppc_highest_perf); + } + + return (ret); +} + +#define CPPC_MAX_PERF U8_MAX + +static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata) +{ + int ret, prio; + u32 highest_perf; + + ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf); + if (ret) + return; + + cpudata->hw_prefcore = true; + /* check if CPPC preferred core feature is enabled*/ + if (highest_perf < CPPC_MAX_PERF) + prio = (int)highest_perf; + else { + pr_debug("AMD CPPC preferred core is unsupported!\n"); + cpudata->hw_prefcore = false; + return; + } + + if (!amd_pstate_prefcore) + return; + + /* + * The priorities can be set regardless of whether or not + * sched_set_itmt_support(true) has been called and it is valid to + * update them at any time after it has been called. + */ + sched_set_itmt_core_prio(prio, cpudata->cpu); + + schedule_work(&sched_prefcore_work); +} + +static void amd_pstate_update_limits(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + struct amd_cpudata *cpudata = policy->driver_data; + u32 prev_high = 0, cur_high = 0; + int ret; + bool highest_perf_changed = false; + + mutex_lock(&amd_pstate_driver_lock); + if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore)) + goto free_cpufreq_put; + + ret = amd_pstate_get_highest_perf(cpu, &cur_high); + if (ret) + goto free_cpufreq_put; + + prev_high = READ_ONCE(cpudata->prefcore_ranking); + if (prev_high != cur_high) { + highest_perf_changed = true; + WRITE_ONCE(cpudata->prefcore_ranking, cur_high); + + if (cur_high < CPPC_MAX_PERF) + sched_set_itmt_core_prio((int)cur_high, cpu); + } + +free_cpufreq_put: + cpufreq_cpu_put(policy); + + if (!highest_perf_changed) + cpufreq_update_policy(cpu); + + mutex_unlock(&amd_pstate_driver_lock); +} + +/** + * amd_pstate_init_freq: Initialize the max_freq, min_freq, + * nominal_freq and lowest_nonlinear_freq for + * the @cpudata object. + * + * Requires: highest_perf, lowest_perf, nominal_perf and + * lowest_nonlinear_perf members of @cpudata to be + * initialized. + * + * Returns 0 on success, non-zero value on failure. + */ +static int amd_pstate_init_freq(struct amd_cpudata *cpudata) +{ + int ret; + u32 min_freq; + u32 highest_perf, max_freq; + u32 nominal_perf, nominal_freq; + u32 lowest_nonlinear_perf, lowest_nonlinear_freq; + u32 boost_ratio, lowest_nonlinear_ratio; + struct cppc_perf_caps cppc_perf; + + + ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf); + if (ret) + return ret; + + if (quirks && quirks->lowest_freq) + min_freq = quirks->lowest_freq * 1000; + else + min_freq = cppc_perf.lowest_freq * 1000; + + if (quirks && quirks->nominal_freq) + nominal_freq = quirks->nominal_freq ; + else + nominal_freq = cppc_perf.nominal_freq; + + nominal_perf = READ_ONCE(cpudata->nominal_perf); + + highest_perf = READ_ONCE(cpudata->highest_perf); + boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf); + max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + + lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf); + lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT, + nominal_perf); + lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000; + + WRITE_ONCE(cpudata->min_freq, min_freq); + WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq); + WRITE_ONCE(cpudata->nominal_freq, nominal_freq); + WRITE_ONCE(cpudata->max_freq, max_freq); + + return 0; +} + static int amd_pstate_cpu_init(struct cpufreq_policy *policy) { int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret; @@ -727,10 +936,16 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; + ret = amd_pstate_init_freq(cpudata); + if (ret) + goto free_cpudata1; + min_freq = amd_get_min_freq(cpudata); max_freq = amd_get_max_freq(cpudata); nominal_freq = amd_get_nominal_freq(cpudata); @@ -772,13 +987,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy) goto free_cpudata2; } - /* Initial processor data capability frequencies */ - cpudata->max_freq = max_freq; - cpudata->min_freq = min_freq; cpudata->max_limit_freq = max_freq; cpudata->min_limit_freq = min_freq; - cpudata->nominal_freq = nominal_freq; - cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; policy->driver_data = cpudata; @@ -877,6 +1087,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy, return sysfs_emit(buf, "%u\n", perf); } +static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy, + char *buf) +{ + u32 perf; + struct amd_cpudata *cpudata = policy->driver_data; + + perf = READ_ONCE(cpudata->prefcore_ranking); + + return sysfs_emit(buf, "%u\n", perf); +} + +static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy, + char *buf) +{ + bool hw_prefcore; + struct amd_cpudata *cpudata = policy->driver_data; + + hw_prefcore = READ_ONCE(cpudata->hw_prefcore); + + return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore)); +} + static ssize_t show_energy_performance_available_preferences( struct cpufreq_policy *policy, char *buf) { @@ -1074,18 +1306,29 @@ static ssize_t status_store(struct device *a, struct device_attribute *b, return ret < 0 ? ret : count; } +static ssize_t prefcore_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore)); +} + cpufreq_freq_attr_ro(amd_pstate_max_freq); cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq); cpufreq_freq_attr_ro(amd_pstate_highest_perf); +cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking); +cpufreq_freq_attr_ro(amd_pstate_hw_prefcore); cpufreq_freq_attr_rw(energy_performance_preference); cpufreq_freq_attr_ro(energy_performance_available_preferences); static DEVICE_ATTR_RW(status); +static DEVICE_ATTR_RO(prefcore); static struct freq_attr *amd_pstate_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, NULL, }; @@ -1093,6 +1336,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = { &amd_pstate_max_freq, &amd_pstate_lowest_nonlinear_freq, &amd_pstate_highest_perf, + &amd_pstate_prefcore_ranking, + &amd_pstate_hw_prefcore, &energy_performance_preference, &energy_performance_available_preferences, NULL, @@ -1100,6 +1345,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = { static struct attribute *pstate_global_attributes[] = { &dev_attr_status.attr, + &dev_attr_prefcore.attr, NULL }; @@ -1151,10 +1397,16 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) cpudata->cpu = policy->cpu; cpudata->epp_policy = 0; + amd_pstate_init_prefcore(cpudata); + ret = amd_pstate_init_perf(cpudata); if (ret) goto free_cpudata1; + ret = amd_pstate_init_freq(cpudata); + if (ret) + goto free_cpudata1; + min_freq = amd_get_min_freq(cpudata); max_freq = amd_get_max_freq(cpudata); nominal_freq = amd_get_nominal_freq(cpudata); @@ -1171,12 +1423,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy) /* It will be updated by governor */ policy->cur = policy->cpuinfo.min_freq; - /* Initial processor data capability frequencies */ - cpudata->max_freq = max_freq; - cpudata->min_freq = min_freq; - cpudata->nominal_freq = nominal_freq; - cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq; - policy->driver_data = cpudata; cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0); @@ -1216,6 +1462,13 @@ free_cpudata1: static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy) { + struct amd_cpudata *cpudata = policy->driver_data; + + if (cpudata) { + kfree(cpudata); + policy->driver_data = NULL; + } + pr_debug("CPU %d exiting\n", policy->cpu); return 0; } @@ -1232,6 +1485,12 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy) max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq); min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq); + if (min_limit_perf < min_perf) + min_limit_perf = min_perf; + + if (max_limit_perf < min_limit_perf) + max_limit_perf = min_limit_perf; + WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf); WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf); @@ -1432,6 +1691,7 @@ static struct cpufreq_driver amd_pstate_driver = { .suspend = amd_pstate_cpu_suspend, .resume = amd_pstate_cpu_resume, .set_boost = amd_pstate_set_boost, + .update_limits = amd_pstate_update_limits, .name = "amd-pstate", .attr = amd_pstate_attr, }; @@ -1446,6 +1706,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = { .online = amd_pstate_epp_cpu_online, .suspend = amd_pstate_epp_suspend, .resume = amd_pstate_epp_resume, + .update_limits = amd_pstate_update_limits, .name = "amd-pstate-epp", .attr = amd_pstate_epp_attr, }; @@ -1486,6 +1747,11 @@ static int __init amd_pstate_init(void) if (cpufreq_get_current_driver()) return -EEXIST; + quirks = NULL; + + /* check if this machine need CPPC quirks */ + dmi_check_system(amd_pstate_quirks_table); + switch (cppc_state) { case AMD_PSTATE_UNDEFINED: /* Disable on the following configs by default: @@ -1567,7 +1833,17 @@ static int __init amd_pstate_param(char *str) return amd_pstate_set_driver(mode_idx); } + +static int __init amd_prefcore_param(char *str) +{ + if (!strcmp(str, "disable")) + amd_pstate_prefcore = false; + + return 0; +} + early_param("amd_pstate", amd_pstate_param); +early_param("amd_prefcore", amd_prefcore_param); MODULE_AUTHOR("Huang Rui "); MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver"); diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h new file mode 100644 index 0000000000..bc341f3590 --- /dev/null +++ b/drivers/cpufreq/amd-pstate.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * + * Author: Meng Li + */ + +#ifndef _LINUX_AMD_PSTATE_H +#define _LINUX_AMD_PSTATE_H + +#include + +/********************************************************************* + * AMD P-state INTERFACE * + *********************************************************************/ +/** + * struct amd_aperf_mperf + * @aperf: actual performance frequency clock count + * @mperf: maximum performance frequency clock count + * @tsc: time stamp counter + */ +struct amd_aperf_mperf { + u64 aperf; + u64 mperf; + u64 tsc; +}; + +/** + * struct amd_cpudata - private CPU data for AMD P-State + * @cpu: CPU number + * @req: constraint request to apply + * @cppc_req_cached: cached performance request hints + * @highest_perf: the maximum performance an individual processor may reach, + * assuming ideal conditions + * For platforms that do not support the preferred core feature, the + * highest_pef may be configured with 166 or 255, to avoid max frequency + * calculated wrongly. we take the fixed value as the highest_perf. + * @nominal_perf: the maximum sustained performance level of the processor, + * assuming ideal operating conditions + * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power + * savings are achieved + * @lowest_perf: the absolute lowest performance level of the processor + * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher + * priority. + * @max_freq: the frequency that mapped to highest_perf + * @min_freq: the frequency that mapped to lowest_perf + * @nominal_freq: the frequency that mapped to nominal_perf + * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf + * @cur: Difference of Aperf/Mperf/tsc count between last and current sample + * @prev: Last Aperf/Mperf/tsc count value read from register + * @freq: current cpu frequency value + * @boost_supported: check whether the Processor or SBIOS supports boost mode + * @hw_prefcore: check whether HW supports preferred core featue. + * Only when hw_prefcore and early prefcore param are true, + * AMD P-State driver supports preferred core featue. + * @epp_policy: Last saved policy used to set energy-performance preference + * @epp_cached: Cached CPPC energy-performance preference value + * @policy: Cpufreq policy value + * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value + * + * The amd_cpudata is key private data for each CPU thread in AMD P-State, and + * represents all the attributes and goals that AMD P-State requests at runtime. + */ +struct amd_cpudata { + int cpu; + + struct freq_qos_request req[2]; + u64 cppc_req_cached; + + u32 highest_perf; + u32 nominal_perf; + u32 lowest_nonlinear_perf; + u32 lowest_perf; + u32 prefcore_ranking; + u32 min_limit_perf; + u32 max_limit_perf; + u32 min_limit_freq; + u32 max_limit_freq; + + u32 max_freq; + u32 min_freq; + u32 nominal_freq; + u32 lowest_nonlinear_freq; + + struct amd_aperf_mperf cur; + struct amd_aperf_mperf prev; + + u64 freq; + bool boost_supported; + bool hw_prefcore; + + /* EPP feature related attributes*/ + s16 epp_policy; + s16 epp_cached; + u32 policy; + u64 cppc_cap1_cached; + bool suspended; +}; + +#endif /* _LINUX_AMD_PSTATE_H */ diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index bd1e1357ce..b993a49808 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -156,6 +156,7 @@ static const struct of_device_id blocklist[] __initconst = { { .compatible = "qcom,sc7280", }, { .compatible = "qcom,sc8180x", }, { .compatible = "qcom,sc8280xp", }, + { .compatible = "qcom,sdm670", }, { .compatible = "qcom,sdm845", }, { .compatible = "qcom,sdx75", }, { .compatible = "qcom,sm6115", }, diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 86f1bc7754..fd9c3ed21f 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -576,17 +576,26 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy) latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC; if (latency) { + unsigned int max_delay_us = 2 * MSEC_PER_SEC; + + /* + * If the platform already has high transition_latency, use it + * as-is. + */ + if (latency > max_delay_us) + return latency; + /* - * For platforms that can change the frequency very fast (< 10 + * For platforms that can change the frequency very fast (< 2 * us), the above formula gives a decent transition delay. But * for platforms where transition_latency is in milliseconds, it * ends up giving unrealistic values. * - * Cap the default transition delay to 10 ms, which seems to be + * Cap the default transition delay to 2 ms, which seems to be * a reasonable amount of time after which we should reevaluate * the frequency. */ - return min(latency * LATENCY_MULTIPLIER, (unsigned int)10000); + return min(latency * LATENCY_MULTIPLIER, max_delay_us); } return LATENCY_MULTIPLIER; diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index c52d19d675..a7c38b8b3e 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -22,7 +22,6 @@ #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (100000) #define MICRO_FREQUENCY_UP_THRESHOLD (95) -#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) #define MIN_FREQUENCY_UP_THRESHOLD (1) #define MAX_FREQUENCY_UP_THRESHOLD (100) diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c index 33728c242f..c20d3ecc5a 100644 --- a/drivers/cpufreq/imx6q-cpufreq.c +++ b/drivers/cpufreq/imx6q-cpufreq.c @@ -14,6 +14,8 @@ #include #include #include +#include +#include #define PU_SOC_VOLTAGE_NORMAL 1250000 #define PU_SOC_VOLTAGE_HIGH 1275000 @@ -225,8 +227,6 @@ static void imx6x_disable_freq_in_opp(struct device *dev, unsigned long freq) static int imx6q_opp_check_speed_grading(struct device *dev) { - struct device_node *np; - void __iomem *base; u32 val; int ret; @@ -235,16 +235,11 @@ static int imx6q_opp_check_speed_grading(struct device *dev) if (ret) return ret; } else { - np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-ocotp"); - if (!np) - return -ENOENT; + struct regmap *ocotp; - base = of_iomap(np, 0); - of_node_put(np); - if (!base) { - dev_err(dev, "failed to map ocotp\n"); - return -EFAULT; - } + ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6q-ocotp"); + if (IS_ERR(ocotp)) + return -ENOENT; /* * SPEED_GRADING[1:0] defines the max speed of ARM: @@ -254,8 +249,7 @@ static int imx6q_opp_check_speed_grading(struct device *dev) * 2b'00: 792000000Hz; * We need to set the max speed of ARM according to fuse map. */ - val = readl_relaxed(base + OCOTP_CFG3); - iounmap(base); + regmap_read(ocotp, OCOTP_CFG3, &val); } val >>= OCOTP_CFG3_SPEED_SHIFT; @@ -290,25 +284,16 @@ static int imx6ul_opp_check_speed_grading(struct device *dev) if (ret) return ret; } else { - struct device_node *np; - void __iomem *base; - - np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-ocotp"); - if (!np) - np = of_find_compatible_node(NULL, NULL, - "fsl,imx6ull-ocotp"); - if (!np) - return -ENOENT; + struct regmap *ocotp; - base = of_iomap(np, 0); - of_node_put(np); - if (!base) { - dev_err(dev, "failed to map ocotp\n"); - return -EFAULT; - } + ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6ul-ocotp"); + if (IS_ERR(ocotp)) + ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6ull-ocotp"); + + if (IS_ERR(ocotp)) + return -ENOENT; - val = readl_relaxed(base + OCOTP_CFG3); - iounmap(base); + regmap_read(ocotp, OCOTP_CFG3, &val); } /* diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 79619227ea..dbbf299f42 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -201,8 +202,6 @@ struct global_params { * @prev_aperf: Last APERF value read from APERF MSR * @prev_mperf: Last MPERF value read from MPERF MSR * @prev_tsc: Last timestamp counter (TSC) value - * @prev_cummulative_iowait: IO Wait time difference from last and - * current sample * @sample: Storage for storing last Sample data * @min_perf_ratio: Minimum capacity in terms of PERF or HWP ratios * @max_perf_ratio: Maximum capacity in terms of PERF or HWP ratios @@ -241,7 +240,6 @@ struct cpudata { u64 prev_aperf; u64 prev_mperf; u64 prev_tsc; - u64 prev_cummulative_iowait; struct sample sample; int32_t min_perf_ratio; int32_t max_perf_ratio; @@ -3407,14 +3405,31 @@ static bool intel_pstate_hwp_is_enabled(void) return !!(value & 0x1); } -static const struct x86_cpu_id intel_epp_balance_perf[] = { +#define POWERSAVE_MASK GENMASK(7, 0) +#define BALANCE_POWER_MASK GENMASK(15, 8) +#define BALANCE_PERFORMANCE_MASK GENMASK(23, 16) +#define PERFORMANCE_MASK GENMASK(31, 24) + +#define HWP_SET_EPP_VALUES(powersave, balance_power, balance_perf, performance) \ + (FIELD_PREP_CONST(POWERSAVE_MASK, powersave) |\ + FIELD_PREP_CONST(BALANCE_POWER_MASK, balance_power) |\ + FIELD_PREP_CONST(BALANCE_PERFORMANCE_MASK, balance_perf) |\ + FIELD_PREP_CONST(PERFORMANCE_MASK, performance)) + +#define HWP_SET_DEF_BALANCE_PERF_EPP(balance_perf) \ + (HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, HWP_EPP_BALANCE_POWERSAVE,\ + balance_perf, HWP_EPP_PERFORMANCE)) + +static const struct x86_cpu_id intel_epp_default[] = { /* * Set EPP value as 102, this is the max suggested EPP * which can result in one core turbo frequency for * AlderLake Mobile CPUs. */ - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102), - X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 32), + X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), + X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, + HWP_EPP_BALANCE_POWERSAVE, 115, 16)), {} }; @@ -3512,11 +3527,24 @@ hwp_cpu_matched: intel_pstate_sysfs_expose_params(); if (hwp_active) { - const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf); + const struct x86_cpu_id *id = x86_match_cpu(intel_epp_default); const struct x86_cpu_id *hybrid_id = x86_match_cpu(intel_hybrid_scaling_factor); - if (id) - epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data; + if (id) { + epp_values[EPP_INDEX_POWERSAVE] = + FIELD_GET(POWERSAVE_MASK, id->driver_data); + epp_values[EPP_INDEX_BALANCE_POWERSAVE] = + FIELD_GET(BALANCE_POWER_MASK, id->driver_data); + epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = + FIELD_GET(BALANCE_PERFORMANCE_MASK, id->driver_data); + epp_values[EPP_INDEX_PERFORMANCE] = + FIELD_GET(PERFORMANCE_MASK, id->driver_data); + pr_debug("Updated EPPs powersave:%x balanced power:%x balanced perf:%x performance:%x\n", + epp_values[EPP_INDEX_POWERSAVE], + epp_values[EPP_INDEX_BALANCE_POWERSAVE], + epp_values[EPP_INDEX_BALANCE_PERFORMANCE], + epp_values[EPP_INDEX_PERFORMANCE]); + } if (hybrid_id) { hybrid_scaling_factor = hybrid_id->driver_data; diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 4ee23f4ebf..3b4f6bfb2f 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -30,6 +30,7 @@ struct scmi_data { static struct scmi_protocol_handle *ph; static const struct scmi_perf_proto_ops *perf_ops; +static struct cpufreq_driver scmi_cpufreq_driver; static unsigned int scmi_cpufreq_get_rate(unsigned int cpu) { @@ -144,6 +145,35 @@ scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power, return 0; } +static int +scmi_get_rate_limit(u32 domain, bool has_fast_switch) +{ + int ret, rate_limit; + + if (has_fast_switch) { + /* + * Fast channels are used whenever available, + * so use their rate_limit value if populated. + */ + ret = perf_ops->fast_switch_rate_limit(ph, domain, + &rate_limit); + if (!ret && rate_limit) + return rate_limit; + } + + ret = perf_ops->rate_limit_get(ph, domain, &rate_limit); + if (ret) + return 0; + + return rate_limit; +} + +static struct freq_attr *scmi_cpufreq_hw_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, + NULL, +}; + static int scmi_cpufreq_init(struct cpufreq_policy *policy) { int ret, nr_opp, domain; @@ -250,6 +280,20 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy) policy->fast_switch_possible = perf_ops->fast_switch_possible(ph, domain); + policy->transition_delay_us = + scmi_get_rate_limit(domain, policy->fast_switch_possible); + + if (policy_has_boost_freq(policy)) { + ret = cpufreq_enable_boost_support(); + if (ret) { + dev_warn(cpu_dev, "failed to enable boost: %d\n", ret); + goto out_free_opp; + } else { + scmi_cpufreq_hw_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs; + scmi_cpufreq_driver.boost_enabled = true; + } + } + return 0; out_free_opp: @@ -308,7 +352,7 @@ static struct cpufreq_driver scmi_cpufreq_driver = { CPUFREQ_NEED_INITIAL_FREQ_CHECK | CPUFREQ_IS_COOLING_DEV, .verify = cpufreq_generic_frequency_table_verify, - .attr = cpufreq_generic_attr, + .attr = scmi_cpufreq_hw_attr, .target_index = scmi_cpufreq_set_target, .fast_switch = scmi_cpufreq_fast_switch, .get = scmi_cpufreq_get_rate, -- cgit v1.2.3