summaryrefslogtreecommitdiffstats
path: root/drivers/cpufreq
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r--drivers/cpufreq/Kconfig29
-rw-r--r--drivers/cpufreq/Kconfig.arm26
-rw-r--r--drivers/cpufreq/amd-pstate-ut.c3
-rw-r--r--drivers/cpufreq/amd-pstate.c432
-rw-r--r--drivers/cpufreq/amd-pstate.h100
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c1
-rw-r--r--drivers/cpufreq/cpufreq.c15
-rw-r--r--drivers/cpufreq/cpufreq_ondemand.c1
-rw-r--r--drivers/cpufreq/imx6q-cpufreq.c45
-rw-r--r--drivers/cpufreq/intel_pstate.c46
-rw-r--r--drivers/cpufreq/scmi-cpufreq.c46
11 files changed, 595 insertions, 149 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 35efb53d54..94e55c4097 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -302,4 +302,33 @@ config QORIQ_CPUFREQ
which are capable of changing the CPU's frequency dynamically.
endif
+
+config ACPI_CPPC_CPUFREQ
+ tristate "CPUFreq driver based on the ACPI CPPC spec"
+ depends on ACPI_PROCESSOR
+ depends on ARM || ARM64 || RISCV
+ select ACPI_CPPC_LIB
+ help
+ This adds a CPUFreq driver which uses CPPC methods
+ as described in the ACPIv5.1 spec. CPPC stands for
+ Collaborative Processor Performance Controls. It
+ is based on an abstract continuous scale of CPU
+ performance values which allows the remote power
+ processor to flexibly optimize for power and
+ performance. CPPC relies on power management firmware
+ support for its operation.
+
+ If in doubt, say N.
+
+config ACPI_CPPC_CPUFREQ_FIE
+ bool "Frequency Invariance support for CPPC cpufreq driver"
+ depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY
+ depends on ARM || ARM64 || RISCV
+ default y
+ help
+ This extends frequency invariance support in the CPPC cpufreq driver,
+ by using CPPC delivered and reference performance counters.
+
+ If in doubt, say N.
+
endmenu
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index a0ebad7766..96b404ce82 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -3,32 +3,6 @@
# ARM CPU Frequency scaling drivers
#
-config ACPI_CPPC_CPUFREQ
- tristate "CPUFreq driver based on the ACPI CPPC spec"
- depends on ACPI_PROCESSOR
- select ACPI_CPPC_LIB
- help
- This adds a CPUFreq driver which uses CPPC methods
- as described in the ACPIv5.1 spec. CPPC stands for
- Collaborative Processor Performance Controls. It
- is based on an abstract continuous scale of CPU
- performance values which allows the remote power
- processor to flexibly optimize for power and
- performance. CPPC relies on power management firmware
- support for its operation.
-
- If in doubt, say N.
-
-config ACPI_CPPC_CPUFREQ_FIE
- bool "Frequency Invariance support for CPPC cpufreq driver"
- depends on ACPI_CPPC_CPUFREQ && GENERIC_ARCH_TOPOLOGY
- default y
- help
- This extends frequency invariance support in the CPPC cpufreq driver,
- by using CPPC delivered and reference performance counters.
-
- If in doubt, say N.
-
config ARM_ALLWINNER_SUN50I_CPUFREQ_NVMEM
tristate "Allwinner nvmem based SUN50I CPUFreq driver"
depends on ARCH_SUNXI
diff --git a/drivers/cpufreq/amd-pstate-ut.c b/drivers/cpufreq/amd-pstate-ut.c
index f04ae67dda..fc275d41d5 100644
--- a/drivers/cpufreq/amd-pstate-ut.c
+++ b/drivers/cpufreq/amd-pstate-ut.c
@@ -26,10 +26,11 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/fs.h>
-#include <linux/amd-pstate.h>
#include <acpi/cppc_acpi.h>
+#include "amd-pstate.h"
+
/*
* Abbreviations:
* amd_pstate_ut: used as a shortform for AMD P-State unit test.
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 07f3419954..6af175e6c0 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -36,7 +36,7 @@
#include <linux/delay.h>
#include <linux/uaccess.h>
#include <linux/static_call.h>
-#include <linux/amd-pstate.h>
+#include <linux/topology.h>
#include <acpi/processor.h>
#include <acpi/cppc_acpi.h>
@@ -45,10 +45,45 @@
#include <asm/processor.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
+
+#include "amd-pstate.h"
#include "amd-pstate-trace.h"
#define AMD_PSTATE_TRANSITION_LATENCY 20000
#define AMD_PSTATE_TRANSITION_DELAY 1000
+#define CPPC_HIGHEST_PERF_PERFORMANCE 196
+#define CPPC_HIGHEST_PERF_DEFAULT 166
+
+#define AMD_CPPC_EPP_PERFORMANCE 0x00
+#define AMD_CPPC_EPP_BALANCE_PERFORMANCE 0x80
+#define AMD_CPPC_EPP_BALANCE_POWERSAVE 0xBF
+#define AMD_CPPC_EPP_POWERSAVE 0xFF
+
+/*
+ * enum amd_pstate_mode - driver working mode of amd pstate
+ */
+enum amd_pstate_mode {
+ AMD_PSTATE_UNDEFINED = 0,
+ AMD_PSTATE_DISABLE,
+ AMD_PSTATE_PASSIVE,
+ AMD_PSTATE_ACTIVE,
+ AMD_PSTATE_GUIDED,
+ AMD_PSTATE_MAX,
+};
+
+static const char * const amd_pstate_mode_string[] = {
+ [AMD_PSTATE_UNDEFINED] = "undefined",
+ [AMD_PSTATE_DISABLE] = "disable",
+ [AMD_PSTATE_PASSIVE] = "passive",
+ [AMD_PSTATE_ACTIVE] = "active",
+ [AMD_PSTATE_GUIDED] = "guided",
+ NULL,
+};
+
+struct quirk_entry {
+ u32 nominal_freq;
+ u32 lowest_freq;
+};
/*
* TODO: We need more time to fine tune processors with shared memory solution
@@ -64,6 +99,8 @@ static struct cpufreq_driver amd_pstate_driver;
static struct cpufreq_driver amd_pstate_epp_driver;
static int cppc_state = AMD_PSTATE_UNDEFINED;
static bool cppc_enabled;
+static bool amd_pstate_prefcore = true;
+static struct quirk_entry *quirks;
/*
* AMD Energy Preference Performance (EPP)
@@ -108,6 +145,41 @@ static unsigned int epp_values[] = {
typedef int (*cppc_mode_transition_fn)(int);
+static struct quirk_entry quirk_amd_7k62 = {
+ .nominal_freq = 2600,
+ .lowest_freq = 550,
+};
+
+static int __init dmi_matched_7k62_bios_bug(const struct dmi_system_id *dmi)
+{
+ /**
+ * match the broken bios for family 17h processor support CPPC V2
+ * broken BIOS lack of nominal_freq and lowest_freq capabilities
+ * definition in ACPI tables
+ */
+ if (boot_cpu_has(X86_FEATURE_ZEN2)) {
+ quirks = dmi->driver_data;
+ pr_info("Overriding nominal and lowest frequencies for %s\n", dmi->ident);
+ return 1;
+ }
+
+ return 0;
+}
+
+static const struct dmi_system_id amd_pstate_quirks_table[] __initconst = {
+ {
+ .callback = dmi_matched_7k62_bios_bug,
+ .ident = "AMD EPYC 7K62",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VERSION, "5.14"),
+ DMI_MATCH(DMI_BIOS_RELEASE, "12/12/2019"),
+ },
+ .driver_data = &quirk_amd_7k62,
+ },
+ {}
+};
+MODULE_DEVICE_TABLE(dmi, amd_pstate_quirks_table);
+
static inline int get_mode_idx_from_str(const char *str, size_t size)
{
int i;
@@ -287,6 +359,21 @@ static inline int amd_pstate_enable(bool enable)
return static_call(amd_pstate_enable)(enable);
}
+static u32 amd_pstate_highest_perf_set(struct amd_cpudata *cpudata)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ /*
+ * For AMD CPUs with Family ID 19H and Model ID range 0x70 to 0x7f,
+ * the highest performance level is set to 196.
+ * https://bugzilla.kernel.org/show_bug.cgi?id=218759
+ */
+ if (c->x86 == 0x19 && (c->x86_model >= 0x70 && c->x86_model <= 0x7f))
+ return CPPC_HIGHEST_PERF_PERFORMANCE;
+
+ return CPPC_HIGHEST_PERF_DEFAULT;
+}
+
static int pstate_init_perf(struct amd_cpudata *cpudata)
{
u64 cap1;
@@ -297,13 +384,14 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
if (ret)
return ret;
- /*
- * TODO: Introduce AMD specific power feature.
- *
- * CPPC entry doesn't indicate the highest performance in some ASICs.
+ /* For platforms that do not support the preferred core feature, the
+ * highest_pef may be configured with 166 or 255, to avoid max frequency
+ * calculated wrongly. we take the AMD_CPPC_HIGHEST_PERF(cap1) value as
+ * the default max perf.
*/
- highest_perf = amd_get_highest_perf();
- if (highest_perf > AMD_CPPC_HIGHEST_PERF(cap1))
+ if (cpudata->hw_prefcore)
+ highest_perf = amd_pstate_highest_perf_set(cpudata);
+ else
highest_perf = AMD_CPPC_HIGHEST_PERF(cap1);
WRITE_ONCE(cpudata->highest_perf, highest_perf);
@@ -311,6 +399,7 @@ static int pstate_init_perf(struct amd_cpudata *cpudata)
WRITE_ONCE(cpudata->nominal_perf, AMD_CPPC_NOMINAL_PERF(cap1));
WRITE_ONCE(cpudata->lowest_nonlinear_perf, AMD_CPPC_LOWNONLIN_PERF(cap1));
WRITE_ONCE(cpudata->lowest_perf, AMD_CPPC_LOWEST_PERF(cap1));
+ WRITE_ONCE(cpudata->prefcore_ranking, AMD_CPPC_HIGHEST_PERF(cap1));
WRITE_ONCE(cpudata->min_limit_perf, AMD_CPPC_LOWEST_PERF(cap1));
return 0;
}
@@ -324,8 +413,9 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
if (ret)
return ret;
- highest_perf = amd_get_highest_perf();
- if (highest_perf > cppc_perf.highest_perf)
+ if (cpudata->hw_prefcore)
+ highest_perf = amd_pstate_highest_perf_set(cpudata);
+ else
highest_perf = cppc_perf.highest_perf;
WRITE_ONCE(cpudata->highest_perf, highest_perf);
@@ -334,6 +424,7 @@ static int cppc_init_perf(struct amd_cpudata *cpudata)
WRITE_ONCE(cpudata->lowest_nonlinear_perf,
cppc_perf.lowest_nonlinear_perf);
WRITE_ONCE(cpudata->lowest_perf, cppc_perf.lowest_perf);
+ WRITE_ONCE(cpudata->prefcore_ranking, cppc_perf.highest_perf);
WRITE_ONCE(cpudata->min_limit_perf, cppc_perf.lowest_perf);
if (cppc_state == AMD_PSTATE_ACTIVE)
@@ -477,12 +568,19 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy)
static int amd_pstate_update_min_max_limit(struct cpufreq_policy *policy)
{
- u32 max_limit_perf, min_limit_perf;
+ u32 max_limit_perf, min_limit_perf, lowest_perf;
struct amd_cpudata *cpudata = policy->driver_data;
max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
+ lowest_perf = READ_ONCE(cpudata->lowest_perf);
+ if (min_limit_perf < lowest_perf)
+ min_limit_perf = lowest_perf;
+
+ if (max_limit_perf < min_limit_perf)
+ max_limit_perf = min_limit_perf;
+
WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
WRITE_ONCE(cpudata->max_limit_freq, policy->max);
@@ -592,74 +690,22 @@ static void amd_pstate_adjust_perf(unsigned int cpu,
static int amd_get_min_freq(struct amd_cpudata *cpudata)
{
- struct cppc_perf_caps cppc_perf;
-
- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
- if (ret)
- return ret;
-
- /* Switch to khz */
- return cppc_perf.lowest_freq * 1000;
+ return READ_ONCE(cpudata->min_freq);
}
static int amd_get_max_freq(struct amd_cpudata *cpudata)
{
- struct cppc_perf_caps cppc_perf;
- u32 max_perf, max_freq, nominal_freq, nominal_perf;
- u64 boost_ratio;
-
- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
- if (ret)
- return ret;
-
- nominal_freq = cppc_perf.nominal_freq;
- nominal_perf = READ_ONCE(cpudata->nominal_perf);
- max_perf = READ_ONCE(cpudata->highest_perf);
-
- boost_ratio = div_u64(max_perf << SCHED_CAPACITY_SHIFT,
- nominal_perf);
-
- max_freq = nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT;
-
- /* Switch to khz */
- return max_freq * 1000;
+ return READ_ONCE(cpudata->max_freq);
}
static int amd_get_nominal_freq(struct amd_cpudata *cpudata)
{
- struct cppc_perf_caps cppc_perf;
-
- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
- if (ret)
- return ret;
-
- /* Switch to khz */
- return cppc_perf.nominal_freq * 1000;
+ return READ_ONCE(cpudata->nominal_freq);
}
static int amd_get_lowest_nonlinear_freq(struct amd_cpudata *cpudata)
{
- struct cppc_perf_caps cppc_perf;
- u32 lowest_nonlinear_freq, lowest_nonlinear_perf,
- nominal_freq, nominal_perf;
- u64 lowest_nonlinear_ratio;
-
- int ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
- if (ret)
- return ret;
-
- nominal_freq = cppc_perf.nominal_freq;
- nominal_perf = READ_ONCE(cpudata->nominal_perf);
-
- lowest_nonlinear_perf = cppc_perf.lowest_nonlinear_perf;
-
- lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
- nominal_perf);
-
- lowest_nonlinear_freq = nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT;
-
- /* Switch to khz */
- return lowest_nonlinear_freq * 1000;
+ return READ_ONCE(cpudata->lowest_nonlinear_freq);
}
static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
@@ -675,7 +721,7 @@ static int amd_pstate_set_boost(struct cpufreq_policy *policy, int state)
if (state)
policy->cpuinfo.max_freq = cpudata->max_freq;
else
- policy->cpuinfo.max_freq = cpudata->nominal_freq;
+ policy->cpuinfo.max_freq = cpudata->nominal_freq * 1000;
policy->max = policy->cpuinfo.max_freq;
@@ -706,6 +752,169 @@ static void amd_perf_ctl_reset(unsigned int cpu)
wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}
+/*
+ * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks
+ * due to locking, so queue the work for later.
+ */
+static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
+{
+ sched_set_itmt_support();
+}
+static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
+
+/*
+ * Get the highest performance register value.
+ * @cpu: CPU from which to get highest performance.
+ * @highest_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+static int amd_pstate_get_highest_perf(int cpu, u32 *highest_perf)
+{
+ int ret;
+
+ if (boot_cpu_has(X86_FEATURE_CPPC)) {
+ u64 cap1;
+
+ ret = rdmsrl_safe_on_cpu(cpu, MSR_AMD_CPPC_CAP1, &cap1);
+ if (ret)
+ return ret;
+ WRITE_ONCE(*highest_perf, AMD_CPPC_HIGHEST_PERF(cap1));
+ } else {
+ u64 cppc_highest_perf;
+
+ ret = cppc_get_highest_perf(cpu, &cppc_highest_perf);
+ if (ret)
+ return ret;
+ WRITE_ONCE(*highest_perf, cppc_highest_perf);
+ }
+
+ return (ret);
+}
+
+#define CPPC_MAX_PERF U8_MAX
+
+static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
+{
+ int ret, prio;
+ u32 highest_perf;
+
+ ret = amd_pstate_get_highest_perf(cpudata->cpu, &highest_perf);
+ if (ret)
+ return;
+
+ cpudata->hw_prefcore = true;
+ /* check if CPPC preferred core feature is enabled*/
+ if (highest_perf < CPPC_MAX_PERF)
+ prio = (int)highest_perf;
+ else {
+ pr_debug("AMD CPPC preferred core is unsupported!\n");
+ cpudata->hw_prefcore = false;
+ return;
+ }
+
+ if (!amd_pstate_prefcore)
+ return;
+
+ /*
+ * The priorities can be set regardless of whether or not
+ * sched_set_itmt_support(true) has been called and it is valid to
+ * update them at any time after it has been called.
+ */
+ sched_set_itmt_core_prio(prio, cpudata->cpu);
+
+ schedule_work(&sched_prefcore_work);
+}
+
+static void amd_pstate_update_limits(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+ struct amd_cpudata *cpudata = policy->driver_data;
+ u32 prev_high = 0, cur_high = 0;
+ int ret;
+ bool highest_perf_changed = false;
+
+ mutex_lock(&amd_pstate_driver_lock);
+ if ((!amd_pstate_prefcore) || (!cpudata->hw_prefcore))
+ goto free_cpufreq_put;
+
+ ret = amd_pstate_get_highest_perf(cpu, &cur_high);
+ if (ret)
+ goto free_cpufreq_put;
+
+ prev_high = READ_ONCE(cpudata->prefcore_ranking);
+ if (prev_high != cur_high) {
+ highest_perf_changed = true;
+ WRITE_ONCE(cpudata->prefcore_ranking, cur_high);
+
+ if (cur_high < CPPC_MAX_PERF)
+ sched_set_itmt_core_prio((int)cur_high, cpu);
+ }
+
+free_cpufreq_put:
+ cpufreq_cpu_put(policy);
+
+ if (!highest_perf_changed)
+ cpufreq_update_policy(cpu);
+
+ mutex_unlock(&amd_pstate_driver_lock);
+}
+
+/**
+ * amd_pstate_init_freq: Initialize the max_freq, min_freq,
+ * nominal_freq and lowest_nonlinear_freq for
+ * the @cpudata object.
+ *
+ * Requires: highest_perf, lowest_perf, nominal_perf and
+ * lowest_nonlinear_perf members of @cpudata to be
+ * initialized.
+ *
+ * Returns 0 on success, non-zero value on failure.
+ */
+static int amd_pstate_init_freq(struct amd_cpudata *cpudata)
+{
+ int ret;
+ u32 min_freq;
+ u32 highest_perf, max_freq;
+ u32 nominal_perf, nominal_freq;
+ u32 lowest_nonlinear_perf, lowest_nonlinear_freq;
+ u32 boost_ratio, lowest_nonlinear_ratio;
+ struct cppc_perf_caps cppc_perf;
+
+
+ ret = cppc_get_perf_caps(cpudata->cpu, &cppc_perf);
+ if (ret)
+ return ret;
+
+ if (quirks && quirks->lowest_freq)
+ min_freq = quirks->lowest_freq * 1000;
+ else
+ min_freq = cppc_perf.lowest_freq * 1000;
+
+ if (quirks && quirks->nominal_freq)
+ nominal_freq = quirks->nominal_freq ;
+ else
+ nominal_freq = cppc_perf.nominal_freq;
+
+ nominal_perf = READ_ONCE(cpudata->nominal_perf);
+
+ highest_perf = READ_ONCE(cpudata->highest_perf);
+ boost_ratio = div_u64(highest_perf << SCHED_CAPACITY_SHIFT, nominal_perf);
+ max_freq = (nominal_freq * boost_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
+
+ lowest_nonlinear_perf = READ_ONCE(cpudata->lowest_nonlinear_perf);
+ lowest_nonlinear_ratio = div_u64(lowest_nonlinear_perf << SCHED_CAPACITY_SHIFT,
+ nominal_perf);
+ lowest_nonlinear_freq = (nominal_freq * lowest_nonlinear_ratio >> SCHED_CAPACITY_SHIFT) * 1000;
+
+ WRITE_ONCE(cpudata->min_freq, min_freq);
+ WRITE_ONCE(cpudata->lowest_nonlinear_freq, lowest_nonlinear_freq);
+ WRITE_ONCE(cpudata->nominal_freq, nominal_freq);
+ WRITE_ONCE(cpudata->max_freq, max_freq);
+
+ return 0;
+}
+
static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
{
int min_freq, max_freq, nominal_freq, lowest_nonlinear_freq, ret;
@@ -727,10 +936,16 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
cpudata->cpu = policy->cpu;
+ amd_pstate_init_prefcore(cpudata);
+
ret = amd_pstate_init_perf(cpudata);
if (ret)
goto free_cpudata1;
+ ret = amd_pstate_init_freq(cpudata);
+ if (ret)
+ goto free_cpudata1;
+
min_freq = amd_get_min_freq(cpudata);
max_freq = amd_get_max_freq(cpudata);
nominal_freq = amd_get_nominal_freq(cpudata);
@@ -772,13 +987,8 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
goto free_cpudata2;
}
- /* Initial processor data capability frequencies */
- cpudata->max_freq = max_freq;
- cpudata->min_freq = min_freq;
cpudata->max_limit_freq = max_freq;
cpudata->min_limit_freq = min_freq;
- cpudata->nominal_freq = nominal_freq;
- cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
policy->driver_data = cpudata;
@@ -877,6 +1087,28 @@ static ssize_t show_amd_pstate_highest_perf(struct cpufreq_policy *policy,
return sysfs_emit(buf, "%u\n", perf);
}
+static ssize_t show_amd_pstate_prefcore_ranking(struct cpufreq_policy *policy,
+ char *buf)
+{
+ u32 perf;
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ perf = READ_ONCE(cpudata->prefcore_ranking);
+
+ return sysfs_emit(buf, "%u\n", perf);
+}
+
+static ssize_t show_amd_pstate_hw_prefcore(struct cpufreq_policy *policy,
+ char *buf)
+{
+ bool hw_prefcore;
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ hw_prefcore = READ_ONCE(cpudata->hw_prefcore);
+
+ return sysfs_emit(buf, "%s\n", str_enabled_disabled(hw_prefcore));
+}
+
static ssize_t show_energy_performance_available_preferences(
struct cpufreq_policy *policy, char *buf)
{
@@ -1074,18 +1306,29 @@ static ssize_t status_store(struct device *a, struct device_attribute *b,
return ret < 0 ? ret : count;
}
+static ssize_t prefcore_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%s\n", str_enabled_disabled(amd_pstate_prefcore));
+}
+
cpufreq_freq_attr_ro(amd_pstate_max_freq);
cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
+cpufreq_freq_attr_ro(amd_pstate_prefcore_ranking);
+cpufreq_freq_attr_ro(amd_pstate_hw_prefcore);
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);
static DEVICE_ATTR_RW(status);
+static DEVICE_ATTR_RO(prefcore);
static struct freq_attr *amd_pstate_attr[] = {
&amd_pstate_max_freq,
&amd_pstate_lowest_nonlinear_freq,
&amd_pstate_highest_perf,
+ &amd_pstate_prefcore_ranking,
+ &amd_pstate_hw_prefcore,
NULL,
};
@@ -1093,6 +1336,8 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
&amd_pstate_max_freq,
&amd_pstate_lowest_nonlinear_freq,
&amd_pstate_highest_perf,
+ &amd_pstate_prefcore_ranking,
+ &amd_pstate_hw_prefcore,
&energy_performance_preference,
&energy_performance_available_preferences,
NULL,
@@ -1100,6 +1345,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
static struct attribute *pstate_global_attributes[] = {
&dev_attr_status.attr,
+ &dev_attr_prefcore.attr,
NULL
};
@@ -1151,10 +1397,16 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
cpudata->cpu = policy->cpu;
cpudata->epp_policy = 0;
+ amd_pstate_init_prefcore(cpudata);
+
ret = amd_pstate_init_perf(cpudata);
if (ret)
goto free_cpudata1;
+ ret = amd_pstate_init_freq(cpudata);
+ if (ret)
+ goto free_cpudata1;
+
min_freq = amd_get_min_freq(cpudata);
max_freq = amd_get_max_freq(cpudata);
nominal_freq = amd_get_nominal_freq(cpudata);
@@ -1171,12 +1423,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
/* It will be updated by governor */
policy->cur = policy->cpuinfo.min_freq;
- /* Initial processor data capability frequencies */
- cpudata->max_freq = max_freq;
- cpudata->min_freq = min_freq;
- cpudata->nominal_freq = nominal_freq;
- cpudata->lowest_nonlinear_freq = lowest_nonlinear_freq;
-
policy->driver_data = cpudata;
cpudata->epp_cached = amd_pstate_get_epp(cpudata, 0);
@@ -1216,6 +1462,13 @@ free_cpudata1:
static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
{
+ struct amd_cpudata *cpudata = policy->driver_data;
+
+ if (cpudata) {
+ kfree(cpudata);
+ policy->driver_data = NULL;
+ }
+
pr_debug("CPU %d exiting\n", policy->cpu);
return 0;
}
@@ -1232,6 +1485,12 @@ static void amd_pstate_epp_update_limit(struct cpufreq_policy *policy)
max_limit_perf = div_u64(policy->max * cpudata->highest_perf, cpudata->max_freq);
min_limit_perf = div_u64(policy->min * cpudata->highest_perf, cpudata->max_freq);
+ if (min_limit_perf < min_perf)
+ min_limit_perf = min_perf;
+
+ if (max_limit_perf < min_limit_perf)
+ max_limit_perf = min_limit_perf;
+
WRITE_ONCE(cpudata->max_limit_perf, max_limit_perf);
WRITE_ONCE(cpudata->min_limit_perf, min_limit_perf);
@@ -1432,6 +1691,7 @@ static struct cpufreq_driver amd_pstate_driver = {
.suspend = amd_pstate_cpu_suspend,
.resume = amd_pstate_cpu_resume,
.set_boost = amd_pstate_set_boost,
+ .update_limits = amd_pstate_update_limits,
.name = "amd-pstate",
.attr = amd_pstate_attr,
};
@@ -1446,6 +1706,7 @@ static struct cpufreq_driver amd_pstate_epp_driver = {
.online = amd_pstate_epp_cpu_online,
.suspend = amd_pstate_epp_suspend,
.resume = amd_pstate_epp_resume,
+ .update_limits = amd_pstate_update_limits,
.name = "amd-pstate-epp",
.attr = amd_pstate_epp_attr,
};
@@ -1486,6 +1747,11 @@ static int __init amd_pstate_init(void)
if (cpufreq_get_current_driver())
return -EEXIST;
+ quirks = NULL;
+
+ /* check if this machine need CPPC quirks */
+ dmi_check_system(amd_pstate_quirks_table);
+
switch (cppc_state) {
case AMD_PSTATE_UNDEFINED:
/* Disable on the following configs by default:
@@ -1567,7 +1833,17 @@ static int __init amd_pstate_param(char *str)
return amd_pstate_set_driver(mode_idx);
}
+
+static int __init amd_prefcore_param(char *str)
+{
+ if (!strcmp(str, "disable"))
+ amd_pstate_prefcore = false;
+
+ return 0;
+}
+
early_param("amd_pstate", amd_pstate_param);
+early_param("amd_prefcore", amd_prefcore_param);
MODULE_AUTHOR("Huang Rui <ray.huang@amd.com>");
MODULE_DESCRIPTION("AMD Processor P-state Frequency Driver");
diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
new file mode 100644
index 0000000000..bc341f3590
--- /dev/null
+++ b/drivers/cpufreq/amd-pstate.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2022 Advanced Micro Devices, Inc.
+ *
+ * Author: Meng Li <li.meng@amd.com>
+ */
+
+#ifndef _LINUX_AMD_PSTATE_H
+#define _LINUX_AMD_PSTATE_H
+
+#include <linux/pm_qos.h>
+
+/*********************************************************************
+ * AMD P-state INTERFACE *
+ *********************************************************************/
+/**
+ * struct amd_aperf_mperf
+ * @aperf: actual performance frequency clock count
+ * @mperf: maximum performance frequency clock count
+ * @tsc: time stamp counter
+ */
+struct amd_aperf_mperf {
+ u64 aperf;
+ u64 mperf;
+ u64 tsc;
+};
+
+/**
+ * struct amd_cpudata - private CPU data for AMD P-State
+ * @cpu: CPU number
+ * @req: constraint request to apply
+ * @cppc_req_cached: cached performance request hints
+ * @highest_perf: the maximum performance an individual processor may reach,
+ * assuming ideal conditions
+ * For platforms that do not support the preferred core feature, the
+ * highest_pef may be configured with 166 or 255, to avoid max frequency
+ * calculated wrongly. we take the fixed value as the highest_perf.
+ * @nominal_perf: the maximum sustained performance level of the processor,
+ * assuming ideal operating conditions
+ * @lowest_nonlinear_perf: the lowest performance level at which nonlinear power
+ * savings are achieved
+ * @lowest_perf: the absolute lowest performance level of the processor
+ * @prefcore_ranking: the preferred core ranking, the higher value indicates a higher
+ * priority.
+ * @max_freq: the frequency that mapped to highest_perf
+ * @min_freq: the frequency that mapped to lowest_perf
+ * @nominal_freq: the frequency that mapped to nominal_perf
+ * @lowest_nonlinear_freq: the frequency that mapped to lowest_nonlinear_perf
+ * @cur: Difference of Aperf/Mperf/tsc count between last and current sample
+ * @prev: Last Aperf/Mperf/tsc count value read from register
+ * @freq: current cpu frequency value
+ * @boost_supported: check whether the Processor or SBIOS supports boost mode
+ * @hw_prefcore: check whether HW supports preferred core featue.
+ * Only when hw_prefcore and early prefcore param are true,
+ * AMD P-State driver supports preferred core featue.
+ * @epp_policy: Last saved policy used to set energy-performance preference
+ * @epp_cached: Cached CPPC energy-performance preference value
+ * @policy: Cpufreq policy value
+ * @cppc_cap1_cached Cached MSR_AMD_CPPC_CAP1 register value
+ *
+ * The amd_cpudata is key private data for each CPU thread in AMD P-State, and
+ * represents all the attributes and goals that AMD P-State requests at runtime.
+ */
+struct amd_cpudata {
+ int cpu;
+
+ struct freq_qos_request req[2];
+ u64 cppc_req_cached;
+
+ u32 highest_perf;
+ u32 nominal_perf;
+ u32 lowest_nonlinear_perf;
+ u32 lowest_perf;
+ u32 prefcore_ranking;
+ u32 min_limit_perf;
+ u32 max_limit_perf;
+ u32 min_limit_freq;
+ u32 max_limit_freq;
+
+ u32 max_freq;
+ u32 min_freq;
+ u32 nominal_freq;
+ u32 lowest_nonlinear_freq;
+
+ struct amd_aperf_mperf cur;
+ struct amd_aperf_mperf prev;
+
+ u64 freq;
+ bool boost_supported;
+ bool hw_prefcore;
+
+ /* EPP feature related attributes*/
+ s16 epp_policy;
+ s16 epp_cached;
+ u32 policy;
+ u64 cppc_cap1_cached;
+ bool suspended;
+};
+
+#endif /* _LINUX_AMD_PSTATE_H */
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index bd1e1357ce..b993a49808 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -156,6 +156,7 @@ static const struct of_device_id blocklist[] __initconst = {
{ .compatible = "qcom,sc7280", },
{ .compatible = "qcom,sc8180x", },
{ .compatible = "qcom,sc8280xp", },
+ { .compatible = "qcom,sdm670", },
{ .compatible = "qcom,sdm845", },
{ .compatible = "qcom,sdx75", },
{ .compatible = "qcom,sm6115", },
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 86f1bc7754..fd9c3ed21f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -576,17 +576,26 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy)
latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
if (latency) {
+ unsigned int max_delay_us = 2 * MSEC_PER_SEC;
+
+ /*
+ * If the platform already has high transition_latency, use it
+ * as-is.
+ */
+ if (latency > max_delay_us)
+ return latency;
+
/*
- * For platforms that can change the frequency very fast (< 10
+ * For platforms that can change the frequency very fast (< 2
* us), the above formula gives a decent transition delay. But
* for platforms where transition_latency is in milliseconds, it
* ends up giving unrealistic values.
*
- * Cap the default transition delay to 10 ms, which seems to be
+ * Cap the default transition delay to 2 ms, which seems to be
* a reasonable amount of time after which we should reevaluate
* the frequency.
*/
- return min(latency * LATENCY_MULTIPLIER, (unsigned int)10000);
+ return min(latency * LATENCY_MULTIPLIER, max_delay_us);
}
return LATENCY_MULTIPLIER;
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index c52d19d675..a7c38b8b3e 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -22,7 +22,6 @@
#define DEF_SAMPLING_DOWN_FACTOR (1)
#define MAX_SAMPLING_DOWN_FACTOR (100000)
#define MICRO_FREQUENCY_UP_THRESHOLD (95)
-#define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000)
#define MIN_FREQUENCY_UP_THRESHOLD (1)
#define MAX_FREQUENCY_UP_THRESHOLD (100)
diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 33728c242f..c20d3ecc5a 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -14,6 +14,8 @@
#include <linux/pm_opp.h>
#include <linux/platform_device.h>
#include <linux/regulator/consumer.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
#define PU_SOC_VOLTAGE_NORMAL 1250000
#define PU_SOC_VOLTAGE_HIGH 1275000
@@ -225,8 +227,6 @@ static void imx6x_disable_freq_in_opp(struct device *dev, unsigned long freq)
static int imx6q_opp_check_speed_grading(struct device *dev)
{
- struct device_node *np;
- void __iomem *base;
u32 val;
int ret;
@@ -235,16 +235,11 @@ static int imx6q_opp_check_speed_grading(struct device *dev)
if (ret)
return ret;
} else {
- np = of_find_compatible_node(NULL, NULL, "fsl,imx6q-ocotp");
- if (!np)
- return -ENOENT;
+ struct regmap *ocotp;
- base = of_iomap(np, 0);
- of_node_put(np);
- if (!base) {
- dev_err(dev, "failed to map ocotp\n");
- return -EFAULT;
- }
+ ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6q-ocotp");
+ if (IS_ERR(ocotp))
+ return -ENOENT;
/*
* SPEED_GRADING[1:0] defines the max speed of ARM:
@@ -254,8 +249,7 @@ static int imx6q_opp_check_speed_grading(struct device *dev)
* 2b'00: 792000000Hz;
* We need to set the max speed of ARM according to fuse map.
*/
- val = readl_relaxed(base + OCOTP_CFG3);
- iounmap(base);
+ regmap_read(ocotp, OCOTP_CFG3, &val);
}
val >>= OCOTP_CFG3_SPEED_SHIFT;
@@ -290,25 +284,16 @@ static int imx6ul_opp_check_speed_grading(struct device *dev)
if (ret)
return ret;
} else {
- struct device_node *np;
- void __iomem *base;
-
- np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-ocotp");
- if (!np)
- np = of_find_compatible_node(NULL, NULL,
- "fsl,imx6ull-ocotp");
- if (!np)
- return -ENOENT;
+ struct regmap *ocotp;
- base = of_iomap(np, 0);
- of_node_put(np);
- if (!base) {
- dev_err(dev, "failed to map ocotp\n");
- return -EFAULT;
- }
+ ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6ul-ocotp");
+ if (IS_ERR(ocotp))
+ ocotp = syscon_regmap_lookup_by_compatible("fsl,imx6ull-ocotp");
+
+ if (IS_ERR(ocotp))
+ return -ENOENT;
- val = readl_relaxed(base + OCOTP_CFG3);
- iounmap(base);
+ regmap_read(ocotp, OCOTP_CFG3, &val);
}
/*
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 79619227ea..dbbf299f42 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -25,6 +25,7 @@
#include <linux/acpi.h>
#include <linux/vmalloc.h>
#include <linux/pm_qos.h>
+#include <linux/bitfield.h>
#include <trace/events/power.h>
#include <asm/cpu.h>
@@ -201,8 +202,6 @@ struct global_params {
* @prev_aperf: Last APERF value read from APERF MSR
* @prev_mperf: Last MPERF value read from MPERF MSR
* @prev_tsc: Last timestamp counter (TSC) value
- * @prev_cummulative_iowait: IO Wait time difference from last and
- * current sample
* @sample: Storage for storing last Sample data
* @min_perf_ratio: Minimum capacity in terms of PERF or HWP ratios
* @max_perf_ratio: Maximum capacity in terms of PERF or HWP ratios
@@ -241,7 +240,6 @@ struct cpudata {
u64 prev_aperf;
u64 prev_mperf;
u64 prev_tsc;
- u64 prev_cummulative_iowait;
struct sample sample;
int32_t min_perf_ratio;
int32_t max_perf_ratio;
@@ -3407,14 +3405,31 @@ static bool intel_pstate_hwp_is_enabled(void)
return !!(value & 0x1);
}
-static const struct x86_cpu_id intel_epp_balance_perf[] = {
+#define POWERSAVE_MASK GENMASK(7, 0)
+#define BALANCE_POWER_MASK GENMASK(15, 8)
+#define BALANCE_PERFORMANCE_MASK GENMASK(23, 16)
+#define PERFORMANCE_MASK GENMASK(31, 24)
+
+#define HWP_SET_EPP_VALUES(powersave, balance_power, balance_perf, performance) \
+ (FIELD_PREP_CONST(POWERSAVE_MASK, powersave) |\
+ FIELD_PREP_CONST(BALANCE_POWER_MASK, balance_power) |\
+ FIELD_PREP_CONST(BALANCE_PERFORMANCE_MASK, balance_perf) |\
+ FIELD_PREP_CONST(PERFORMANCE_MASK, performance))
+
+#define HWP_SET_DEF_BALANCE_PERF_EPP(balance_perf) \
+ (HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, HWP_EPP_BALANCE_POWERSAVE,\
+ balance_perf, HWP_EPP_PERFORMANCE))
+
+static const struct x86_cpu_id intel_epp_default[] = {
/*
* Set EPP value as 102, this is the max suggested EPP
* which can result in one core turbo frequency for
* AlderLake Mobile CPUs.
*/
- X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, 102),
- X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 32),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)),
+ X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)),
+ X86_MATCH_INTEL_FAM6_MODEL(METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE,
+ HWP_EPP_BALANCE_POWERSAVE, 115, 16)),
{}
};
@@ -3512,11 +3527,24 @@ hwp_cpu_matched:
intel_pstate_sysfs_expose_params();
if (hwp_active) {
- const struct x86_cpu_id *id = x86_match_cpu(intel_epp_balance_perf);
+ const struct x86_cpu_id *id = x86_match_cpu(intel_epp_default);
const struct x86_cpu_id *hybrid_id = x86_match_cpu(intel_hybrid_scaling_factor);
- if (id)
- epp_values[EPP_INDEX_BALANCE_PERFORMANCE] = id->driver_data;
+ if (id) {
+ epp_values[EPP_INDEX_POWERSAVE] =
+ FIELD_GET(POWERSAVE_MASK, id->driver_data);
+ epp_values[EPP_INDEX_BALANCE_POWERSAVE] =
+ FIELD_GET(BALANCE_POWER_MASK, id->driver_data);
+ epp_values[EPP_INDEX_BALANCE_PERFORMANCE] =
+ FIELD_GET(BALANCE_PERFORMANCE_MASK, id->driver_data);
+ epp_values[EPP_INDEX_PERFORMANCE] =
+ FIELD_GET(PERFORMANCE_MASK, id->driver_data);
+ pr_debug("Updated EPPs powersave:%x balanced power:%x balanced perf:%x performance:%x\n",
+ epp_values[EPP_INDEX_POWERSAVE],
+ epp_values[EPP_INDEX_BALANCE_POWERSAVE],
+ epp_values[EPP_INDEX_BALANCE_PERFORMANCE],
+ epp_values[EPP_INDEX_PERFORMANCE]);
+ }
if (hybrid_id) {
hybrid_scaling_factor = hybrid_id->driver_data;
diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c
index 4ee23f4ebf..3b4f6bfb2f 100644
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -30,6 +30,7 @@ struct scmi_data {
static struct scmi_protocol_handle *ph;
static const struct scmi_perf_proto_ops *perf_ops;
+static struct cpufreq_driver scmi_cpufreq_driver;
static unsigned int scmi_cpufreq_get_rate(unsigned int cpu)
{
@@ -144,6 +145,35 @@ scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power,
return 0;
}
+static int
+scmi_get_rate_limit(u32 domain, bool has_fast_switch)
+{
+ int ret, rate_limit;
+
+ if (has_fast_switch) {
+ /*
+ * Fast channels are used whenever available,
+ * so use their rate_limit value if populated.
+ */
+ ret = perf_ops->fast_switch_rate_limit(ph, domain,
+ &rate_limit);
+ if (!ret && rate_limit)
+ return rate_limit;
+ }
+
+ ret = perf_ops->rate_limit_get(ph, domain, &rate_limit);
+ if (ret)
+ return 0;
+
+ return rate_limit;
+}
+
+static struct freq_attr *scmi_cpufreq_hw_attr[] = {
+ &cpufreq_freq_attr_scaling_available_freqs,
+ NULL,
+ NULL,
+};
+
static int scmi_cpufreq_init(struct cpufreq_policy *policy)
{
int ret, nr_opp, domain;
@@ -250,6 +280,20 @@ static int scmi_cpufreq_init(struct cpufreq_policy *policy)
policy->fast_switch_possible =
perf_ops->fast_switch_possible(ph, domain);
+ policy->transition_delay_us =
+ scmi_get_rate_limit(domain, policy->fast_switch_possible);
+
+ if (policy_has_boost_freq(policy)) {
+ ret = cpufreq_enable_boost_support();
+ if (ret) {
+ dev_warn(cpu_dev, "failed to enable boost: %d\n", ret);
+ goto out_free_opp;
+ } else {
+ scmi_cpufreq_hw_attr[1] = &cpufreq_freq_attr_scaling_boost_freqs;
+ scmi_cpufreq_driver.boost_enabled = true;
+ }
+ }
+
return 0;
out_free_opp:
@@ -308,7 +352,7 @@ static struct cpufreq_driver scmi_cpufreq_driver = {
CPUFREQ_NEED_INITIAL_FREQ_CHECK |
CPUFREQ_IS_COOLING_DEV,
.verify = cpufreq_generic_frequency_table_verify,
- .attr = cpufreq_generic_attr,
+ .attr = scmi_cpufreq_hw_attr,
.target_index = scmi_cpufreq_set_target,
.fast_switch = scmi_cpufreq_fast_switch,
.get = scmi_cpufreq_get_rate,