summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu/resctrl
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 18:47:48 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-18 18:47:48 +0000
commita1865fbd182b17f2d2f465f557af5b45501c5f1c (patch)
tree59da519ef2e59c763bb8efdbe67bc348cf833767 /arch/x86/kernel/cpu/resctrl
parentAdding upstream version 6.7.9. (diff)
downloadlinux-a1865fbd182b17f2d2f465f557af5b45501c5f1c.tar.xz
linux-a1865fbd182b17f2d2f465f557af5b45501c5f1c.zip
Adding upstream version 6.7.12.upstream/6.7.12
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'arch/x86/kernel/cpu/resctrl')
-rw-r--r--arch/x86/kernel/cpu/resctrl/core.c10
-rw-r--r--arch/x86/kernel/cpu/resctrl/internal.h8
-rw-r--r--arch/x86/kernel/cpu/resctrl/monitor.c48
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c14
4 files changed, 31 insertions, 49 deletions
diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c
index 19e0681f04..d04371e851 100644
--- a/arch/x86/kernel/cpu/resctrl/core.c
+++ b/arch/x86/kernel/cpu/resctrl/core.c
@@ -231,9 +231,7 @@ static bool __get_mem_config_intel(struct rdt_resource *r)
static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
- union cpuid_0x10_3_eax eax;
- union cpuid_0x10_x_edx edx;
- u32 ebx, ecx, subleaf;
+ u32 eax, ebx, ecx, edx, subleaf;
/*
* Query CPUID_Fn80000020_EDX_x01 for MBA and
@@ -241,9 +239,9 @@ static bool __rdt_get_mem_config_amd(struct rdt_resource *r)
*/
subleaf = (r->rid == RDT_RESOURCE_SMBA) ? 2 : 1;
- cpuid_count(0x80000020, subleaf, &eax.full, &ebx, &ecx, &edx.full);
- hw_res->num_closid = edx.split.cos_max + 1;
- r->default_ctrl = MAX_MBA_BW_AMD;
+ cpuid_count(0x80000020, subleaf, &eax, &ebx, &ecx, &edx);
+ hw_res->num_closid = edx + 1;
+ r->default_ctrl = 1 << eax;
/* AMD does not use delay */
r->membw.delay_linear = false;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index a4f1aa15f0..52e7e7deee 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -18,7 +18,6 @@
#define MBM_OVERFLOW_INTERVAL 1000
#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
-#define MAX_MBA_BW_AMD 0x800
#define MBM_CNTR_WIDTH_OFFSET_AMD 20
#define RMID_VAL_ERROR BIT_ULL(63)
@@ -296,14 +295,10 @@ struct rftype {
* struct mbm_state - status for each MBM counter in each domain
* @prev_bw_bytes: Previous bytes value read for bandwidth calculation
* @prev_bw: The most recent bandwidth in MBps
- * @delta_bw: Difference between the current and previous bandwidth
- * @delta_comp: Indicates whether to compute the delta_bw
*/
struct mbm_state {
u64 prev_bw_bytes;
u32 prev_bw;
- u32 delta_bw;
- bool delta_comp;
};
/**
@@ -395,6 +390,8 @@ struct rdt_parse_data {
* @msr_update: Function pointer to update QOS MSRs
* @mon_scale: cqm counter * mon_scale = occupancy in bytes
* @mbm_width: Monitor width, to detect and correct for overflow.
+ * @mbm_cfg_mask: Bandwidth sources that can be tracked when Bandwidth
+ * Monitoring Event Configuration (BMEC) is supported.
* @cdp_enabled: CDP state of this resource
*
* Members of this structure are either private to the architecture
@@ -409,6 +406,7 @@ struct rdt_hw_resource {
struct rdt_resource *r);
unsigned int mon_scale;
unsigned int mbm_width;
+ unsigned int mbm_cfg_mask;
bool cdp_enabled;
};
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index f136ac0468..3a6c069614 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -440,9 +440,6 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
cur_bw = bytes / SZ_1M;
- if (m->delta_comp)
- m->delta_bw = abs(cur_bw - m->prev_bw);
- m->delta_comp = false;
m->prev_bw = cur_bw;
}
@@ -520,11 +517,11 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
{
u32 closid, rmid, cur_msr_val, new_msr_val;
struct mbm_state *pmbm_data, *cmbm_data;
- u32 cur_bw, delta_bw, user_bw;
struct rdt_resource *r_mba;
struct rdt_domain *dom_mba;
struct list_head *head;
struct rdtgroup *entry;
+ u32 cur_bw, user_bw;
if (!is_mbm_local_enabled())
return;
@@ -543,7 +540,6 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
cur_bw = pmbm_data->prev_bw;
user_bw = dom_mba->mbps_val[closid];
- delta_bw = pmbm_data->delta_bw;
/* MBA resource doesn't support CDP */
cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
@@ -555,49 +551,31 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
list_for_each_entry(entry, head, mon.crdtgrp_list) {
cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
cur_bw += cmbm_data->prev_bw;
- delta_bw += cmbm_data->delta_bw;
}
/*
* Scale up/down the bandwidth linearly for the ctrl group. The
* bandwidth step is the bandwidth granularity specified by the
* hardware.
- *
- * The delta_bw is used when increasing the bandwidth so that we
- * dont alternately increase and decrease the control values
- * continuously.
- *
- * For ex: consider cur_bw = 90MBps, user_bw = 100MBps and if
- * bandwidth step is 20MBps(> user_bw - cur_bw), we would keep
- * switching between 90 and 110 continuously if we only check
- * cur_bw < user_bw.
+ * Always increase throttling if current bandwidth is above the
+ * target set by user.
+ * But avoid thrashing up and down on every poll by checking
+ * whether a decrease in throttling is likely to push the group
+ * back over target. E.g. if currently throttling to 30% of bandwidth
+ * on a system with 10% granularity steps, check whether moving to
+ * 40% would go past the limit by multiplying current bandwidth by
+ * "(30 + 10) / 30".
*/
if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
} else if (cur_msr_val < MAX_MBA_BW &&
- (user_bw > (cur_bw + delta_bw))) {
+ (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
} else {
return;
}
resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
-
- /*
- * Delta values are updated dynamically package wise for each
- * rdtgrp every time the throttle MSR changes value.
- *
- * This is because (1)the increase in bandwidth is not perfectly
- * linear and only "approximately" linear even when the hardware
- * says it is linear.(2)Also since MBA is a core specific
- * mechanism, the delta values vary based on number of cores used
- * by the rdtgrp.
- */
- pmbm_data->delta_comp = true;
- list_for_each_entry(entry, head, mon.crdtgrp_list) {
- cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
- cmbm_data->delta_comp = true;
- }
}
static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid)
@@ -813,6 +791,12 @@ int __init rdt_get_mon_l3_config(struct rdt_resource *r)
return ret;
if (rdt_cpu_has(X86_FEATURE_BMEC)) {
+ u32 eax, ebx, ecx, edx;
+
+ /* Detect list of bandwidth sources that can be tracked */
+ cpuid_count(0x80000020, 3, &eax, &ebx, &ecx, &edx);
+ hw_res->mbm_cfg_mask = ecx & MAX_EVT_CONFIG_BITS;
+
if (rdt_cpu_has(X86_FEATURE_CQM_MBM_TOTAL)) {
mbm_total_event.configurable = true;
mbm_config_rftype_init("mbm_total_bytes_config");
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 69a1de9238..2b69e560b0 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -1620,12 +1620,6 @@ static int mbm_config_write_domain(struct rdt_resource *r,
struct mon_config_info mon_info = {0};
int ret = 0;
- /* mon_config cannot be more than the supported set of events */
- if (val > MAX_EVT_CONFIG_BITS) {
- rdt_last_cmd_puts("Invalid event configuration\n");
- return -EINVAL;
- }
-
/*
* Read the current config value first. If both are the same then
* no need to write it again.
@@ -1663,6 +1657,7 @@ out:
static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
{
+ struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
char *dom_str = NULL, *id_str;
unsigned long dom_id, val;
struct rdt_domain *d;
@@ -1686,6 +1681,13 @@ next:
return -EINVAL;
}
+ /* Value from user cannot be more than the supported set of events */
+ if ((val & hw_res->mbm_cfg_mask) != val) {
+ rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
+ hw_res->mbm_cfg_mask);
+ return -EINVAL;
+ }
+
list_for_each_entry(d, &r->domains, list) {
if (d->id == dom_id) {
ret = mbm_config_write_domain(r, d, evtid, val);