summaryrefslogtreecommitdiffstats
path: root/kernel/sched/topology.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/sched/topology.c')
-rw-r--r--kernel/sched/topology.c100
1 files changed, 48 insertions, 52 deletions
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 10d1391e74..a6994a1fcc 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -322,7 +322,6 @@ static struct ctl_table sched_energy_aware_sysctls[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
- {}
};
static int __init sched_energy_aware_sysctl_init(void)
@@ -657,13 +656,13 @@ static void destroy_sched_domains(struct sched_domain *sd)
}
/*
- * Keep a special pointer to the highest sched_domain that has
- * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this
- * allows us to avoid some pointer chasing select_idle_sibling().
+ * Keep a special pointer to the highest sched_domain that has SD_SHARE_LLC set
+ * (Last Level Cache Domain) for this allows us to avoid some pointer chasing
+ * select_idle_sibling().
*
- * Also keep a unique ID per domain (we use the first CPU number in
- * the cpumask of the domain), this allows us to quickly tell if
- * two CPUs are in the same cache domain, see cpus_share_cache().
+ * Also keep a unique ID per domain (we use the first CPU number in the cpumask
+ * of the domain), this allows us to quickly tell if two CPUs are in the same
+ * cache domain, see cpus_share_cache().
*/
DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
DEFINE_PER_CPU(int, sd_llc_size);
@@ -684,7 +683,7 @@ static void update_top_cache_domain(int cpu)
int id = cpu;
int size = 1;
- sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
+ sd = highest_flag_domain(cpu, SD_SHARE_LLC);
if (sd) {
id = cpumask_first(sched_domain_span(sd));
size = cpumask_weight(sched_domain_span(sd));
@@ -1330,23 +1329,12 @@ next:
}
/*
- * Asymmetric CPU capacity bits
- */
-struct asym_cap_data {
- struct list_head link;
- unsigned long capacity;
- unsigned long cpus[];
-};
-
-/*
* Set of available CPUs grouped by their corresponding capacities
* Each list entry contains a CPU mask reflecting CPUs that share the same
* capacity.
* The lifespan of data is unlimited.
*/
-static LIST_HEAD(asym_cap_list);
-
-#define cpu_capacity_span(asym_data) to_cpumask((asym_data)->cpus)
+LIST_HEAD(asym_cap_list);
/*
* Verify whether there is any CPU capacity asymmetry in a given sched domain.
@@ -1386,21 +1374,39 @@ asym_cpu_capacity_classify(const struct cpumask *sd_span,
}
+static void free_asym_cap_entry(struct rcu_head *head)
+{
+ struct asym_cap_data *entry = container_of(head, struct asym_cap_data, rcu);
+ kfree(entry);
+}
+
static inline void asym_cpu_capacity_update_data(int cpu)
{
unsigned long capacity = arch_scale_cpu_capacity(cpu);
- struct asym_cap_data *entry = NULL;
+ struct asym_cap_data *insert_entry = NULL;
+ struct asym_cap_data *entry;
+ /*
+ * Search if capacity already exits. If not, track which the entry
+ * where we should insert to keep the list ordered descendingly.
+ */
list_for_each_entry(entry, &asym_cap_list, link) {
if (capacity == entry->capacity)
goto done;
+ else if (!insert_entry && capacity > entry->capacity)
+ insert_entry = list_prev_entry(entry, link);
}
entry = kzalloc(sizeof(*entry) + cpumask_size(), GFP_KERNEL);
if (WARN_ONCE(!entry, "Failed to allocate memory for asymmetry data\n"))
return;
entry->capacity = capacity;
- list_add(&entry->link, &asym_cap_list);
+
+ /* If NULL then the new capacity is the smallest, add last. */
+ if (!insert_entry)
+ list_add_tail_rcu(&entry->link, &asym_cap_list);
+ else
+ list_add_rcu(&entry->link, &insert_entry->link);
done:
__cpumask_set_cpu(cpu, cpu_capacity_span(entry));
}
@@ -1423,8 +1429,8 @@ static void asym_cpu_capacity_scan(void)
list_for_each_entry_safe(entry, next, &asym_cap_list, link) {
if (cpumask_empty(cpu_capacity_span(entry))) {
- list_del(&entry->link);
- kfree(entry);
+ list_del_rcu(&entry->link);
+ call_rcu(&entry->rcu, free_asym_cap_entry);
}
}
@@ -1434,8 +1440,8 @@ static void asym_cpu_capacity_scan(void)
*/
if (list_is_singular(&asym_cap_list)) {
entry = list_first_entry(&asym_cap_list, typeof(*entry), link);
- list_del(&entry->link);
- kfree(entry);
+ list_del_rcu(&entry->link);
+ call_rcu(&entry->rcu, free_asym_cap_entry);
}
}
@@ -1468,7 +1474,7 @@ static void set_domain_attribute(struct sched_domain *sd,
} else
request = attr->relax_domain_level;
- if (sd->level > request) {
+ if (sd->level >= request) {
/* Turn off idle balance on this domain: */
sd->flags &= ~(SD_BALANCE_WAKE|SD_BALANCE_NEWIDLE);
}
@@ -1551,11 +1557,12 @@ static struct cpumask ***sched_domains_numa_masks;
*
* These flags are purely descriptive of the topology and do not prescribe
* behaviour. Behaviour is artificial and mapped in the below sd_init()
- * function:
+ * function. For details, see include/linux/sched/sd_flags.h.
*
- * SD_SHARE_CPUCAPACITY - describes SMT topologies
- * SD_SHARE_PKG_RESOURCES - describes shared caches
- * SD_NUMA - describes NUMA topologies
+ * SD_SHARE_CPUCAPACITY
+ * SD_SHARE_LLC
+ * SD_CLUSTER
+ * SD_NUMA
*
* Odd one out, which beside describing the topology has a quirk also
* prescribes the desired behaviour that goes along with it:
@@ -1565,7 +1572,7 @@ static struct cpumask ***sched_domains_numa_masks;
#define TOPOLOGY_SD_FLAGS \
(SD_SHARE_CPUCAPACITY | \
SD_CLUSTER | \
- SD_SHARE_PKG_RESOURCES | \
+ SD_SHARE_LLC | \
SD_NUMA | \
SD_ASYM_PACKING)
@@ -1608,7 +1615,7 @@ sd_init(struct sched_domain_topology_level *tl,
| 0*SD_BALANCE_WAKE
| 1*SD_WAKE_AFFINE
| 0*SD_SHARE_CPUCAPACITY
- | 0*SD_SHARE_PKG_RESOURCES
+ | 0*SD_SHARE_LLC
| 0*SD_SERIALIZE
| 1*SD_PREFER_SIBLING
| 0*SD_NUMA
@@ -1645,7 +1652,7 @@ sd_init(struct sched_domain_topology_level *tl,
if (sd->flags & SD_SHARE_CPUCAPACITY) {
sd->imbalance_pct = 110;
- } else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+ } else if (sd->flags & SD_SHARE_LLC) {
sd->imbalance_pct = 117;
sd->cache_nice_tries = 1;
@@ -1670,7 +1677,7 @@ sd_init(struct sched_domain_topology_level *tl,
* For all levels sharing cache; connect a sched_domain_shared
* instance.
*/
- if (sd->flags & SD_SHARE_PKG_RESOURCES) {
+ if (sd->flags & SD_SHARE_LLC) {
sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
atomic_inc(&sd->shared->ref);
atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
@@ -2346,7 +2353,7 @@ static struct sched_domain *build_sched_domain(struct sched_domain_topology_leve
static bool topology_span_sane(struct sched_domain_topology_level *tl,
const struct cpumask *cpu_map, int cpu)
{
- int i;
+ int i = cpu + 1;
/* NUMA levels are allowed to overlap */
if (tl->flags & SDTL_OVERLAP)
@@ -2358,9 +2365,7 @@ static bool topology_span_sane(struct sched_domain_topology_level *tl,
* breaking the sched_group lists - i.e. a later get_group() pass
* breaks the linking done for an earlier span.
*/
- for_each_cpu(i, cpu_map) {
- if (i == cpu)
- continue;
+ for_each_cpu_from(i, cpu_map) {
/*
* We should 'and' all those masks with 'cpu_map' to exactly
* match the topology we're about to build, but that can only
@@ -2445,8 +2450,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
struct sched_domain *child = sd->child;
- if (!(sd->flags & SD_SHARE_PKG_RESOURCES) && child &&
- (child->flags & SD_SHARE_PKG_RESOURCES)) {
+ if (!(sd->flags & SD_SHARE_LLC) && child &&
+ (child->flags & SD_SHARE_LLC)) {
struct sched_domain __rcu *top_p;
unsigned int nr_llcs;
@@ -2506,16 +2511,9 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
/* Attach the domains */
rcu_read_lock();
for_each_cpu(i, cpu_map) {
- unsigned long capacity;
-
rq = cpu_rq(i);
sd = *per_cpu_ptr(d.sd, i);
- capacity = arch_scale_cpu_capacity(i);
- /* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
- if (capacity > READ_ONCE(d.rd->max_cpu_capacity))
- WRITE_ONCE(d.rd->max_cpu_capacity, capacity);
-
cpu_attach_domain(sd, d.rd, i);
if (lowest_flag_domain(i, SD_CLUSTER))
@@ -2529,10 +2527,8 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
if (has_cluster)
static_branch_inc_cpuslocked(&sched_cluster_active);
- if (rq && sched_debug_verbose) {
- pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
- cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
- }
+ if (rq && sched_debug_verbose)
+ pr_info("root domain span: %*pbl\n", cpumask_pr_args(cpu_map));
ret = 0;
error: