diff options
Diffstat (limited to '')
-rw-r--r-- | arch/arm64/kernel/topology.c | 420 |
1 files changed, 420 insertions, 0 deletions
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c new file mode 100644 index 000000000..655a308af --- /dev/null +++ b/arch/arm64/kernel/topology.c @@ -0,0 +1,420 @@ +/* + * arch/arm64/kernel/topology.c + * + * Copyright (C) 2011,2013,2014 Linaro Limited. + * + * Based on the arm32 version written by Vincent Guittot in turn based on + * arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include <linux/acpi.h> +#include <linux/arch_topology.h> +#include <linux/cacheinfo.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/of.h> +#include <linux/sched.h> +#include <linux/sched/topology.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/string.h> + +#include <asm/cpu.h> +#include <asm/cputype.h> +#include <asm/topology.h> + +static int __init get_cpu_for_node(struct device_node *node) +{ + struct device_node *cpu_node; + int cpu; + + cpu_node = of_parse_phandle(node, "cpu", 0); + if (!cpu_node) + return -1; + + cpu = of_cpu_node_to_id(cpu_node); + if (cpu >= 0) + topology_parse_cpu_capacity(cpu_node, cpu); + else + pr_crit("Unable to find CPU node for %pOF\n", cpu_node); + + of_node_put(cpu_node); + return cpu; +} + +static int __init parse_core(struct device_node *core, int package_id, + int core_id) +{ + char name[10]; + bool leaf = true; + int i = 0; + int cpu; + struct device_node *t; + + do { + snprintf(name, sizeof(name), "thread%d", i); + t = of_get_child_by_name(core, name); + if (t) { + leaf = false; + cpu = get_cpu_for_node(t); + if (cpu >= 0) { + cpu_topology[cpu].package_id = package_id; + cpu_topology[cpu].core_id = core_id; + cpu_topology[cpu].thread_id = i; + } else { + pr_err("%pOF: Can't get CPU for thread\n", + t); + of_node_put(t); + return -EINVAL; + } + of_node_put(t); + } + i++; + } while (t); + + cpu = get_cpu_for_node(core); + if (cpu >= 0) { + if (!leaf) { + pr_err("%pOF: Core has both threads and CPU\n", + core); + return -EINVAL; + } + + cpu_topology[cpu].package_id = package_id; + cpu_topology[cpu].core_id = core_id; + } else if (leaf) { + pr_err("%pOF: Can't get CPU for leaf core\n", core); + return -EINVAL; + } + + return 0; +} + +static int __init parse_cluster(struct device_node *cluster, int depth) +{ + char name[10]; + bool leaf = true; + bool has_cores = false; + struct device_node *c; + static int package_id __initdata; + int core_id = 0; + int i, ret; + + /* + * First check for child clusters; we currently ignore any + * information about the nesting of clusters and present the + * scheduler with a flat list of them. + */ + i = 0; + do { + snprintf(name, sizeof(name), "cluster%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + leaf = false; + ret = parse_cluster(c, depth + 1); + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + /* Now check for cores */ + i = 0; + do { + snprintf(name, sizeof(name), "core%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + has_cores = true; + + if (depth == 0) { + pr_err("%pOF: cpu-map children should be clusters\n", + c); + of_node_put(c); + return -EINVAL; + } + + if (leaf) { + ret = parse_core(c, package_id, core_id++); + } else { + pr_err("%pOF: Non-leaf cluster with core %s\n", + cluster, name); + ret = -EINVAL; + } + + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + if (leaf && !has_cores) + pr_warn("%pOF: empty cluster\n", cluster); + + if (leaf) + package_id++; + + return 0; +} + +static int __init parse_dt_topology(void) +{ + struct device_node *cn, *map; + int ret = 0; + int cpu; + + cn = of_find_node_by_path("/cpus"); + if (!cn) { + pr_err("No CPU information found in DT\n"); + return 0; + } + + /* + * When topology is provided cpu-map is essentially a root + * cluster with restricted subnodes. + */ + map = of_get_child_by_name(cn, "cpu-map"); + if (!map) + goto out; + + ret = parse_cluster(map, 0); + if (ret != 0) + goto out_map; + + topology_normalize_cpu_scale(); + + /* + * Check that all cores are in the topology; the SMP code will + * only mark cores described in the DT as possible. + */ + for_each_possible_cpu(cpu) + if (cpu_topology[cpu].package_id == -1) + ret = -EINVAL; + +out_map: + of_node_put(map); +out: + of_node_put(cn); + return ret; +} + +/* + * cpu topology table + */ +struct cpu_topology cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu)); + + /* Find the smaller of NUMA, core or LLC siblings */ + if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) { + /* not numa in package, lets use the package siblings */ + core_mask = &cpu_topology[cpu].core_sibling; + } + if (cpu_topology[cpu].llc_id != -1) { + if (cpumask_subset(&cpu_topology[cpu].llc_sibling, core_mask)) + core_mask = &cpu_topology[cpu].llc_sibling; + } + + return core_mask; +} + +static void update_siblings_masks(unsigned int cpuid) +{ + struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + /* update core and thread sibling masks */ + for_each_online_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->llc_id == cpu_topo->llc_id) { + cpumask_set_cpu(cpu, &cpuid_topo->llc_sibling); + cpumask_set_cpu(cpuid, &cpu_topo->llc_sibling); + } + + if (cpuid_topo->package_id != cpu_topo->package_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } +} + +void store_cpu_topology(unsigned int cpuid) +{ + struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; + u64 mpidr; + + if (cpuid_topo->package_id != -1) + goto topology_populated; + + mpidr = read_cpuid_mpidr(); + + /* Uniprocessor systems can rely on default topology values */ + if (mpidr & MPIDR_UP_BITMASK) + return; + + /* + * This would be the place to create cpu topology based on MPIDR. + * + * However, it cannot be trusted to depict the actual topology; some + * pieces of the architecture enforce an artificial cap on Aff0 values + * (e.g. GICv3's ICC_SGI1R_EL1 limits it to 15), leading to an + * artificial cycling of Aff1, Aff2 and Aff3 values. IOW, these end up + * having absolutely no relationship to the actual underlying system + * topology, and cannot be reasonably used as core / package ID. + * + * If the MT bit is set, Aff0 *could* be used to define a thread ID, but + * we still wouldn't be able to obtain a sane core ID. This means we + * need to entirely ignore MPIDR for any topology deduction. + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = cpuid; + cpuid_topo->package_id = cpu_to_node(cpuid); + + pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", + cpuid, cpuid_topo->package_id, cpuid_topo->core_id, + cpuid_topo->thread_id, mpidr); + +topology_populated: + update_siblings_masks(cpuid); +} + +static void clear_cpu_topology(int cpu) +{ + struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + + cpumask_clear(&cpu_topo->llc_sibling); + cpumask_set_cpu(cpu, &cpu_topo->llc_sibling); + + cpumask_clear(&cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); +} + +static void __init reset_cpu_topology(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + + cpu_topo->thread_id = -1; + cpu_topo->core_id = 0; + cpu_topo->package_id = -1; + cpu_topo->llc_id = -1; + + clear_cpu_topology(cpu); + } +} + +void remove_cpu_topology(unsigned int cpu) +{ + int sibling; + + for_each_cpu(sibling, topology_core_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_core_cpumask(sibling)); + for_each_cpu(sibling, topology_sibling_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_sibling_cpumask(sibling)); + for_each_cpu(sibling, topology_llc_cpumask(cpu)) + cpumask_clear_cpu(cpu, topology_llc_cpumask(sibling)); + + clear_cpu_topology(cpu); +} + +#ifdef CONFIG_ACPI +static bool __init acpi_cpu_is_threaded(int cpu) +{ + int is_threaded = acpi_pptt_cpu_is_thread(cpu); + + /* + * if the PPTT doesn't have thread information, assume a homogeneous + * machine and return the current CPU's thread state. + */ + if (is_threaded < 0) + is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK; + + return !!is_threaded; +} + +/* + * Propagate the topology information of the processor_topology_node tree to the + * cpu_topology array. + */ +static int __init parse_acpi_topology(void) +{ + int cpu, topology_id; + + for_each_possible_cpu(cpu) { + int i, cache_id; + + topology_id = find_acpi_cpu_topology(cpu, 0); + if (topology_id < 0) + return topology_id; + + if (acpi_cpu_is_threaded(cpu)) { + cpu_topology[cpu].thread_id = topology_id; + topology_id = find_acpi_cpu_topology(cpu, 1); + cpu_topology[cpu].core_id = topology_id; + } else { + cpu_topology[cpu].thread_id = -1; + cpu_topology[cpu].core_id = topology_id; + } + topology_id = find_acpi_cpu_topology_package(cpu); + cpu_topology[cpu].package_id = topology_id; + + i = acpi_find_last_cache_level(cpu); + + if (i > 0) { + /* + * this is the only part of cpu_topology that has + * a direct relationship with the cache topology + */ + cache_id = find_acpi_cpu_cache_topology(cpu, i); + if (cache_id > 0) + cpu_topology[cpu].llc_id = cache_id; + } + } + + return 0; +} + +#else +static inline int __init parse_acpi_topology(void) +{ + return -EINVAL; +} +#endif + +void __init init_cpu_topology(void) +{ + reset_cpu_topology(); + + /* + * Discard anything that was parsed if we hit an error so we + * don't use partial information. + */ + if (!acpi_disabled && parse_acpi_topology()) + reset_cpu_topology(); + else if (of_have_populated_dt() && parse_dt_topology()) + reset_cpu_topology(); +} |