diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 18:49:45 +0000 |
commit | 2c3c1048746a4622d8c89a29670120dc8fab93c4 (patch) | |
tree | 848558de17fb3008cdf4d861b01ac7781903ce39 /kernel/sched/cpufreq_schedutil.c | |
parent | Initial commit. (diff) | |
download | linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.tar.xz linux-2c3c1048746a4622d8c89a29670120dc8fab93c4.zip |
Adding upstream version 6.1.76.upstream/6.1.76
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | kernel/sched/cpufreq_schedutil.c | 863 |
1 files changed, 863 insertions, 0 deletions
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c new file mode 100644 index 000000000..853a07618 --- /dev/null +++ b/kernel/sched/cpufreq_schedutil.c @@ -0,0 +1,863 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CPUFreq governor based on scheduler-provided CPU utilization data. + * + * Copyright (C) 2016, Intel Corporation + * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com> + */ + +#define IOWAIT_BOOST_MIN (SCHED_CAPACITY_SCALE / 8) + +struct sugov_tunables { + struct gov_attr_set attr_set; + unsigned int rate_limit_us; +}; + +struct sugov_policy { + struct cpufreq_policy *policy; + + struct sugov_tunables *tunables; + struct list_head tunables_hook; + + raw_spinlock_t update_lock; + u64 last_freq_update_time; + s64 freq_update_delay_ns; + unsigned int next_freq; + unsigned int cached_raw_freq; + + /* The next fields are only needed if fast switch cannot be used: */ + struct irq_work irq_work; + struct kthread_work work; + struct mutex work_lock; + struct kthread_worker worker; + struct task_struct *thread; + bool work_in_progress; + + bool limits_changed; + bool need_freq_update; +}; + +struct sugov_cpu { + struct update_util_data update_util; + struct sugov_policy *sg_policy; + unsigned int cpu; + + bool iowait_boost_pending; + unsigned int iowait_boost; + u64 last_update; + + unsigned long util; + unsigned long bw_dl; + unsigned long max; + + /* The field below is for single-CPU policies only: */ +#ifdef CONFIG_NO_HZ_COMMON + unsigned long saved_idle_calls; +#endif +}; + +static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); + +/************************ Governor internals ***********************/ + +static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time) +{ + s64 delta_ns; + + /* + * Since cpufreq_update_util() is called with rq->lock held for + * the @target_cpu, our per-CPU data is fully serialized. + * + * However, drivers cannot in general deal with cross-CPU + * requests, so while get_next_freq() will work, our + * sugov_update_commit() call may not for the fast switching platforms. + * + * Hence stop here for remote requests if they aren't supported + * by the hardware, as calculating the frequency is pointless if + * we cannot in fact act on it. + * + * This is needed on the slow switching platforms too to prevent CPUs + * going offline from leaving stale IRQ work items behind. + */ + if (!cpufreq_this_cpu_can_update(sg_policy->policy)) + return false; + + if (unlikely(sg_policy->limits_changed)) { + sg_policy->limits_changed = false; + sg_policy->need_freq_update = true; + return true; + } + + delta_ns = time - sg_policy->last_freq_update_time; + + return delta_ns >= sg_policy->freq_update_delay_ns; +} + +static bool sugov_update_next_freq(struct sugov_policy *sg_policy, u64 time, + unsigned int next_freq) +{ + if (sg_policy->need_freq_update) + sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); + else if (sg_policy->next_freq == next_freq) + return false; + + sg_policy->next_freq = next_freq; + sg_policy->last_freq_update_time = time; + + return true; +} + +static void sugov_deferred_update(struct sugov_policy *sg_policy) +{ + if (!sg_policy->work_in_progress) { + sg_policy->work_in_progress = true; + irq_work_queue(&sg_policy->irq_work); + } +} + +/** + * get_next_freq - Compute a new frequency for a given cpufreq policy. + * @sg_policy: schedutil policy object to compute the new frequency for. + * @util: Current CPU utilization. + * @max: CPU capacity. + * + * If the utilization is frequency-invariant, choose the new frequency to be + * proportional to it, that is + * + * next_freq = C * max_freq * util / max + * + * Otherwise, approximate the would-be frequency-invariant utilization by + * util_raw * (curr_freq / max_freq) which leads to + * + * next_freq = C * curr_freq * util_raw / max + * + * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8. + * + * The lowest driver-supported frequency which is equal or greater than the raw + * next_freq (as calculated above) is returned, subject to policy min/max and + * cpufreq driver limitations. + */ +static unsigned int get_next_freq(struct sugov_policy *sg_policy, + unsigned long util, unsigned long max) +{ + struct cpufreq_policy *policy = sg_policy->policy; + unsigned int freq = arch_scale_freq_invariant() ? + policy->cpuinfo.max_freq : policy->cur; + + util = map_util_perf(util); + freq = map_util_freq(util, freq, max); + + if (freq == sg_policy->cached_raw_freq && !sg_policy->need_freq_update) + return sg_policy->next_freq; + + sg_policy->cached_raw_freq = freq; + return cpufreq_driver_resolve_freq(policy, freq); +} + +static void sugov_get_util(struct sugov_cpu *sg_cpu) +{ + struct rq *rq = cpu_rq(sg_cpu->cpu); + + sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); + sg_cpu->bw_dl = cpu_bw_dl(rq); + sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), + FREQUENCY_UTIL, NULL); +} + +/** + * sugov_iowait_reset() - Reset the IO boost status of a CPU. + * @sg_cpu: the sugov data for the CPU to boost + * @time: the update time from the caller + * @set_iowait_boost: true if an IO boost has been requested + * + * The IO wait boost of a task is disabled after a tick since the last update + * of a CPU. If a new IO wait boost is requested after more then a tick, then + * we enable the boost starting from IOWAIT_BOOST_MIN, which improves energy + * efficiency by ignoring sporadic wakeups from IO. + */ +static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, + bool set_iowait_boost) +{ + s64 delta_ns = time - sg_cpu->last_update; + + /* Reset boost only if a tick has elapsed since last request */ + if (delta_ns <= TICK_NSEC) + return false; + + sg_cpu->iowait_boost = set_iowait_boost ? IOWAIT_BOOST_MIN : 0; + sg_cpu->iowait_boost_pending = set_iowait_boost; + + return true; +} + +/** + * sugov_iowait_boost() - Updates the IO boost status of a CPU. + * @sg_cpu: the sugov data for the CPU to boost + * @time: the update time from the caller + * @flags: SCHED_CPUFREQ_IOWAIT if the task is waking up after an IO wait + * + * Each time a task wakes up after an IO operation, the CPU utilization can be + * boosted to a certain utilization which doubles at each "frequent and + * successive" wakeup from IO, ranging from IOWAIT_BOOST_MIN to the utilization + * of the maximum OPP. + * + * To keep doubling, an IO boost has to be requested at least once per tick, + * otherwise we restart from the utilization of the minimum OPP. + */ +static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, + unsigned int flags) +{ + bool set_iowait_boost = flags & SCHED_CPUFREQ_IOWAIT; + + /* Reset boost if the CPU appears to have been idle enough */ + if (sg_cpu->iowait_boost && + sugov_iowait_reset(sg_cpu, time, set_iowait_boost)) + return; + + /* Boost only tasks waking up after IO */ + if (!set_iowait_boost) + return; + + /* Ensure boost doubles only one time at each request */ + if (sg_cpu->iowait_boost_pending) + return; + sg_cpu->iowait_boost_pending = true; + + /* Double the boost at each request */ + if (sg_cpu->iowait_boost) { + sg_cpu->iowait_boost = + min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE); + return; + } + + /* First wakeup after IO: start with minimum boost */ + sg_cpu->iowait_boost = IOWAIT_BOOST_MIN; +} + +/** + * sugov_iowait_apply() - Apply the IO boost to a CPU. + * @sg_cpu: the sugov data for the cpu to boost + * @time: the update time from the caller + * + * A CPU running a task which woken up after an IO operation can have its + * utilization boosted to speed up the completion of those IO operations. + * The IO boost value is increased each time a task wakes up from IO, in + * sugov_iowait_apply(), and it's instead decreased by this function, + * each time an increase has not been requested (!iowait_boost_pending). + * + * A CPU which also appears to have been idle for at least one tick has also + * its IO boost utilization reset. + * + * This mechanism is designed to boost high frequently IO waiting tasks, while + * being more conservative on tasks which does sporadic IO operations. + */ +static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) +{ + unsigned long boost; + + /* No boost currently required */ + if (!sg_cpu->iowait_boost) + return; + + /* Reset boost if the CPU appears to have been idle enough */ + if (sugov_iowait_reset(sg_cpu, time, false)) + return; + + if (!sg_cpu->iowait_boost_pending) { + /* + * No boost pending; reduce the boost value. + */ + sg_cpu->iowait_boost >>= 1; + if (sg_cpu->iowait_boost < IOWAIT_BOOST_MIN) { + sg_cpu->iowait_boost = 0; + return; + } + } + + sg_cpu->iowait_boost_pending = false; + + /* + * sg_cpu->util is already in capacity scale; convert iowait_boost + * into the same scale so we can compare. + */ + boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; + boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); + if (sg_cpu->util < boost) + sg_cpu->util = boost; +} + +#ifdef CONFIG_NO_HZ_COMMON +static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) +{ + unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu); + bool ret = idle_calls == sg_cpu->saved_idle_calls; + + sg_cpu->saved_idle_calls = idle_calls; + return ret; +} +#else +static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; } +#endif /* CONFIG_NO_HZ_COMMON */ + +/* + * Make sugov_should_update_freq() ignore the rate limit when DL + * has increased the utilization. + */ +static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu) +{ + if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl) + sg_cpu->sg_policy->limits_changed = true; +} + +static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu, + u64 time, unsigned int flags) +{ + sugov_iowait_boost(sg_cpu, time, flags); + sg_cpu->last_update = time; + + ignore_dl_rate_limit(sg_cpu); + + if (!sugov_should_update_freq(sg_cpu->sg_policy, time)) + return false; + + sugov_get_util(sg_cpu); + sugov_iowait_apply(sg_cpu, time); + + return true; +} + +static void sugov_update_single_freq(struct update_util_data *hook, u64 time, + unsigned int flags) +{ + struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + unsigned int cached_freq = sg_policy->cached_raw_freq; + unsigned int next_f; + + if (!sugov_update_single_common(sg_cpu, time, flags)) + return; + + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); + /* + * Do not reduce the frequency if the CPU has not been idle + * recently, as the reduction is likely to be premature then. + * + * Except when the rq is capped by uclamp_max. + */ + if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) && + sugov_cpu_is_busy(sg_cpu) && next_f < sg_policy->next_freq && + !sg_policy->need_freq_update) { + next_f = sg_policy->next_freq; + + /* Restore cached freq as next_freq has changed */ + sg_policy->cached_raw_freq = cached_freq; + } + + if (!sugov_update_next_freq(sg_policy, time, next_f)) + return; + + /* + * This code runs under rq->lock for the target CPU, so it won't run + * concurrently on two different CPUs for the same target and it is not + * necessary to acquire the lock in the fast switch case. + */ + if (sg_policy->policy->fast_switch_enabled) { + cpufreq_driver_fast_switch(sg_policy->policy, next_f); + } else { + raw_spin_lock(&sg_policy->update_lock); + sugov_deferred_update(sg_policy); + raw_spin_unlock(&sg_policy->update_lock); + } +} + +static void sugov_update_single_perf(struct update_util_data *hook, u64 time, + unsigned int flags) +{ + struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + unsigned long prev_util = sg_cpu->util; + + /* + * Fall back to the "frequency" path if frequency invariance is not + * supported, because the direct mapping between the utilization and + * the performance levels depends on the frequency invariance. + */ + if (!arch_scale_freq_invariant()) { + sugov_update_single_freq(hook, time, flags); + return; + } + + if (!sugov_update_single_common(sg_cpu, time, flags)) + return; + + /* + * Do not reduce the target performance level if the CPU has not been + * idle recently, as the reduction is likely to be premature then. + * + * Except when the rq is capped by uclamp_max. + */ + if (!uclamp_rq_is_capped(cpu_rq(sg_cpu->cpu)) && + sugov_cpu_is_busy(sg_cpu) && sg_cpu->util < prev_util) + sg_cpu->util = prev_util; + + cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), + map_util_perf(sg_cpu->util), sg_cpu->max); + + sg_cpu->sg_policy->last_freq_update_time = time; +} + +static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) +{ + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + struct cpufreq_policy *policy = sg_policy->policy; + unsigned long util = 0, max = 1; + unsigned int j; + + for_each_cpu(j, policy->cpus) { + struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); + unsigned long j_util, j_max; + + sugov_get_util(j_sg_cpu); + sugov_iowait_apply(j_sg_cpu, time); + j_util = j_sg_cpu->util; + j_max = j_sg_cpu->max; + + if (j_util * max > j_max * util) { + util = j_util; + max = j_max; + } + } + + return get_next_freq(sg_policy, util, max); +} + +static void +sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags) +{ + struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + struct sugov_policy *sg_policy = sg_cpu->sg_policy; + unsigned int next_f; + + raw_spin_lock(&sg_policy->update_lock); + + sugov_iowait_boost(sg_cpu, time, flags); + sg_cpu->last_update = time; + + ignore_dl_rate_limit(sg_cpu); + + if (sugov_should_update_freq(sg_policy, time)) { + next_f = sugov_next_freq_shared(sg_cpu, time); + + if (!sugov_update_next_freq(sg_policy, time, next_f)) + goto unlock; + + if (sg_policy->policy->fast_switch_enabled) + cpufreq_driver_fast_switch(sg_policy->policy, next_f); + else + sugov_deferred_update(sg_policy); + } +unlock: + raw_spin_unlock(&sg_policy->update_lock); +} + +static void sugov_work(struct kthread_work *work) +{ + struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work); + unsigned int freq; + unsigned long flags; + + /* + * Hold sg_policy->update_lock shortly to handle the case where: + * in case sg_policy->next_freq is read here, and then updated by + * sugov_deferred_update() just before work_in_progress is set to false + * here, we may miss queueing the new update. + * + * Note: If a work was queued after the update_lock is released, + * sugov_work() will just be called again by kthread_work code; and the + * request will be proceed before the sugov thread sleeps. + */ + raw_spin_lock_irqsave(&sg_policy->update_lock, flags); + freq = sg_policy->next_freq; + sg_policy->work_in_progress = false; + raw_spin_unlock_irqrestore(&sg_policy->update_lock, flags); + + mutex_lock(&sg_policy->work_lock); + __cpufreq_driver_target(sg_policy->policy, freq, CPUFREQ_RELATION_L); + mutex_unlock(&sg_policy->work_lock); +} + +static void sugov_irq_work(struct irq_work *irq_work) +{ + struct sugov_policy *sg_policy; + + sg_policy = container_of(irq_work, struct sugov_policy, irq_work); + + kthread_queue_work(&sg_policy->worker, &sg_policy->work); +} + +/************************** sysfs interface ************************/ + +static struct sugov_tunables *global_tunables; +static DEFINE_MUTEX(global_tunables_lock); + +static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set) +{ + return container_of(attr_set, struct sugov_tunables, attr_set); +} + +static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf) +{ + struct sugov_tunables *tunables = to_sugov_tunables(attr_set); + + return sprintf(buf, "%u\n", tunables->rate_limit_us); +} + +static ssize_t +rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count) +{ + struct sugov_tunables *tunables = to_sugov_tunables(attr_set); + struct sugov_policy *sg_policy; + unsigned int rate_limit_us; + + if (kstrtouint(buf, 10, &rate_limit_us)) + return -EINVAL; + + tunables->rate_limit_us = rate_limit_us; + + list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook) + sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC; + + return count; +} + +static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us); + +static struct attribute *sugov_attrs[] = { + &rate_limit_us.attr, + NULL +}; +ATTRIBUTE_GROUPS(sugov); + +static void sugov_tunables_free(struct kobject *kobj) +{ + struct gov_attr_set *attr_set = to_gov_attr_set(kobj); + + kfree(to_sugov_tunables(attr_set)); +} + +static struct kobj_type sugov_tunables_ktype = { + .default_groups = sugov_groups, + .sysfs_ops = &governor_sysfs_ops, + .release = &sugov_tunables_free, +}; + +/********************** cpufreq governor interface *********************/ + +struct cpufreq_governor schedutil_gov; + +static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy; + + sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL); + if (!sg_policy) + return NULL; + + sg_policy->policy = policy; + raw_spin_lock_init(&sg_policy->update_lock); + return sg_policy; +} + +static void sugov_policy_free(struct sugov_policy *sg_policy) +{ + kfree(sg_policy); +} + +static int sugov_kthread_create(struct sugov_policy *sg_policy) +{ + struct task_struct *thread; + struct sched_attr attr = { + .size = sizeof(struct sched_attr), + .sched_policy = SCHED_DEADLINE, + .sched_flags = SCHED_FLAG_SUGOV, + .sched_nice = 0, + .sched_priority = 0, + /* + * Fake (unused) bandwidth; workaround to "fix" + * priority inheritance. + */ + .sched_runtime = 1000000, + .sched_deadline = 10000000, + .sched_period = 10000000, + }; + struct cpufreq_policy *policy = sg_policy->policy; + int ret; + + /* kthread only required for slow path */ + if (policy->fast_switch_enabled) + return 0; + + kthread_init_work(&sg_policy->work, sugov_work); + kthread_init_worker(&sg_policy->worker); + thread = kthread_create(kthread_worker_fn, &sg_policy->worker, + "sugov:%d", + cpumask_first(policy->related_cpus)); + if (IS_ERR(thread)) { + pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread)); + return PTR_ERR(thread); + } + + ret = sched_setattr_nocheck(thread, &attr); + if (ret) { + kthread_stop(thread); + pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__); + return ret; + } + + sg_policy->thread = thread; + kthread_bind_mask(thread, policy->related_cpus); + init_irq_work(&sg_policy->irq_work, sugov_irq_work); + mutex_init(&sg_policy->work_lock); + + wake_up_process(thread); + + return 0; +} + +static void sugov_kthread_stop(struct sugov_policy *sg_policy) +{ + /* kthread only required for slow path */ + if (sg_policy->policy->fast_switch_enabled) + return; + + kthread_flush_worker(&sg_policy->worker); + kthread_stop(sg_policy->thread); + mutex_destroy(&sg_policy->work_lock); +} + +static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy) +{ + struct sugov_tunables *tunables; + + tunables = kzalloc(sizeof(*tunables), GFP_KERNEL); + if (tunables) { + gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook); + if (!have_governor_per_policy()) + global_tunables = tunables; + } + return tunables; +} + +static void sugov_clear_global_tunables(void) +{ + if (!have_governor_per_policy()) + global_tunables = NULL; +} + +static int sugov_init(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy; + struct sugov_tunables *tunables; + int ret = 0; + + /* State should be equivalent to EXIT */ + if (policy->governor_data) + return -EBUSY; + + cpufreq_enable_fast_switch(policy); + + sg_policy = sugov_policy_alloc(policy); + if (!sg_policy) { + ret = -ENOMEM; + goto disable_fast_switch; + } + + ret = sugov_kthread_create(sg_policy); + if (ret) + goto free_sg_policy; + + mutex_lock(&global_tunables_lock); + + if (global_tunables) { + if (WARN_ON(have_governor_per_policy())) { + ret = -EINVAL; + goto stop_kthread; + } + policy->governor_data = sg_policy; + sg_policy->tunables = global_tunables; + + gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook); + goto out; + } + + tunables = sugov_tunables_alloc(sg_policy); + if (!tunables) { + ret = -ENOMEM; + goto stop_kthread; + } + + tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy); + + policy->governor_data = sg_policy; + sg_policy->tunables = tunables; + + ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype, + get_governor_parent_kobj(policy), "%s", + schedutil_gov.name); + if (ret) + goto fail; + +out: + mutex_unlock(&global_tunables_lock); + return 0; + +fail: + kobject_put(&tunables->attr_set.kobj); + policy->governor_data = NULL; + sugov_clear_global_tunables(); + +stop_kthread: + sugov_kthread_stop(sg_policy); + mutex_unlock(&global_tunables_lock); + +free_sg_policy: + sugov_policy_free(sg_policy); + +disable_fast_switch: + cpufreq_disable_fast_switch(policy); + + pr_err("initialization failed (error %d)\n", ret); + return ret; +} + +static void sugov_exit(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + struct sugov_tunables *tunables = sg_policy->tunables; + unsigned int count; + + mutex_lock(&global_tunables_lock); + + count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook); + policy->governor_data = NULL; + if (!count) + sugov_clear_global_tunables(); + + mutex_unlock(&global_tunables_lock); + + sugov_kthread_stop(sg_policy); + sugov_policy_free(sg_policy); + cpufreq_disable_fast_switch(policy); +} + +static int sugov_start(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + void (*uu)(struct update_util_data *data, u64 time, unsigned int flags); + unsigned int cpu; + + sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; + sg_policy->last_freq_update_time = 0; + sg_policy->next_freq = 0; + sg_policy->work_in_progress = false; + sg_policy->limits_changed = false; + sg_policy->cached_raw_freq = 0; + + sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); + + for_each_cpu(cpu, policy->cpus) { + struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); + + memset(sg_cpu, 0, sizeof(*sg_cpu)); + sg_cpu->cpu = cpu; + sg_cpu->sg_policy = sg_policy; + } + + if (policy_is_shared(policy)) + uu = sugov_update_shared; + else if (policy->fast_switch_enabled && cpufreq_driver_has_adjust_perf()) + uu = sugov_update_single_perf; + else + uu = sugov_update_single_freq; + + for_each_cpu(cpu, policy->cpus) { + struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu); + + cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, uu); + } + return 0; +} + +static void sugov_stop(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + unsigned int cpu; + + for_each_cpu(cpu, policy->cpus) + cpufreq_remove_update_util_hook(cpu); + + synchronize_rcu(); + + if (!policy->fast_switch_enabled) { + irq_work_sync(&sg_policy->irq_work); + kthread_cancel_work_sync(&sg_policy->work); + } +} + +static void sugov_limits(struct cpufreq_policy *policy) +{ + struct sugov_policy *sg_policy = policy->governor_data; + + if (!policy->fast_switch_enabled) { + mutex_lock(&sg_policy->work_lock); + cpufreq_policy_apply_limits(policy); + mutex_unlock(&sg_policy->work_lock); + } + + sg_policy->limits_changed = true; +} + +struct cpufreq_governor schedutil_gov = { + .name = "schedutil", + .owner = THIS_MODULE, + .flags = CPUFREQ_GOV_DYNAMIC_SWITCHING, + .init = sugov_init, + .exit = sugov_exit, + .start = sugov_start, + .stop = sugov_stop, + .limits = sugov_limits, +}; + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL +struct cpufreq_governor *cpufreq_default_governor(void) +{ + return &schedutil_gov; +} +#endif + +cpufreq_governor_init(schedutil_gov); + +#ifdef CONFIG_ENERGY_MODEL +static void rebuild_sd_workfn(struct work_struct *work) +{ + rebuild_sched_domains_energy(); +} +static DECLARE_WORK(rebuild_sd_work, rebuild_sd_workfn); + +/* + * EAS shouldn't be attempted without sugov, so rebuild the sched_domains + * on governor changes to make sure the scheduler knows about it. + */ +void sched_cpufreq_governor_change(struct cpufreq_policy *policy, + struct cpufreq_governor *old_gov) +{ + if (old_gov == &schedutil_gov || policy->governor == &schedutil_gov) { + /* + * When called from the cpufreq_register_driver() path, the + * cpu_hotplug_lock is already held, so use a work item to + * avoid nested locking in rebuild_sched_domains(). + */ + schedule_work(&rebuild_sd_work); + } + +} +#endif |