summaryrefslogtreecommitdiffstats
path: root/drivers/cpuidle
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r--drivers/cpuidle/Kconfig48
-rw-r--r--drivers/cpuidle/Kconfig.arm77
-rw-r--r--drivers/cpuidle/Kconfig.mips17
-rw-r--r--drivers/cpuidle/Kconfig.powerpc20
-rw-r--r--drivers/cpuidle/Makefile31
-rw-r--r--drivers/cpuidle/coupled.c800
-rw-r--r--drivers/cpuidle/cpuidle-arm.c187
-rw-r--r--drivers/cpuidle/cpuidle-at91.c65
-rw-r--r--drivers/cpuidle/cpuidle-big_little.c233
-rw-r--r--drivers/cpuidle/cpuidle-calxeda.c83
-rw-r--r--drivers/cpuidle/cpuidle-clps711x.c59
-rw-r--r--drivers/cpuidle/cpuidle-cps.c182
-rw-r--r--drivers/cpuidle/cpuidle-exynos.c146
-rw-r--r--drivers/cpuidle/cpuidle-kirkwood.c87
-rw-r--r--drivers/cpuidle/cpuidle-mvebu-v7.c139
-rw-r--r--drivers/cpuidle/cpuidle-powernv.c420
-rw-r--r--drivers/cpuidle/cpuidle-pseries.c301
-rw-r--r--drivers/cpuidle/cpuidle-ux500.c127
-rw-r--r--drivers/cpuidle/cpuidle-zynq.c76
-rw-r--r--drivers/cpuidle/cpuidle.c706
-rw-r--r--drivers/cpuidle/cpuidle.h72
-rw-r--r--drivers/cpuidle/driver.c355
-rw-r--r--drivers/cpuidle/dt_idle_states.c229
-rw-r--r--drivers/cpuidle/dt_idle_states.h8
-rw-r--r--drivers/cpuidle/governor.c110
-rw-r--r--drivers/cpuidle/governors/Makefile6
-rw-r--r--drivers/cpuidle/governors/ladder.c199
-rw-r--r--drivers/cpuidle/governors/menu.c609
-rw-r--r--drivers/cpuidle/poll_state.c56
-rw-r--r--drivers/cpuidle/sysfs.c718
30 files changed, 6166 insertions, 0 deletions
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
new file mode 100644
index 000000000..7e48eb5bf
--- /dev/null
+++ b/drivers/cpuidle/Kconfig
@@ -0,0 +1,48 @@
+menu "CPU Idle"
+
+config CPU_IDLE
+ bool "CPU idle PM support"
+ default y if ACPI || PPC_PSERIES
+ select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE)
+ select CPU_IDLE_GOV_MENU if (NO_HZ || NO_HZ_IDLE)
+ help
+ CPU idle is a generic framework for supporting software-controlled
+ idle processor power management. It includes modular cross-platform
+ governors that can be swapped during runtime.
+
+ If you're using an ACPI-enabled platform, you should say Y here.
+
+if CPU_IDLE
+
+config CPU_IDLE_MULTIPLE_DRIVERS
+ bool
+
+config CPU_IDLE_GOV_LADDER
+ bool "Ladder governor (for periodic timer tick)"
+
+config CPU_IDLE_GOV_MENU
+ bool "Menu governor (for tickless system)"
+
+config DT_IDLE_STATES
+ bool
+
+menu "ARM CPU Idle Drivers"
+depends on ARM || ARM64
+source "drivers/cpuidle/Kconfig.arm"
+endmenu
+
+menu "MIPS CPU Idle Drivers"
+depends on MIPS
+source "drivers/cpuidle/Kconfig.mips"
+endmenu
+
+menu "POWERPC CPU Idle Drivers"
+depends on PPC
+source "drivers/cpuidle/Kconfig.powerpc"
+endmenu
+
+endif
+
+config ARCH_NEEDS_CPU_IDLE_COUPLED
+ def_bool n
+endmenu
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
new file mode 100644
index 000000000..f52144808
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.arm
@@ -0,0 +1,77 @@
+#
+# ARM CPU Idle drivers
+#
+config ARM_CPUIDLE
+ bool "Generic ARM/ARM64 CPU idle Driver"
+ select DT_IDLE_STATES
+ select CPU_IDLE_MULTIPLE_DRIVERS
+ help
+ Select this to enable generic cpuidle driver for ARM.
+ It provides a generic idle driver whose idle states are configured
+ at run-time through DT nodes. The CPUidle suspend backend is
+ initialized by calling the CPU operations init idle hook
+ provided by architecture code.
+
+config ARM_BIG_LITTLE_CPUIDLE
+ bool "Support for ARM big.LITTLE processors"
+ depends on ARCH_VEXPRESS_TC2_PM || ARCH_EXYNOS
+ depends on MCPM && !ARM64
+ select ARM_CPU_SUSPEND
+ select CPU_IDLE_MULTIPLE_DRIVERS
+ select DT_IDLE_STATES
+ help
+ Select this option to enable CPU idle driver for big.LITTLE based
+ ARM systems. Driver manages CPUs coordination through MCPM and
+ define different C-states for little and big cores through the
+ multiple CPU idle drivers infrastructure.
+
+config ARM_CLPS711X_CPUIDLE
+ bool "CPU Idle Driver for CLPS711X processors"
+ depends on ARCH_CLPS711X && !ARM64 || COMPILE_TEST
+ help
+ Select this to enable cpuidle on Cirrus Logic CLPS711X SOCs.
+
+config ARM_HIGHBANK_CPUIDLE
+ bool "CPU Idle Driver for Calxeda processors"
+ depends on ARM_PSCI && !ARM64
+ select ARM_CPU_SUSPEND
+ help
+ Select this to enable cpuidle on Calxeda processors.
+
+config ARM_KIRKWOOD_CPUIDLE
+ bool "CPU Idle Driver for Marvell Kirkwood SoCs"
+ depends on MACH_KIRKWOOD && !ARM64
+ help
+ This adds the CPU Idle driver for Marvell Kirkwood SoCs.
+
+config ARM_ZYNQ_CPUIDLE
+ bool "CPU Idle Driver for Xilinx Zynq processors"
+ depends on ARCH_ZYNQ && !ARM64
+ help
+ Select this to enable cpuidle on Xilinx Zynq processors.
+
+config ARM_U8500_CPUIDLE
+ bool "Cpu Idle Driver for the ST-E u8500 processors"
+ depends on ARCH_U8500 && !ARM64
+ help
+ Select this to enable cpuidle for ST-E u8500 processors
+
+config ARM_AT91_CPUIDLE
+ bool "Cpu Idle Driver for the AT91 processors"
+ default y
+ depends on ARCH_AT91 && !ARM64
+ help
+ Select this to enable cpuidle for AT91 processors
+
+config ARM_EXYNOS_CPUIDLE
+ bool "Cpu Idle Driver for the Exynos processors"
+ depends on ARCH_EXYNOS && !ARM64
+ select ARCH_NEEDS_CPU_IDLE_COUPLED if SMP
+ help
+ Select this to enable cpuidle for Exynos processors
+
+config ARM_MVEBU_V7_CPUIDLE
+ bool "CPU Idle Driver for mvebu v7 family processors"
+ depends on ARCH_MVEBU && !ARM64
+ help
+ Select this to enable cpuidle on Armada 370, 38x and XP processors.
diff --git a/drivers/cpuidle/Kconfig.mips b/drivers/cpuidle/Kconfig.mips
new file mode 100644
index 000000000..512ee37b3
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.mips
@@ -0,0 +1,17 @@
+#
+# MIPS CPU Idle Drivers
+#
+config MIPS_CPS_CPUIDLE
+ bool "CPU Idle driver for MIPS CPS platforms"
+ depends on CPU_IDLE && MIPS_CPS
+ depends on SYS_SUPPORTS_MIPS_CPS
+ select ARCH_NEEDS_CPU_IDLE_COUPLED if MIPS_MT || CPU_MIPSR6
+ select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+ select MIPS_CPS_PM
+ default y
+ help
+ Select this option to enable processor idle state management
+ through cpuidle for systems built around the MIPS Coherent
+ Processing System (CPS) architecture. In order to make use of
+ the deepest idle states you will need to ensure that you are
+ also using the CONFIG_MIPS_CPS SMP implementation.
diff --git a/drivers/cpuidle/Kconfig.powerpc b/drivers/cpuidle/Kconfig.powerpc
new file mode 100644
index 000000000..66c3a0957
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.powerpc
@@ -0,0 +1,20 @@
+#
+# POWERPC CPU Idle Drivers
+#
+config PSERIES_CPUIDLE
+ bool "Cpuidle driver for pSeries platforms"
+ depends on CPU_IDLE
+ depends on PPC_PSERIES
+ default y
+ help
+ Select this option to enable processor idle state management
+ through cpuidle subsystem.
+
+config POWERNV_CPUIDLE
+ bool "Cpuidle driver for powernv platforms"
+ depends on CPU_IDLE
+ depends on PPC_POWERNV
+ default y
+ help
+ Select this option to enable processor idle state management
+ through cpuidle subsystem.
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
new file mode 100644
index 000000000..9d7176cee
--- /dev/null
+++ b/drivers/cpuidle/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for cpuidle.
+#
+
+obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
+obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
+obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o
+obj-$(CONFIG_ARCH_HAS_CPU_RELAX) += poll_state.o
+
+##################################################################################
+# ARM SoC drivers
+obj-$(CONFIG_ARM_MVEBU_V7_CPUIDLE) += cpuidle-mvebu-v7.o
+obj-$(CONFIG_ARM_BIG_LITTLE_CPUIDLE) += cpuidle-big_little.o
+obj-$(CONFIG_ARM_CLPS711X_CPUIDLE) += cpuidle-clps711x.o
+obj-$(CONFIG_ARM_HIGHBANK_CPUIDLE) += cpuidle-calxeda.o
+obj-$(CONFIG_ARM_KIRKWOOD_CPUIDLE) += cpuidle-kirkwood.o
+obj-$(CONFIG_ARM_ZYNQ_CPUIDLE) += cpuidle-zynq.o
+obj-$(CONFIG_ARM_U8500_CPUIDLE) += cpuidle-ux500.o
+obj-$(CONFIG_ARM_AT91_CPUIDLE) += cpuidle-at91.o
+obj-$(CONFIG_ARM_EXYNOS_CPUIDLE) += cpuidle-exynos.o
+obj-$(CONFIG_ARM_CPUIDLE) += cpuidle-arm.o
+
+###############################################################################
+# MIPS drivers
+obj-$(CONFIG_MIPS_CPS_CPUIDLE) += cpuidle-cps.o
+
+###############################################################################
+# POWERPC drivers
+obj-$(CONFIG_PSERIES_CPUIDLE) += cpuidle-pseries.o
+obj-$(CONFIG_POWERNV_CPUIDLE) += cpuidle-powernv.o
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
new file mode 100644
index 000000000..147f38ea0
--- /dev/null
+++ b/drivers/cpuidle/coupled.c
@@ -0,0 +1,800 @@
+/*
+ * coupled.c - helper functions to enter the same idle state on multiple cpus
+ *
+ * Copyright (c) 2011 Google, Inc.
+ *
+ * Author: Colin Cross <ccross@android.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/cpuidle.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "cpuidle.h"
+
+/**
+ * DOC: Coupled cpuidle states
+ *
+ * On some ARM SMP SoCs (OMAP4460, Tegra 2, and probably more), the
+ * cpus cannot be independently powered down, either due to
+ * sequencing restrictions (on Tegra 2, cpu 0 must be the last to
+ * power down), or due to HW bugs (on OMAP4460, a cpu powering up
+ * will corrupt the gic state unless the other cpu runs a work
+ * around). Each cpu has a power state that it can enter without
+ * coordinating with the other cpu (usually Wait For Interrupt, or
+ * WFI), and one or more "coupled" power states that affect blocks
+ * shared between the cpus (L2 cache, interrupt controller, and
+ * sometimes the whole SoC). Entering a coupled power state must
+ * be tightly controlled on both cpus.
+ *
+ * This file implements a solution, where each cpu will wait in the
+ * WFI state until all cpus are ready to enter a coupled state, at
+ * which point the coupled state function will be called on all
+ * cpus at approximately the same time.
+ *
+ * Once all cpus are ready to enter idle, they are woken by an smp
+ * cross call. At this point, there is a chance that one of the
+ * cpus will find work to do, and choose not to enter idle. A
+ * final pass is needed to guarantee that all cpus will call the
+ * power state enter function at the same time. During this pass,
+ * each cpu will increment the ready counter, and continue once the
+ * ready counter matches the number of online coupled cpus. If any
+ * cpu exits idle, the other cpus will decrement their counter and
+ * retry.
+ *
+ * requested_state stores the deepest coupled idle state each cpu
+ * is ready for. It is assumed that the states are indexed from
+ * shallowest (highest power, lowest exit latency) to deepest
+ * (lowest power, highest exit latency). The requested_state
+ * variable is not locked. It is only written from the cpu that
+ * it stores (or by the on/offlining cpu if that cpu is offline),
+ * and only read after all the cpus are ready for the coupled idle
+ * state are are no longer updating it.
+ *
+ * Three atomic counters are used. alive_count tracks the number
+ * of cpus in the coupled set that are currently or soon will be
+ * online. waiting_count tracks the number of cpus that are in
+ * the waiting loop, in the ready loop, or in the coupled idle state.
+ * ready_count tracks the number of cpus that are in the ready loop
+ * or in the coupled idle state.
+ *
+ * To use coupled cpuidle states, a cpuidle driver must:
+ *
+ * Set struct cpuidle_device.coupled_cpus to the mask of all
+ * coupled cpus, usually the same as cpu_possible_mask if all cpus
+ * are part of the same cluster. The coupled_cpus mask must be
+ * set in the struct cpuidle_device for each cpu.
+ *
+ * Set struct cpuidle_device.safe_state to a state that is not a
+ * coupled state. This is usually WFI.
+ *
+ * Set CPUIDLE_FLAG_COUPLED in struct cpuidle_state.flags for each
+ * state that affects multiple cpus.
+ *
+ * Provide a struct cpuidle_state.enter function for each state
+ * that affects multiple cpus. This function is guaranteed to be
+ * called on all cpus at approximately the same time. The driver
+ * should ensure that the cpus all abort together if any cpu tries
+ * to abort once the function is called. The function should return
+ * with interrupts still disabled.
+ */
+
+/**
+ * struct cpuidle_coupled - data for set of cpus that share a coupled idle state
+ * @coupled_cpus: mask of cpus that are part of the coupled set
+ * @requested_state: array of requested states for cpus in the coupled set
+ * @ready_waiting_counts: combined count of cpus in ready or waiting loops
+ * @online_count: count of cpus that are online
+ * @refcnt: reference count of cpuidle devices that are using this struct
+ * @prevent: flag to prevent coupled idle while a cpu is hotplugging
+ */
+struct cpuidle_coupled {
+ cpumask_t coupled_cpus;
+ int requested_state[NR_CPUS];
+ atomic_t ready_waiting_counts;
+ atomic_t abort_barrier;
+ int online_count;
+ int refcnt;
+ int prevent;
+};
+
+#define WAITING_BITS 16
+#define MAX_WAITING_CPUS (1 << WAITING_BITS)
+#define WAITING_MASK (MAX_WAITING_CPUS - 1)
+#define READY_MASK (~WAITING_MASK)
+
+#define CPUIDLE_COUPLED_NOT_IDLE (-1)
+
+static DEFINE_PER_CPU(call_single_data_t, cpuidle_coupled_poke_cb);
+
+/*
+ * The cpuidle_coupled_poke_pending mask is used to avoid calling
+ * __smp_call_function_single with the per cpu call_single_data_t struct already
+ * in use. This prevents a deadlock where two cpus are waiting for each others
+ * call_single_data_t struct to be available
+ */
+static cpumask_t cpuidle_coupled_poke_pending;
+
+/*
+ * The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked
+ * once to minimize entering the ready loop with a poke pending, which would
+ * require aborting and retrying.
+ */
+static cpumask_t cpuidle_coupled_poked;
+
+/**
+ * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus
+ * @dev: cpuidle_device of the calling cpu
+ * @a: atomic variable to hold the barrier
+ *
+ * No caller to this function will return from this function until all online
+ * cpus in the same coupled group have called this function. Once any caller
+ * has returned from this function, the barrier is immediately available for
+ * reuse.
+ *
+ * The atomic variable must be initialized to 0 before any cpu calls
+ * this function, will be reset to 0 before any cpu returns from this function.
+ *
+ * Must only be called from within a coupled idle state handler
+ * (state.enter when state.flags has CPUIDLE_FLAG_COUPLED set).
+ *
+ * Provides full smp barrier semantics before and after calling.
+ */
+void cpuidle_coupled_parallel_barrier(struct cpuidle_device *dev, atomic_t *a)
+{
+ int n = dev->coupled->online_count;
+
+ smp_mb__before_atomic();
+ atomic_inc(a);
+
+ while (atomic_read(a) < n)
+ cpu_relax();
+
+ if (atomic_inc_return(a) == n * 2) {
+ atomic_set(a, 0);
+ return;
+ }
+
+ while (atomic_read(a) > n)
+ cpu_relax();
+}
+
+/**
+ * cpuidle_state_is_coupled - check if a state is part of a coupled set
+ * @drv: struct cpuidle_driver for the platform
+ * @state: index of the target state in drv->states
+ *
+ * Returns true if the target state is coupled with cpus besides this one
+ */
+bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state)
+{
+ return drv->states[state].flags & CPUIDLE_FLAG_COUPLED;
+}
+
+/**
+ * cpuidle_coupled_state_verify - check if the coupled states are correctly set.
+ * @drv: struct cpuidle_driver for the platform
+ *
+ * Returns 0 for valid state values, a negative error code otherwise:
+ * * -EINVAL if any coupled state(safe_state_index) is wrongly set.
+ */
+int cpuidle_coupled_state_verify(struct cpuidle_driver *drv)
+{
+ int i;
+
+ for (i = drv->state_count - 1; i >= 0; i--) {
+ if (cpuidle_state_is_coupled(drv, i) &&
+ (drv->safe_state_index == i ||
+ drv->safe_state_index < 0 ||
+ drv->safe_state_index >= drv->state_count))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/**
+ * cpuidle_coupled_set_ready - mark a cpu as ready
+ * @coupled: the struct coupled that contains the current cpu
+ */
+static inline void cpuidle_coupled_set_ready(struct cpuidle_coupled *coupled)
+{
+ atomic_add(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
+}
+
+/**
+ * cpuidle_coupled_set_not_ready - mark a cpu as not ready
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Decrements the ready counter, unless the ready (and thus the waiting) counter
+ * is equal to the number of online cpus. Prevents a race where one cpu
+ * decrements the waiting counter and then re-increments it just before another
+ * cpu has decremented its ready counter, leading to the ready counter going
+ * down from the number of online cpus without going through the coupled idle
+ * state.
+ *
+ * Returns 0 if the counter was decremented successfully, -EINVAL if the ready
+ * counter was equal to the number of online cpus.
+ */
+static
+inline int cpuidle_coupled_set_not_ready(struct cpuidle_coupled *coupled)
+{
+ int all;
+ int ret;
+
+ all = coupled->online_count | (coupled->online_count << WAITING_BITS);
+ ret = atomic_add_unless(&coupled->ready_waiting_counts,
+ -MAX_WAITING_CPUS, all);
+
+ return ret ? 0 : -EINVAL;
+}
+
+/**
+ * cpuidle_coupled_no_cpus_ready - check if no cpus in a coupled set are ready
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Returns true if all of the cpus in a coupled set are out of the ready loop.
+ */
+static inline int cpuidle_coupled_no_cpus_ready(struct cpuidle_coupled *coupled)
+{
+ int r = atomic_read(&coupled->ready_waiting_counts) >> WAITING_BITS;
+ return r == 0;
+}
+
+/**
+ * cpuidle_coupled_cpus_ready - check if all cpus in a coupled set are ready
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Returns true if all cpus coupled to this target state are in the ready loop
+ */
+static inline bool cpuidle_coupled_cpus_ready(struct cpuidle_coupled *coupled)
+{
+ int r = atomic_read(&coupled->ready_waiting_counts) >> WAITING_BITS;
+ return r == coupled->online_count;
+}
+
+/**
+ * cpuidle_coupled_cpus_waiting - check if all cpus in a coupled set are waiting
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Returns true if all cpus coupled to this target state are in the wait loop
+ */
+static inline bool cpuidle_coupled_cpus_waiting(struct cpuidle_coupled *coupled)
+{
+ int w = atomic_read(&coupled->ready_waiting_counts) & WAITING_MASK;
+ return w == coupled->online_count;
+}
+
+/**
+ * cpuidle_coupled_no_cpus_waiting - check if no cpus in coupled set are waiting
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Returns true if all of the cpus in a coupled set are out of the waiting loop.
+ */
+static inline int cpuidle_coupled_no_cpus_waiting(struct cpuidle_coupled *coupled)
+{
+ int w = atomic_read(&coupled->ready_waiting_counts) & WAITING_MASK;
+ return w == 0;
+}
+
+/**
+ * cpuidle_coupled_get_state - determine the deepest idle state
+ * @dev: struct cpuidle_device for this cpu
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Returns the deepest idle state that all coupled cpus can enter
+ */
+static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev,
+ struct cpuidle_coupled *coupled)
+{
+ int i;
+ int state = INT_MAX;
+
+ /*
+ * Read barrier ensures that read of requested_state is ordered after
+ * reads of ready_count. Matches the write barriers
+ * cpuidle_set_state_waiting.
+ */
+ smp_rmb();
+
+ for_each_cpu(i, &coupled->coupled_cpus)
+ if (cpu_online(i) && coupled->requested_state[i] < state)
+ state = coupled->requested_state[i];
+
+ return state;
+}
+
+static void cpuidle_coupled_handle_poke(void *info)
+{
+ int cpu = (unsigned long)info;
+ cpumask_set_cpu(cpu, &cpuidle_coupled_poked);
+ cpumask_clear_cpu(cpu, &cpuidle_coupled_poke_pending);
+}
+
+/**
+ * cpuidle_coupled_poke - wake up a cpu that may be waiting
+ * @cpu: target cpu
+ *
+ * Ensures that the target cpu exits it's waiting idle state (if it is in it)
+ * and will see updates to waiting_count before it re-enters it's waiting idle
+ * state.
+ *
+ * If cpuidle_coupled_poked_mask is already set for the target cpu, that cpu
+ * either has or will soon have a pending IPI that will wake it out of idle,
+ * or it is currently processing the IPI and is not in idle.
+ */
+static void cpuidle_coupled_poke(int cpu)
+{
+ call_single_data_t *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
+
+ if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending))
+ smp_call_function_single_async(cpu, csd);
+}
+
+/**
+ * cpuidle_coupled_poke_others - wake up all other cpus that may be waiting
+ * @dev: struct cpuidle_device for this cpu
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Calls cpuidle_coupled_poke on all other online cpus.
+ */
+static void cpuidle_coupled_poke_others(int this_cpu,
+ struct cpuidle_coupled *coupled)
+{
+ int cpu;
+
+ for_each_cpu(cpu, &coupled->coupled_cpus)
+ if (cpu != this_cpu && cpu_online(cpu))
+ cpuidle_coupled_poke(cpu);
+}
+
+/**
+ * cpuidle_coupled_set_waiting - mark this cpu as in the wait loop
+ * @dev: struct cpuidle_device for this cpu
+ * @coupled: the struct coupled that contains the current cpu
+ * @next_state: the index in drv->states of the requested state for this cpu
+ *
+ * Updates the requested idle state for the specified cpuidle device.
+ * Returns the number of waiting cpus.
+ */
+static int cpuidle_coupled_set_waiting(int cpu,
+ struct cpuidle_coupled *coupled, int next_state)
+{
+ coupled->requested_state[cpu] = next_state;
+
+ /*
+ * The atomic_inc_return provides a write barrier to order the write
+ * to requested_state with the later write that increments ready_count.
+ */
+ return atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK;
+}
+
+/**
+ * cpuidle_coupled_set_not_waiting - mark this cpu as leaving the wait loop
+ * @dev: struct cpuidle_device for this cpu
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Removes the requested idle state for the specified cpuidle device.
+ */
+static void cpuidle_coupled_set_not_waiting(int cpu,
+ struct cpuidle_coupled *coupled)
+{
+ /*
+ * Decrementing waiting count can race with incrementing it in
+ * cpuidle_coupled_set_waiting, but that's OK. Worst case, some
+ * cpus will increment ready_count and then spin until they
+ * notice that this cpu has cleared it's requested_state.
+ */
+ atomic_dec(&coupled->ready_waiting_counts);
+
+ coupled->requested_state[cpu] = CPUIDLE_COUPLED_NOT_IDLE;
+}
+
+/**
+ * cpuidle_coupled_set_done - mark this cpu as leaving the ready loop
+ * @cpu: the current cpu
+ * @coupled: the struct coupled that contains the current cpu
+ *
+ * Marks this cpu as no longer in the ready and waiting loops. Decrements
+ * the waiting count first to prevent another cpu looping back in and seeing
+ * this cpu as waiting just before it exits idle.
+ */
+static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled)
+{
+ cpuidle_coupled_set_not_waiting(cpu, coupled);
+ atomic_sub(MAX_WAITING_CPUS, &coupled->ready_waiting_counts);
+}
+
+/**
+ * cpuidle_coupled_clear_pokes - spin until the poke interrupt is processed
+ * @cpu - this cpu
+ *
+ * Turns on interrupts and spins until any outstanding poke interrupts have
+ * been processed and the poke bit has been cleared.
+ *
+ * Other interrupts may also be processed while interrupts are enabled, so
+ * need_resched() must be tested after this function returns to make sure
+ * the interrupt didn't schedule work that should take the cpu out of idle.
+ *
+ * Returns 0 if no poke was pending, 1 if a poke was cleared.
+ */
+static int cpuidle_coupled_clear_pokes(int cpu)
+{
+ if (!cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending))
+ return 0;
+
+ local_irq_enable();
+ while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending))
+ cpu_relax();
+ local_irq_disable();
+
+ return 1;
+}
+
+static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled)
+{
+ cpumask_t cpus;
+ int ret;
+
+ cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus);
+ ret = cpumask_and(&cpus, &cpuidle_coupled_poke_pending, &cpus);
+
+ return ret;
+}
+
+/**
+ * cpuidle_enter_state_coupled - attempt to enter a state with coupled cpus
+ * @dev: struct cpuidle_device for the current cpu
+ * @drv: struct cpuidle_driver for the platform
+ * @next_state: index of the requested state in drv->states
+ *
+ * Coordinate with coupled cpus to enter the target state. This is a two
+ * stage process. In the first stage, the cpus are operating independently,
+ * and may call into cpuidle_enter_state_coupled at completely different times.
+ * To save as much power as possible, the first cpus to call this function will
+ * go to an intermediate state (the cpuidle_device's safe state), and wait for
+ * all the other cpus to call this function. Once all coupled cpus are idle,
+ * the second stage will start. Each coupled cpu will spin until all cpus have
+ * guaranteed that they will call the target_state.
+ *
+ * This function must be called with interrupts disabled. It may enable
+ * interrupts while preparing for idle, and it will always return with
+ * interrupts enabled.
+ */
+int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int next_state)
+{
+ int entered_state = -1;
+ struct cpuidle_coupled *coupled = dev->coupled;
+ int w;
+
+ if (!coupled)
+ return -EINVAL;
+
+ while (coupled->prevent) {
+ cpuidle_coupled_clear_pokes(dev->cpu);
+ if (need_resched()) {
+ local_irq_enable();
+ return entered_state;
+ }
+ entered_state = cpuidle_enter_state(dev, drv,
+ drv->safe_state_index);
+ local_irq_disable();
+ }
+
+ /* Read barrier ensures online_count is read after prevent is cleared */
+ smp_rmb();
+
+reset:
+ cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked);
+
+ w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state);
+ /*
+ * If this is the last cpu to enter the waiting state, poke
+ * all the other cpus out of their waiting state so they can
+ * enter a deeper state. This can race with one of the cpus
+ * exiting the waiting state due to an interrupt and
+ * decrementing waiting_count, see comment below.
+ */
+ if (w == coupled->online_count) {
+ cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked);
+ cpuidle_coupled_poke_others(dev->cpu, coupled);
+ }
+
+retry:
+ /*
+ * Wait for all coupled cpus to be idle, using the deepest state
+ * allowed for a single cpu. If this was not the poking cpu, wait
+ * for at least one poke before leaving to avoid a race where
+ * two cpus could arrive at the waiting loop at the same time,
+ * but the first of the two to arrive could skip the loop without
+ * processing the pokes from the last to arrive.
+ */
+ while (!cpuidle_coupled_cpus_waiting(coupled) ||
+ !cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) {
+ if (cpuidle_coupled_clear_pokes(dev->cpu))
+ continue;
+
+ if (need_resched()) {
+ cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
+ goto out;
+ }
+
+ if (coupled->prevent) {
+ cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
+ goto out;
+ }
+
+ entered_state = cpuidle_enter_state(dev, drv,
+ drv->safe_state_index);
+ local_irq_disable();
+ }
+
+ cpuidle_coupled_clear_pokes(dev->cpu);
+ if (need_resched()) {
+ cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
+ goto out;
+ }
+
+ /*
+ * Make sure final poke status for this cpu is visible before setting
+ * cpu as ready.
+ */
+ smp_wmb();
+
+ /*
+ * All coupled cpus are probably idle. There is a small chance that
+ * one of the other cpus just became active. Increment the ready count,
+ * and spin until all coupled cpus have incremented the counter. Once a
+ * cpu has incremented the ready counter, it cannot abort idle and must
+ * spin until either all cpus have incremented the ready counter, or
+ * another cpu leaves idle and decrements the waiting counter.
+ */
+
+ cpuidle_coupled_set_ready(coupled);
+ while (!cpuidle_coupled_cpus_ready(coupled)) {
+ /* Check if any other cpus bailed out of idle. */
+ if (!cpuidle_coupled_cpus_waiting(coupled))
+ if (!cpuidle_coupled_set_not_ready(coupled))
+ goto retry;
+
+ cpu_relax();
+ }
+
+ /*
+ * Make sure read of all cpus ready is done before reading pending pokes
+ */
+ smp_rmb();
+
+ /*
+ * There is a small chance that a cpu left and reentered idle after this
+ * cpu saw that all cpus were waiting. The cpu that reentered idle will
+ * have sent this cpu a poke, which will still be pending after the
+ * ready loop. The pending interrupt may be lost by the interrupt
+ * controller when entering the deep idle state. It's not possible to
+ * clear a pending interrupt without turning interrupts on and handling
+ * it, and it's too late to turn on interrupts here, so reset the
+ * coupled idle state of all cpus and retry.
+ */
+ if (cpuidle_coupled_any_pokes_pending(coupled)) {
+ cpuidle_coupled_set_done(dev->cpu, coupled);
+ /* Wait for all cpus to see the pending pokes */
+ cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier);
+ goto reset;
+ }
+
+ /* all cpus have acked the coupled state */
+ next_state = cpuidle_coupled_get_state(dev, coupled);
+
+ entered_state = cpuidle_enter_state(dev, drv, next_state);
+
+ cpuidle_coupled_set_done(dev->cpu, coupled);
+
+out:
+ /*
+ * Normal cpuidle states are expected to return with irqs enabled.
+ * That leads to an inefficiency where a cpu receiving an interrupt
+ * that brings it out of idle will process that interrupt before
+ * exiting the idle enter function and decrementing ready_count. All
+ * other cpus will need to spin waiting for the cpu that is processing
+ * the interrupt. If the driver returns with interrupts disabled,
+ * all other cpus will loop back into the safe idle state instead of
+ * spinning, saving power.
+ *
+ * Calling local_irq_enable here allows coupled states to return with
+ * interrupts disabled, but won't cause problems for drivers that
+ * exit with interrupts enabled.
+ */
+ local_irq_enable();
+
+ /*
+ * Wait until all coupled cpus have exited idle. There is no risk that
+ * a cpu exits and re-enters the ready state because this cpu has
+ * already decremented its waiting_count.
+ */
+ while (!cpuidle_coupled_no_cpus_ready(coupled))
+ cpu_relax();
+
+ return entered_state;
+}
+
+static void cpuidle_coupled_update_online_cpus(struct cpuidle_coupled *coupled)
+{
+ cpumask_t cpus;
+ cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus);
+ coupled->online_count = cpumask_weight(&cpus);
+}
+
+/**
+ * cpuidle_coupled_register_device - register a coupled cpuidle device
+ * @dev: struct cpuidle_device for the current cpu
+ *
+ * Called from cpuidle_register_device to handle coupled idle init. Finds the
+ * cpuidle_coupled struct for this set of coupled cpus, or creates one if none
+ * exists yet.
+ */
+int cpuidle_coupled_register_device(struct cpuidle_device *dev)
+{
+ int cpu;
+ struct cpuidle_device *other_dev;
+ call_single_data_t *csd;
+ struct cpuidle_coupled *coupled;
+
+ if (cpumask_empty(&dev->coupled_cpus))
+ return 0;
+
+ for_each_cpu(cpu, &dev->coupled_cpus) {
+ other_dev = per_cpu(cpuidle_devices, cpu);
+ if (other_dev && other_dev->coupled) {
+ coupled = other_dev->coupled;
+ goto have_coupled;
+ }
+ }
+
+ /* No existing coupled info found, create a new one */
+ coupled = kzalloc(sizeof(struct cpuidle_coupled), GFP_KERNEL);
+ if (!coupled)
+ return -ENOMEM;
+
+ coupled->coupled_cpus = dev->coupled_cpus;
+
+have_coupled:
+ dev->coupled = coupled;
+ if (WARN_ON(!cpumask_equal(&dev->coupled_cpus, &coupled->coupled_cpus)))
+ coupled->prevent++;
+
+ cpuidle_coupled_update_online_cpus(coupled);
+
+ coupled->refcnt++;
+
+ csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu);
+ csd->func = cpuidle_coupled_handle_poke;
+ csd->info = (void *)(unsigned long)dev->cpu;
+
+ return 0;
+}
+
+/**
+ * cpuidle_coupled_unregister_device - unregister a coupled cpuidle device
+ * @dev: struct cpuidle_device for the current cpu
+ *
+ * Called from cpuidle_unregister_device to tear down coupled idle. Removes the
+ * cpu from the coupled idle set, and frees the cpuidle_coupled_info struct if
+ * this was the last cpu in the set.
+ */
+void cpuidle_coupled_unregister_device(struct cpuidle_device *dev)
+{
+ struct cpuidle_coupled *coupled = dev->coupled;
+
+ if (cpumask_empty(&dev->coupled_cpus))
+ return;
+
+ if (--coupled->refcnt)
+ kfree(coupled);
+ dev->coupled = NULL;
+}
+
+/**
+ * cpuidle_coupled_prevent_idle - prevent cpus from entering a coupled state
+ * @coupled: the struct coupled that contains the cpu that is changing state
+ *
+ * Disables coupled cpuidle on a coupled set of cpus. Used to ensure that
+ * cpu_online_mask doesn't change while cpus are coordinating coupled idle.
+ */
+static void cpuidle_coupled_prevent_idle(struct cpuidle_coupled *coupled)
+{
+ int cpu = get_cpu();
+
+ /* Force all cpus out of the waiting loop. */
+ coupled->prevent++;
+ cpuidle_coupled_poke_others(cpu, coupled);
+ put_cpu();
+ while (!cpuidle_coupled_no_cpus_waiting(coupled))
+ cpu_relax();
+}
+
+/**
+ * cpuidle_coupled_allow_idle - allows cpus to enter a coupled state
+ * @coupled: the struct coupled that contains the cpu that is changing state
+ *
+ * Enables coupled cpuidle on a coupled set of cpus. Used to ensure that
+ * cpu_online_mask doesn't change while cpus are coordinating coupled idle.
+ */
+static void cpuidle_coupled_allow_idle(struct cpuidle_coupled *coupled)
+{
+ int cpu = get_cpu();
+
+ /*
+ * Write barrier ensures readers see the new online_count when they
+ * see prevent == 0.
+ */
+ smp_wmb();
+ coupled->prevent--;
+ /* Force cpus out of the prevent loop. */
+ cpuidle_coupled_poke_others(cpu, coupled);
+ put_cpu();
+}
+
+static int coupled_cpu_online(unsigned int cpu)
+{
+ struct cpuidle_device *dev;
+
+ mutex_lock(&cpuidle_lock);
+
+ dev = per_cpu(cpuidle_devices, cpu);
+ if (dev && dev->coupled) {
+ cpuidle_coupled_update_online_cpus(dev->coupled);
+ cpuidle_coupled_allow_idle(dev->coupled);
+ }
+
+ mutex_unlock(&cpuidle_lock);
+ return 0;
+}
+
+static int coupled_cpu_up_prepare(unsigned int cpu)
+{
+ struct cpuidle_device *dev;
+
+ mutex_lock(&cpuidle_lock);
+
+ dev = per_cpu(cpuidle_devices, cpu);
+ if (dev && dev->coupled)
+ cpuidle_coupled_prevent_idle(dev->coupled);
+
+ mutex_unlock(&cpuidle_lock);
+ return 0;
+}
+
+static int __init cpuidle_coupled_init(void)
+{
+ int ret;
+
+ ret = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_COUPLED_PREPARE,
+ "cpuidle/coupled:prepare",
+ coupled_cpu_up_prepare,
+ coupled_cpu_online);
+ if (ret)
+ return ret;
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "cpuidle/coupled:online",
+ coupled_cpu_online,
+ coupled_cpu_up_prepare);
+ if (ret < 0)
+ cpuhp_remove_state_nocalls(CPUHP_CPUIDLE_COUPLED_PREPARE);
+ return ret;
+}
+core_initcall(cpuidle_coupled_init);
diff --git a/drivers/cpuidle/cpuidle-arm.c b/drivers/cpuidle/cpuidle-arm.c
new file mode 100644
index 000000000..df564d783
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-arm.c
@@ -0,0 +1,187 @@
+/*
+ * ARM/ARM64 generic CPU idle driver.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "CPUidle arm: " fmt
+
+#include <linux/cpuidle.h>
+#include <linux/cpumask.h>
+#include <linux/cpu_pm.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/topology.h>
+
+#include <asm/cpuidle.h>
+
+#include "dt_idle_states.h"
+
+/*
+ * arm_enter_idle_state - Programs CPU to enter the specified state
+ *
+ * dev: cpuidle device
+ * drv: cpuidle driver
+ * idx: state index
+ *
+ * Called from the CPUidle framework to program the device to the
+ * specified target state selected by the governor.
+ */
+static int arm_enter_idle_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
+{
+ /*
+ * Pass idle state index to arm_cpuidle_suspend which in turn
+ * will call the CPU ops suspend protocol with idle index as a
+ * parameter.
+ */
+ return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx);
+}
+
+static struct cpuidle_driver arm_idle_driver __initdata = {
+ .name = "arm_idle",
+ .owner = THIS_MODULE,
+ /*
+ * State at index 0 is standby wfi and considered standard
+ * on all ARM platforms. If in some platforms simple wfi
+ * can't be used as "state 0", DT bindings must be implemented
+ * to work around this issue and allow installing a special
+ * handler for idle state index 0.
+ */
+ .states[0] = {
+ .enter = arm_enter_idle_state,
+ .exit_latency = 1,
+ .target_residency = 1,
+ .power_usage = UINT_MAX,
+ .name = "WFI",
+ .desc = "ARM WFI",
+ }
+};
+
+static const struct of_device_id arm_idle_state_match[] __initconst = {
+ { .compatible = "arm,idle-state",
+ .data = arm_enter_idle_state },
+ { },
+};
+
+/*
+ * arm_idle_init_cpu
+ *
+ * Registers the arm specific cpuidle driver with the cpuidle
+ * framework. It relies on core code to parse the idle states
+ * and initialize them using driver data structures accordingly.
+ */
+static int __init arm_idle_init_cpu(int cpu)
+{
+ int ret;
+ struct cpuidle_driver *drv;
+ struct cpuidle_device *dev;
+
+ drv = kmemdup(&arm_idle_driver, sizeof(*drv), GFP_KERNEL);
+ if (!drv)
+ return -ENOMEM;
+
+ drv->cpumask = (struct cpumask *)cpumask_of(cpu);
+
+ /*
+ * Initialize idle states data, starting at index 1. This
+ * driver is DT only, if no DT idle states are detected (ret
+ * == 0) let the driver initialization fail accordingly since
+ * there is no reason to initialize the idle driver if only
+ * wfi is supported.
+ */
+ ret = dt_init_idle_driver(drv, arm_idle_state_match, 1);
+ if (ret <= 0) {
+ ret = ret ? : -ENODEV;
+ goto out_kfree_drv;
+ }
+
+ /*
+ * Call arch CPU operations in order to initialize
+ * idle states suspend back-end specific data
+ */
+ ret = arm_cpuidle_init(cpu);
+
+ /*
+ * Allow the initialization to continue for other CPUs, if the reported
+ * failure is a HW misconfiguration/breakage (-ENXIO).
+ */
+ if (ret) {
+ pr_err("CPU %d failed to init idle CPU ops\n", cpu);
+ ret = ret == -ENXIO ? 0 : ret;
+ goto out_kfree_drv;
+ }
+
+ ret = cpuidle_register_driver(drv);
+ if (ret) {
+ if (ret != -EBUSY)
+ pr_err("Failed to register cpuidle driver\n");
+ goto out_kfree_drv;
+ }
+
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev) {
+ ret = -ENOMEM;
+ goto out_unregister_drv;
+ }
+ dev->cpu = cpu;
+
+ ret = cpuidle_register_device(dev);
+ if (ret) {
+ pr_err("Failed to register cpuidle device for CPU %d\n",
+ cpu);
+ goto out_kfree_dev;
+ }
+
+ return 0;
+
+out_kfree_dev:
+ kfree(dev);
+out_unregister_drv:
+ cpuidle_unregister_driver(drv);
+out_kfree_drv:
+ kfree(drv);
+ return ret;
+}
+
+/*
+ * arm_idle_init - Initializes arm cpuidle driver
+ *
+ * Initializes arm cpuidle driver for all CPUs, if any CPU fails
+ * to register cpuidle driver then rollback to cancel all CPUs
+ * registeration.
+ */
+static int __init arm_idle_init(void)
+{
+ int cpu, ret;
+ struct cpuidle_driver *drv;
+ struct cpuidle_device *dev;
+
+ for_each_possible_cpu(cpu) {
+ ret = arm_idle_init_cpu(cpu);
+ if (ret)
+ goto out_fail;
+ }
+
+ return 0;
+
+out_fail:
+ while (--cpu >= 0) {
+ dev = per_cpu(cpuidle_devices, cpu);
+ drv = cpuidle_get_cpu_driver(dev);
+ cpuidle_unregister_device(dev);
+ cpuidle_unregister_driver(drv);
+ kfree(dev);
+ kfree(drv);
+ }
+
+ return ret;
+}
+device_initcall(arm_idle_init);
diff --git a/drivers/cpuidle/cpuidle-at91.c b/drivers/cpuidle/cpuidle-at91.c
new file mode 100644
index 000000000..9c5853b6c
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-at91.c
@@ -0,0 +1,65 @@
+/*
+ * based on arch/arm/mach-kirkwood/cpuidle.c
+ *
+ * CPU idle support for AT91 SoC
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * The cpu idle uses wait-for-interrupt and RAM self refresh in order
+ * to implement two idle states -
+ * #1 wait-for-interrupt
+ * #2 wait-for-interrupt and RAM self refresh
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/cpuidle.h>
+#include <linux/io.h>
+#include <linux/export.h>
+#include <asm/cpuidle.h>
+
+#define AT91_MAX_STATES 2
+
+static void (*at91_standby)(void);
+
+/* Actual code that puts the SoC in different idle states */
+static int at91_enter_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ at91_standby();
+ return index;
+}
+
+static struct cpuidle_driver at91_idle_driver = {
+ .name = "at91_idle",
+ .owner = THIS_MODULE,
+ .states[0] = ARM_CPUIDLE_WFI_STATE,
+ .states[1] = {
+ .enter = at91_enter_idle,
+ .exit_latency = 10,
+ .target_residency = 10000,
+ .name = "RAM_SR",
+ .desc = "WFI and DDR Self Refresh",
+ },
+ .state_count = AT91_MAX_STATES,
+};
+
+/* Initialize CPU idle by registering the idle states */
+static int at91_cpuidle_probe(struct platform_device *dev)
+{
+ at91_standby = (void *)(dev->dev.platform_data);
+
+ return cpuidle_register(&at91_idle_driver, NULL);
+}
+
+static struct platform_driver at91_cpuidle_driver = {
+ .driver = {
+ .name = "cpuidle-at91",
+ },
+ .probe = at91_cpuidle_probe,
+};
+builtin_platform_driver(at91_cpuidle_driver);
diff --git a/drivers/cpuidle/cpuidle-big_little.c b/drivers/cpuidle/cpuidle-big_little.c
new file mode 100644
index 000000000..b44476a1b
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-big_little.c
@@ -0,0 +1,233 @@
+/*
+ * Copyright (c) 2013 ARM/Linaro
+ *
+ * Authors: Daniel Lezcano <daniel.lezcano@linaro.org>
+ * Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ * Nicolas Pitre <nicolas.pitre@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Maintainer: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ * Maintainer: Daniel Lezcano <daniel.lezcano@linaro.org>
+ */
+#include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+
+#include <asm/cpu.h>
+#include <asm/cputype.h>
+#include <asm/cpuidle.h>
+#include <asm/mcpm.h>
+#include <asm/smp_plat.h>
+#include <asm/suspend.h>
+
+#include "dt_idle_states.h"
+
+static int bl_enter_powerdown(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx);
+
+/*
+ * NB: Owing to current menu governor behaviour big and LITTLE
+ * index 1 states have to define exit_latency and target_residency for
+ * cluster state since, when all CPUs in a cluster hit it, the cluster
+ * can be shutdown. This means that when a single CPU enters this state
+ * the exit_latency and target_residency values are somewhat overkill.
+ * There is no notion of cluster states in the menu governor, so CPUs
+ * have to define CPU states where possibly the cluster will be shutdown
+ * depending on the state of other CPUs. idle states entry and exit happen
+ * at random times; however the cluster state provides target_residency
+ * values as if all CPUs in a cluster enter the state at once; this is
+ * somewhat optimistic and behaviour should be fixed either in the governor
+ * or in the MCPM back-ends.
+ * To make this driver 100% generic the number of states and the exit_latency
+ * target_residency values must be obtained from device tree bindings.
+ *
+ * exit_latency: refers to the TC2 vexpress test chip and depends on the
+ * current cluster operating point. It is the time it takes to get the CPU
+ * up and running when the CPU is powered up on cluster wake-up from shutdown.
+ * Current values for big and LITTLE clusters are provided for clusters
+ * running at default operating points.
+ *
+ * target_residency: it is the minimum amount of time the cluster has
+ * to be down to break even in terms of power consumption. cluster
+ * shutdown has inherent dynamic power costs (L2 writebacks to DRAM
+ * being the main factor) that depend on the current operating points.
+ * The current values for both clusters are provided for a CPU whose half
+ * of L2 lines are dirty and require cleaning to DRAM, and takes into
+ * account leakage static power values related to the vexpress TC2 testchip.
+ */
+static struct cpuidle_driver bl_idle_little_driver = {
+ .name = "little_idle",
+ .owner = THIS_MODULE,
+ .states[0] = ARM_CPUIDLE_WFI_STATE,
+ .states[1] = {
+ .enter = bl_enter_powerdown,
+ .exit_latency = 700,
+ .target_residency = 2500,
+ .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .name = "C1",
+ .desc = "ARM little-cluster power down",
+ },
+ .state_count = 2,
+};
+
+static const struct of_device_id bl_idle_state_match[] __initconst = {
+ { .compatible = "arm,idle-state",
+ .data = bl_enter_powerdown },
+ { },
+};
+
+static struct cpuidle_driver bl_idle_big_driver = {
+ .name = "big_idle",
+ .owner = THIS_MODULE,
+ .states[0] = ARM_CPUIDLE_WFI_STATE,
+ .states[1] = {
+ .enter = bl_enter_powerdown,
+ .exit_latency = 500,
+ .target_residency = 2000,
+ .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .name = "C1",
+ .desc = "ARM big-cluster power down",
+ },
+ .state_count = 2,
+};
+
+/*
+ * notrace prevents trace shims from getting inserted where they
+ * should not. Global jumps and ldrex/strex must not be inserted
+ * in power down sequences where caches and MMU may be turned off.
+ */
+static int notrace bl_powerdown_finisher(unsigned long arg)
+{
+ /* MCPM works with HW CPU identifiers */
+ unsigned int mpidr = read_cpuid_mpidr();
+ unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+ unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+
+ mcpm_set_entry_vector(cpu, cluster, cpu_resume);
+ mcpm_cpu_suspend();
+
+ /* return value != 0 means failure */
+ return 1;
+}
+
+/**
+ * bl_enter_powerdown - Programs CPU to enter the specified state
+ * @dev: cpuidle device
+ * @drv: The target state to be programmed
+ * @idx: state index
+ *
+ * Called from the CPUidle framework to program the device to the
+ * specified target state selected by the governor.
+ */
+static int bl_enter_powerdown(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int idx)
+{
+ cpu_pm_enter();
+
+ cpu_suspend(0, bl_powerdown_finisher);
+
+ /* signals the MCPM core that CPU is out of low power state */
+ mcpm_cpu_powered_up();
+
+ cpu_pm_exit();
+
+ return idx;
+}
+
+static int __init bl_idle_driver_init(struct cpuidle_driver *drv, int part_id)
+{
+ struct cpumask *cpumask;
+ int cpu;
+
+ cpumask = kzalloc(cpumask_size(), GFP_KERNEL);
+ if (!cpumask)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu)
+ if (smp_cpuid_part(cpu) == part_id)
+ cpumask_set_cpu(cpu, cpumask);
+
+ drv->cpumask = cpumask;
+
+ return 0;
+}
+
+static const struct of_device_id compatible_machine_match[] = {
+ { .compatible = "arm,vexpress,v2p-ca15_a7" },
+ { .compatible = "samsung,exynos5420" },
+ { .compatible = "samsung,exynos5800" },
+ {},
+};
+
+static int __init bl_idle_init(void)
+{
+ int ret;
+ struct device_node *root = of_find_node_by_path("/");
+ const struct of_device_id *match_id;
+
+ if (!root)
+ return -ENODEV;
+
+ /*
+ * Initialize the driver just for a compliant set of machines
+ */
+ match_id = of_match_node(compatible_machine_match, root);
+
+ of_node_put(root);
+
+ if (!match_id)
+ return -ENODEV;
+
+ if (!mcpm_is_available())
+ return -EUNATCH;
+
+ /*
+ * For now the differentiation between little and big cores
+ * is based on the part number. A7 cores are considered little
+ * cores, A15 are considered big cores. This distinction may
+ * evolve in the future with a more generic matching approach.
+ */
+ ret = bl_idle_driver_init(&bl_idle_little_driver,
+ ARM_CPU_PART_CORTEX_A7);
+ if (ret)
+ return ret;
+
+ ret = bl_idle_driver_init(&bl_idle_big_driver, ARM_CPU_PART_CORTEX_A15);
+ if (ret)
+ goto out_uninit_little;
+
+ /* Start at index 1, index 0 standard WFI */
+ ret = dt_init_idle_driver(&bl_idle_big_driver, bl_idle_state_match, 1);
+ if (ret < 0)
+ goto out_uninit_big;
+
+ /* Start at index 1, index 0 standard WFI */
+ ret = dt_init_idle_driver(&bl_idle_little_driver,
+ bl_idle_state_match, 1);
+ if (ret < 0)
+ goto out_uninit_big;
+
+ ret = cpuidle_register(&bl_idle_little_driver, NULL);
+ if (ret)
+ goto out_uninit_big;
+
+ ret = cpuidle_register(&bl_idle_big_driver, NULL);
+ if (ret)
+ goto out_unregister_little;
+
+ return 0;
+
+out_unregister_little:
+ cpuidle_unregister(&bl_idle_little_driver);
+out_uninit_big:
+ kfree(bl_idle_big_driver.cpumask);
+out_uninit_little:
+ kfree(bl_idle_little_driver.cpumask);
+
+ return ret;
+}
+device_initcall(bl_idle_init);
diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c
new file mode 100644
index 000000000..ea9728fde
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-calxeda.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2012 Calxeda, Inc.
+ *
+ * Based on arch/arm/plat-mxc/cpuidle.c: #v3.7
+ * Copyright 2012 Freescale Semiconductor, Inc.
+ * Copyright 2012 Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Maintainer: Rob Herring <rob.herring@calxeda.com>
+ */
+
+#include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/platform_device.h>
+#include <linux/psci.h>
+
+#include <asm/cpuidle.h>
+#include <asm/suspend.h>
+
+#include <uapi/linux/psci.h>
+
+#define CALXEDA_IDLE_PARAM \
+ ((0 << PSCI_0_2_POWER_STATE_ID_SHIFT) | \
+ (0 << PSCI_0_2_POWER_STATE_AFFL_SHIFT) | \
+ (PSCI_POWER_STATE_TYPE_POWER_DOWN << PSCI_0_2_POWER_STATE_TYPE_SHIFT))
+
+static int calxeda_idle_finish(unsigned long val)
+{
+ return psci_ops.cpu_suspend(CALXEDA_IDLE_PARAM, __pa(cpu_resume));
+}
+
+static int calxeda_pwrdown_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ cpu_pm_enter();
+ cpu_suspend(0, calxeda_idle_finish);
+ cpu_pm_exit();
+
+ return index;
+}
+
+static struct cpuidle_driver calxeda_idle_driver = {
+ .name = "calxeda_idle",
+ .states = {
+ ARM_CPUIDLE_WFI_STATE,
+ {
+ .name = "PG",
+ .desc = "Power Gate",
+ .exit_latency = 30,
+ .power_usage = 50,
+ .target_residency = 200,
+ .enter = calxeda_pwrdown_idle,
+ },
+ },
+ .state_count = 2,
+};
+
+static int calxeda_cpuidle_probe(struct platform_device *pdev)
+{
+ return cpuidle_register(&calxeda_idle_driver, NULL);
+}
+
+static struct platform_driver calxeda_cpuidle_plat_driver = {
+ .driver = {
+ .name = "cpuidle-calxeda",
+ },
+ .probe = calxeda_cpuidle_probe,
+};
+builtin_platform_driver(calxeda_cpuidle_plat_driver);
diff --git a/drivers/cpuidle/cpuidle-clps711x.c b/drivers/cpuidle/cpuidle-clps711x.c
new file mode 100644
index 000000000..66a9f231e
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-clps711x.c
@@ -0,0 +1,59 @@
+/*
+ * CLPS711X CPU idle driver
+ *
+ * Copyright (C) 2014 Alexander Shiyan <shc_work@mail.ru>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/cpuidle.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+
+#define CLPS711X_CPUIDLE_NAME "clps711x-cpuidle"
+
+static void __iomem *clps711x_halt;
+
+static int clps711x_cpuidle_halt(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ writel(0xaa, clps711x_halt);
+
+ return index;
+}
+
+static struct cpuidle_driver clps711x_idle_driver = {
+ .name = CLPS711X_CPUIDLE_NAME,
+ .owner = THIS_MODULE,
+ .states[0] = {
+ .name = "HALT",
+ .desc = "CLPS711X HALT",
+ .enter = clps711x_cpuidle_halt,
+ .exit_latency = 1,
+ },
+ .state_count = 1,
+};
+
+static int __init clps711x_cpuidle_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ clps711x_halt = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(clps711x_halt))
+ return PTR_ERR(clps711x_halt);
+
+ return cpuidle_register(&clps711x_idle_driver, NULL);
+}
+
+static struct platform_driver clps711x_cpuidle_driver = {
+ .driver = {
+ .name = CLPS711X_CPUIDLE_NAME,
+ },
+};
+builtin_platform_driver_probe(clps711x_cpuidle_driver, clps711x_cpuidle_probe);
diff --git a/drivers/cpuidle/cpuidle-cps.c b/drivers/cpuidle/cpuidle-cps.c
new file mode 100644
index 000000000..dac8ff639
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-cps.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2014 Imagination Technologies
+ * Author: Paul Burton <paul.burton@mips.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/cpu_pm.h>
+#include <linux/cpuidle.h>
+#include <linux/init.h>
+
+#include <asm/idle.h>
+#include <asm/pm-cps.h>
+
+/* Enumeration of the various idle states this driver may enter */
+enum cps_idle_state {
+ STATE_WAIT = 0, /* MIPS wait instruction, coherent */
+ STATE_NC_WAIT, /* MIPS wait instruction, non-coherent */
+ STATE_CLOCK_GATED, /* Core clock gated */
+ STATE_POWER_GATED, /* Core power gated */
+ STATE_COUNT
+};
+
+static int cps_nc_enter(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ enum cps_pm_state pm_state;
+ int err;
+
+ /*
+ * At least one core must remain powered up & clocked in order for the
+ * system to have any hope of functioning.
+ *
+ * TODO: don't treat core 0 specially, just prevent the final core
+ * TODO: remap interrupt affinity temporarily
+ */
+ if (cpus_are_siblings(0, dev->cpu) && (index > STATE_NC_WAIT))
+ index = STATE_NC_WAIT;
+
+ /* Select the appropriate cps_pm_state */
+ switch (index) {
+ case STATE_NC_WAIT:
+ pm_state = CPS_PM_NC_WAIT;
+ break;
+ case STATE_CLOCK_GATED:
+ pm_state = CPS_PM_CLOCK_GATED;
+ break;
+ case STATE_POWER_GATED:
+ pm_state = CPS_PM_POWER_GATED;
+ break;
+ default:
+ BUG();
+ return -EINVAL;
+ }
+
+ /* Notify listeners the CPU is about to power down */
+ if ((pm_state == CPS_PM_POWER_GATED) && cpu_pm_enter())
+ return -EINTR;
+
+ /* Enter that state */
+ err = cps_pm_enter_state(pm_state);
+
+ /* Notify listeners the CPU is back up */
+ if (pm_state == CPS_PM_POWER_GATED)
+ cpu_pm_exit();
+
+ return err ?: index;
+}
+
+static struct cpuidle_driver cps_driver = {
+ .name = "cpc_cpuidle",
+ .owner = THIS_MODULE,
+ .states = {
+ [STATE_WAIT] = MIPS_CPUIDLE_WAIT_STATE,
+ [STATE_NC_WAIT] = {
+ .enter = cps_nc_enter,
+ .exit_latency = 200,
+ .target_residency = 450,
+ .name = "nc-wait",
+ .desc = "non-coherent MIPS wait",
+ },
+ [STATE_CLOCK_GATED] = {
+ .enter = cps_nc_enter,
+ .exit_latency = 300,
+ .target_residency = 700,
+ .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .name = "clock-gated",
+ .desc = "core clock gated",
+ },
+ [STATE_POWER_GATED] = {
+ .enter = cps_nc_enter,
+ .exit_latency = 600,
+ .target_residency = 1000,
+ .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .name = "power-gated",
+ .desc = "core power gated",
+ },
+ },
+ .state_count = STATE_COUNT,
+ .safe_state_index = 0,
+};
+
+static void __init cps_cpuidle_unregister(void)
+{
+ int cpu;
+ struct cpuidle_device *device;
+
+ for_each_possible_cpu(cpu) {
+ device = &per_cpu(cpuidle_dev, cpu);
+ cpuidle_unregister_device(device);
+ }
+
+ cpuidle_unregister_driver(&cps_driver);
+}
+
+static int __init cps_cpuidle_init(void)
+{
+ int err, cpu, i;
+ struct cpuidle_device *device;
+
+ /* Detect supported states */
+ if (!cps_pm_support_state(CPS_PM_POWER_GATED))
+ cps_driver.state_count = STATE_CLOCK_GATED + 1;
+ if (!cps_pm_support_state(CPS_PM_CLOCK_GATED))
+ cps_driver.state_count = STATE_NC_WAIT + 1;
+ if (!cps_pm_support_state(CPS_PM_NC_WAIT))
+ cps_driver.state_count = STATE_WAIT + 1;
+
+ /* Inform the user if some states are unavailable */
+ if (cps_driver.state_count < STATE_COUNT) {
+ pr_info("cpuidle-cps: limited to ");
+ switch (cps_driver.state_count - 1) {
+ case STATE_WAIT:
+ pr_cont("coherent wait\n");
+ break;
+ case STATE_NC_WAIT:
+ pr_cont("non-coherent wait\n");
+ break;
+ case STATE_CLOCK_GATED:
+ pr_cont("clock gating\n");
+ break;
+ }
+ }
+
+ /*
+ * Set the coupled flag on the appropriate states if this system
+ * requires it.
+ */
+ if (coupled_coherence)
+ for (i = STATE_NC_WAIT; i < cps_driver.state_count; i++)
+ cps_driver.states[i].flags |= CPUIDLE_FLAG_COUPLED;
+
+ err = cpuidle_register_driver(&cps_driver);
+ if (err) {
+ pr_err("Failed to register CPS cpuidle driver\n");
+ return err;
+ }
+
+ for_each_possible_cpu(cpu) {
+ device = &per_cpu(cpuidle_dev, cpu);
+ device->cpu = cpu;
+#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
+ cpumask_copy(&device->coupled_cpus, &cpu_sibling_map[cpu]);
+#endif
+
+ err = cpuidle_register_device(device);
+ if (err) {
+ pr_err("Failed to register CPU%d cpuidle device\n",
+ cpu);
+ goto err_out;
+ }
+ }
+
+ return 0;
+err_out:
+ cps_cpuidle_unregister();
+ return err;
+}
+device_initcall(cps_cpuidle_init);
diff --git a/drivers/cpuidle/cpuidle-exynos.c b/drivers/cpuidle/cpuidle-exynos.c
new file mode 100644
index 000000000..0171a6e19
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-exynos.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2011-2014 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com
+ *
+ * Coupled cpuidle support based on the work of:
+ * Colin Cross <ccross@android.com>
+ * Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+*/
+
+#include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/platform_data/cpuidle-exynos.h>
+
+#include <asm/suspend.h>
+#include <asm/cpuidle.h>
+
+static atomic_t exynos_idle_barrier;
+
+static struct cpuidle_exynos_data *exynos_cpuidle_pdata;
+static void (*exynos_enter_aftr)(void);
+
+static int exynos_enter_coupled_lowpower(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ int ret;
+
+ exynos_cpuidle_pdata->pre_enter_aftr();
+
+ /*
+ * Waiting all cpus to reach this point at the same moment
+ */
+ cpuidle_coupled_parallel_barrier(dev, &exynos_idle_barrier);
+
+ /*
+ * Both cpus will reach this point at the same time
+ */
+ ret = dev->cpu ? exynos_cpuidle_pdata->cpu1_powerdown()
+ : exynos_cpuidle_pdata->cpu0_enter_aftr();
+ if (ret)
+ index = ret;
+
+ /*
+ * Waiting all cpus to finish the power sequence before going further
+ */
+ cpuidle_coupled_parallel_barrier(dev, &exynos_idle_barrier);
+
+ exynos_cpuidle_pdata->post_enter_aftr();
+
+ return index;
+}
+
+static int exynos_enter_lowpower(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ int new_index = index;
+
+ /* AFTR can only be entered when cores other than CPU0 are offline */
+ if (num_online_cpus() > 1 || dev->cpu != 0)
+ new_index = drv->safe_state_index;
+
+ if (new_index == 0)
+ return arm_cpuidle_simple_enter(dev, drv, new_index);
+
+ exynos_enter_aftr();
+
+ return new_index;
+}
+
+static struct cpuidle_driver exynos_idle_driver = {
+ .name = "exynos_idle",
+ .owner = THIS_MODULE,
+ .states = {
+ [0] = ARM_CPUIDLE_WFI_STATE,
+ [1] = {
+ .enter = exynos_enter_lowpower,
+ .exit_latency = 300,
+ .target_residency = 100000,
+ .name = "C1",
+ .desc = "ARM power down",
+ },
+ },
+ .state_count = 2,
+ .safe_state_index = 0,
+};
+
+static struct cpuidle_driver exynos_coupled_idle_driver = {
+ .name = "exynos_coupled_idle",
+ .owner = THIS_MODULE,
+ .states = {
+ [0] = ARM_CPUIDLE_WFI_STATE,
+ [1] = {
+ .enter = exynos_enter_coupled_lowpower,
+ .exit_latency = 5000,
+ .target_residency = 10000,
+ .flags = CPUIDLE_FLAG_COUPLED |
+ CPUIDLE_FLAG_TIMER_STOP,
+ .name = "C1",
+ .desc = "ARM power down",
+ },
+ },
+ .state_count = 2,
+ .safe_state_index = 0,
+};
+
+static int exynos_cpuidle_probe(struct platform_device *pdev)
+{
+ int ret;
+
+ if (IS_ENABLED(CONFIG_SMP) &&
+ (of_machine_is_compatible("samsung,exynos4210") ||
+ of_machine_is_compatible("samsung,exynos3250"))) {
+ exynos_cpuidle_pdata = pdev->dev.platform_data;
+
+ ret = cpuidle_register(&exynos_coupled_idle_driver,
+ cpu_possible_mask);
+ } else {
+ exynos_enter_aftr = (void *)(pdev->dev.platform_data);
+
+ ret = cpuidle_register(&exynos_idle_driver, NULL);
+ }
+
+ if (ret) {
+ dev_err(&pdev->dev, "failed to register cpuidle driver\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static struct platform_driver exynos_cpuidle_driver = {
+ .probe = exynos_cpuidle_probe,
+ .driver = {
+ .name = "exynos_cpuidle",
+ },
+};
+builtin_platform_driver(exynos_cpuidle_driver);
diff --git a/drivers/cpuidle/cpuidle-kirkwood.c b/drivers/cpuidle/cpuidle-kirkwood.c
new file mode 100644
index 000000000..d23d8f468
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-kirkwood.c
@@ -0,0 +1,87 @@
+/*
+ * CPU idle Marvell Kirkwood SoCs
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * The cpu idle uses wait-for-interrupt and DDR self refresh in order
+ * to implement two idle states -
+ * #1 wait-for-interrupt
+ * #2 wait-for-interrupt and DDR self refresh
+ *
+ * Maintainer: Jason Cooper <jason@lakedaemon.net>
+ * Maintainer: Andrew Lunn <andrew@lunn.ch>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/cpuidle.h>
+#include <linux/io.h>
+#include <linux/export.h>
+#include <asm/cpuidle.h>
+
+#define KIRKWOOD_MAX_STATES 2
+
+static void __iomem *ddr_operation_base;
+
+/* Actual code that puts the SoC in different idle states */
+static int kirkwood_enter_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ writel(0x7, ddr_operation_base);
+ cpu_do_idle();
+
+ return index;
+}
+
+static struct cpuidle_driver kirkwood_idle_driver = {
+ .name = "kirkwood_idle",
+ .owner = THIS_MODULE,
+ .states[0] = ARM_CPUIDLE_WFI_STATE,
+ .states[1] = {
+ .enter = kirkwood_enter_idle,
+ .exit_latency = 10,
+ .target_residency = 100000,
+ .name = "DDR SR",
+ .desc = "WFI and DDR Self Refresh",
+ },
+ .state_count = KIRKWOOD_MAX_STATES,
+};
+
+/* Initialize CPU idle by registering the idle states */
+static int kirkwood_cpuidle_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ ddr_operation_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(ddr_operation_base))
+ return PTR_ERR(ddr_operation_base);
+
+ return cpuidle_register(&kirkwood_idle_driver, NULL);
+}
+
+static int kirkwood_cpuidle_remove(struct platform_device *pdev)
+{
+ cpuidle_unregister(&kirkwood_idle_driver);
+ return 0;
+}
+
+static struct platform_driver kirkwood_cpuidle_driver = {
+ .probe = kirkwood_cpuidle_probe,
+ .remove = kirkwood_cpuidle_remove,
+ .driver = {
+ .name = "kirkwood_cpuidle",
+ },
+};
+
+module_platform_driver(kirkwood_cpuidle_driver);
+
+MODULE_AUTHOR("Andrew Lunn <andrew@lunn.ch>");
+MODULE_DESCRIPTION("Kirkwood cpu idle driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:kirkwood-cpuidle");
diff --git a/drivers/cpuidle/cpuidle-mvebu-v7.c b/drivers/cpuidle/cpuidle-mvebu-v7.c
new file mode 100644
index 000000000..01a856971
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-mvebu-v7.c
@@ -0,0 +1,139 @@
+/*
+ * Marvell Armada 370, 38x and XP SoC cpuidle driver
+ *
+ * Copyright (C) 2014 Marvell
+ *
+ * Nadav Haklai <nadavh@marvell.com>
+ * Gregory CLEMENT <gregory.clement@free-electrons.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ *
+ * Maintainer: Gregory CLEMENT <gregory.clement@free-electrons.com>
+ */
+
+#include <linux/cpu_pm.h>
+#include <linux/cpuidle.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/suspend.h>
+#include <linux/platform_device.h>
+#include <asm/cpuidle.h>
+
+#define MVEBU_V7_FLAG_DEEP_IDLE 0x10000
+
+static int (*mvebu_v7_cpu_suspend)(int);
+
+static int mvebu_v7_enter_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ int ret;
+ bool deepidle = false;
+ cpu_pm_enter();
+
+ if (drv->states[index].flags & MVEBU_V7_FLAG_DEEP_IDLE)
+ deepidle = true;
+
+ ret = mvebu_v7_cpu_suspend(deepidle);
+ cpu_pm_exit();
+
+ if (ret)
+ return ret;
+
+ return index;
+}
+
+static struct cpuidle_driver armadaxp_idle_driver = {
+ .name = "armada_xp_idle",
+ .states[0] = ARM_CPUIDLE_WFI_STATE,
+ .states[1] = {
+ .enter = mvebu_v7_enter_idle,
+ .exit_latency = 100,
+ .power_usage = 50,
+ .target_residency = 1000,
+ .name = "MV CPU IDLE",
+ .desc = "CPU power down",
+ },
+ .states[2] = {
+ .enter = mvebu_v7_enter_idle,
+ .exit_latency = 1000,
+ .power_usage = 5,
+ .target_residency = 10000,
+ .flags = MVEBU_V7_FLAG_DEEP_IDLE,
+ .name = "MV CPU DEEP IDLE",
+ .desc = "CPU and L2 Fabric power down",
+ },
+ .state_count = 3,
+};
+
+static struct cpuidle_driver armada370_idle_driver = {
+ .name = "armada_370_idle",
+ .states[0] = ARM_CPUIDLE_WFI_STATE,
+ .states[1] = {
+ .enter = mvebu_v7_enter_idle,
+ .exit_latency = 100,
+ .power_usage = 5,
+ .target_residency = 1000,
+ .flags = MVEBU_V7_FLAG_DEEP_IDLE,
+ .name = "Deep Idle",
+ .desc = "CPU and L2 Fabric power down",
+ },
+ .state_count = 2,
+};
+
+static struct cpuidle_driver armada38x_idle_driver = {
+ .name = "armada_38x_idle",
+ .states[0] = ARM_CPUIDLE_WFI_STATE,
+ .states[1] = {
+ .enter = mvebu_v7_enter_idle,
+ .exit_latency = 10,
+ .power_usage = 5,
+ .target_residency = 100,
+ .name = "Idle",
+ .desc = "CPU and SCU power down",
+ },
+ .state_count = 2,
+};
+
+static int mvebu_v7_cpuidle_probe(struct platform_device *pdev)
+{
+ const struct platform_device_id *id = pdev->id_entry;
+
+ if (!id)
+ return -EINVAL;
+
+ mvebu_v7_cpu_suspend = pdev->dev.platform_data;
+
+ return cpuidle_register((struct cpuidle_driver *)id->driver_data, NULL);
+}
+
+static const struct platform_device_id mvebu_cpuidle_ids[] = {
+ {
+ .name = "cpuidle-armada-xp",
+ .driver_data = (unsigned long)&armadaxp_idle_driver,
+ }, {
+ .name = "cpuidle-armada-370",
+ .driver_data = (unsigned long)&armada370_idle_driver,
+ }, {
+ .name = "cpuidle-armada-38x",
+ .driver_data = (unsigned long)&armada38x_idle_driver,
+ },
+ {}
+};
+
+static struct platform_driver mvebu_cpuidle_driver = {
+ .probe = mvebu_v7_cpuidle_probe,
+ .driver = {
+ .name = "cpuidle-mbevu",
+ .suppress_bind_attrs = true,
+ },
+ .id_table = mvebu_cpuidle_ids,
+};
+
+builtin_platform_driver(mvebu_cpuidle_driver);
+
+MODULE_AUTHOR("Gregory CLEMENT <gregory.clement@free-electrons.com>");
+MODULE_DESCRIPTION("Marvell EBU v7 cpuidle driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
new file mode 100644
index 000000000..84b1ebe21
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * cpuidle-powernv - idle state cpuidle driver.
+ * Adapted from drivers/cpuidle/cpuidle-pseries
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+#include <linux/clockchips.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/opal.h>
+#include <asm/runlatch.h>
+#include <asm/cpuidle.h>
+
+/*
+ * Expose only those Hardware idle states via the cpuidle framework
+ * that have latency value below POWERNV_THRESHOLD_LATENCY_NS.
+ */
+#define POWERNV_THRESHOLD_LATENCY_NS 200000
+
+static struct cpuidle_driver powernv_idle_driver = {
+ .name = "powernv_idle",
+ .owner = THIS_MODULE,
+};
+
+static int max_idle_state __read_mostly;
+static struct cpuidle_state *cpuidle_state_table __read_mostly;
+
+struct stop_psscr_table {
+ u64 val;
+ u64 mask;
+};
+
+static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly;
+
+static u64 default_snooze_timeout __read_mostly;
+static bool snooze_timeout_en __read_mostly;
+
+static u64 get_snooze_timeout(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ int i;
+
+ if (unlikely(!snooze_timeout_en))
+ return default_snooze_timeout;
+
+ for (i = index + 1; i < drv->state_count; i++) {
+ struct cpuidle_state *s = &drv->states[i];
+ struct cpuidle_state_usage *su = &dev->states_usage[i];
+
+ if (s->disabled || su->disable)
+ continue;
+
+ return s->target_residency * tb_ticks_per_usec;
+ }
+
+ return default_snooze_timeout;
+}
+
+static int snooze_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ u64 snooze_exit_time;
+
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ local_irq_enable();
+
+ snooze_exit_time = get_tb() + get_snooze_timeout(dev, drv, index);
+ ppc64_runlatch_off();
+ HMT_very_low();
+ while (!need_resched()) {
+ if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) {
+ /*
+ * Task has not woken up but we are exiting the polling
+ * loop anyway. Require a barrier after polling is
+ * cleared to order subsequent test of need_resched().
+ */
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb();
+ break;
+ }
+ }
+
+ HMT_medium();
+ ppc64_runlatch_on();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+
+ local_irq_disable();
+
+ return index;
+}
+
+static int nap_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ power7_idle_type(PNV_THREAD_NAP);
+
+ return index;
+}
+
+/* Register for fastsleep only in oneshot mode of broadcast */
+#ifdef CONFIG_TICK_ONESHOT
+static int fastsleep_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ unsigned long old_lpcr = mfspr(SPRN_LPCR);
+ unsigned long new_lpcr;
+
+ if (unlikely(system_state < SYSTEM_RUNNING))
+ return index;
+
+ new_lpcr = old_lpcr;
+ /* Do not exit powersave upon decrementer as we've setup the timer
+ * offload.
+ */
+ new_lpcr &= ~LPCR_PECE1;
+
+ mtspr(SPRN_LPCR, new_lpcr);
+
+ power7_idle_type(PNV_THREAD_SLEEP);
+
+ mtspr(SPRN_LPCR, old_lpcr);
+
+ return index;
+}
+#endif
+
+static int stop_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ power9_idle_type(stop_psscr_table[index].val,
+ stop_psscr_table[index].mask);
+ return index;
+}
+
+/*
+ * States for dedicated partition case.
+ */
+static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = {
+ { /* Snooze */
+ .name = "snooze",
+ .desc = "snooze",
+ .exit_latency = 0,
+ .target_residency = 0,
+ .enter = snooze_loop },
+};
+
+static int powernv_cpuidle_cpu_online(unsigned int cpu)
+{
+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+
+ if (dev && cpuidle_get_driver()) {
+ cpuidle_pause_and_lock();
+ cpuidle_enable_device(dev);
+ cpuidle_resume_and_unlock();
+ }
+ return 0;
+}
+
+static int powernv_cpuidle_cpu_dead(unsigned int cpu)
+{
+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+
+ if (dev && cpuidle_get_driver()) {
+ cpuidle_pause_and_lock();
+ cpuidle_disable_device(dev);
+ cpuidle_resume_and_unlock();
+ }
+ return 0;
+}
+
+/*
+ * powernv_cpuidle_driver_init()
+ */
+static int powernv_cpuidle_driver_init(void)
+{
+ int idle_state;
+ struct cpuidle_driver *drv = &powernv_idle_driver;
+
+ drv->state_count = 0;
+
+ for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
+ /* Is the state not enabled? */
+ if (cpuidle_state_table[idle_state].enter == NULL)
+ continue;
+
+ drv->states[drv->state_count] = /* structure copy */
+ cpuidle_state_table[idle_state];
+
+ drv->state_count += 1;
+ }
+
+ /*
+ * On the PowerNV platform cpu_present may be less than cpu_possible in
+ * cases when firmware detects the CPU, but it is not available to the
+ * OS. If CONFIG_HOTPLUG_CPU=n, then such CPUs are not hotplugable at
+ * run time and hence cpu_devices are not created for those CPUs by the
+ * generic topology_init().
+ *
+ * drv->cpumask defaults to cpu_possible_mask in
+ * __cpuidle_driver_init(). This breaks cpuidle on PowerNV where
+ * cpu_devices are not created for CPUs in cpu_possible_mask that
+ * cannot be hot-added later at run time.
+ *
+ * Trying cpuidle_register_device() on a CPU without a cpu_device is
+ * incorrect, so pass a correct CPU mask to the generic cpuidle driver.
+ */
+
+ drv->cpumask = (struct cpumask *)cpu_present_mask;
+
+ return 0;
+}
+
+static inline void add_powernv_state(int index, const char *name,
+ unsigned int flags,
+ int (*idle_fn)(struct cpuidle_device *,
+ struct cpuidle_driver *,
+ int),
+ unsigned int target_residency,
+ unsigned int exit_latency,
+ u64 psscr_val, u64 psscr_mask)
+{
+ strlcpy(powernv_states[index].name, name, CPUIDLE_NAME_LEN);
+ strlcpy(powernv_states[index].desc, name, CPUIDLE_NAME_LEN);
+ powernv_states[index].flags = flags;
+ powernv_states[index].target_residency = target_residency;
+ powernv_states[index].exit_latency = exit_latency;
+ powernv_states[index].enter = idle_fn;
+ /* For power8 and below psscr_* will be 0 */
+ stop_psscr_table[index].val = psscr_val;
+ stop_psscr_table[index].mask = psscr_mask;
+}
+
+/*
+ * Returns 0 if prop1_len == prop2_len. Else returns -1
+ */
+static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
+ const char *prop2, int prop2_len)
+{
+ if (prop1_len == prop2_len)
+ return 0;
+
+ pr_warn("cpuidle-powernv: array sizes don't match for %s and %s\n",
+ prop1, prop2);
+ return -1;
+}
+
+extern u32 pnv_get_supported_cpuidle_states(void);
+static int powernv_add_idle_states(void)
+{
+ int nr_idle_states = 1; /* Snooze */
+ int dt_idle_states;
+ u32 has_stop_states = 0;
+ int i;
+ u32 supported_flags = pnv_get_supported_cpuidle_states();
+
+
+ /* Currently we have snooze statically defined */
+ if (nr_pnv_idle_states <= 0) {
+ pr_warn("cpuidle-powernv : Only Snooze is available\n");
+ goto out;
+ }
+
+ /* TODO: Count only states which are eligible for cpuidle */
+ dt_idle_states = nr_pnv_idle_states;
+
+ /*
+ * Since snooze is used as first idle state, max idle states allowed is
+ * CPUIDLE_STATE_MAX -1
+ */
+ if (nr_pnv_idle_states > CPUIDLE_STATE_MAX - 1) {
+ pr_warn("cpuidle-powernv: discovered idle states more than allowed");
+ dt_idle_states = CPUIDLE_STATE_MAX - 1;
+ }
+
+ /*
+ * If the idle states use stop instruction, probe for psscr values
+ * and psscr mask which are necessary to specify required stop level.
+ */
+ has_stop_states = (pnv_idle_states[0].flags &
+ (OPAL_PM_STOP_INST_FAST | OPAL_PM_STOP_INST_DEEP));
+
+ for (i = 0; i < dt_idle_states; i++) {
+ unsigned int exit_latency, target_residency;
+ bool stops_timebase = false;
+ struct pnv_idle_states_t *state = &pnv_idle_states[i];
+
+ /*
+ * Skip the platform idle state whose flag isn't in
+ * the supported_cpuidle_states flag mask.
+ */
+ if ((state->flags & supported_flags) != state->flags)
+ continue;
+ /*
+ * If an idle state has exit latency beyond
+ * POWERNV_THRESHOLD_LATENCY_NS then don't use it
+ * in cpu-idle.
+ */
+ if (state->latency_ns > POWERNV_THRESHOLD_LATENCY_NS)
+ continue;
+ /*
+ * Firmware passes residency and latency values in ns.
+ * cpuidle expects it in us.
+ */
+ exit_latency = DIV_ROUND_UP(state->latency_ns, 1000);
+ target_residency = DIV_ROUND_UP(state->residency_ns, 1000);
+
+ if (has_stop_states && !(state->valid))
+ continue;
+
+ if (state->flags & OPAL_PM_TIMEBASE_STOP)
+ stops_timebase = true;
+
+ if (state->flags & OPAL_PM_NAP_ENABLED) {
+ /* Add NAP state */
+ add_powernv_state(nr_idle_states, "Nap",
+ CPUIDLE_FLAG_NONE, nap_loop,
+ target_residency, exit_latency, 0, 0);
+ } else if (has_stop_states && !stops_timebase) {
+ add_powernv_state(nr_idle_states, state->name,
+ CPUIDLE_FLAG_NONE, stop_loop,
+ target_residency, exit_latency,
+ state->psscr_val,
+ state->psscr_mask);
+ }
+
+ /*
+ * All cpuidle states with CPUIDLE_FLAG_TIMER_STOP set must come
+ * within this config dependency check.
+ */
+#ifdef CONFIG_TICK_ONESHOT
+ else if (state->flags & OPAL_PM_SLEEP_ENABLED ||
+ state->flags & OPAL_PM_SLEEP_ENABLED_ER1) {
+ /* Add FASTSLEEP state */
+ add_powernv_state(nr_idle_states, "FastSleep",
+ CPUIDLE_FLAG_TIMER_STOP,
+ fastsleep_loop,
+ target_residency, exit_latency, 0, 0);
+ } else if (has_stop_states && stops_timebase) {
+ add_powernv_state(nr_idle_states, state->name,
+ CPUIDLE_FLAG_TIMER_STOP, stop_loop,
+ target_residency, exit_latency,
+ state->psscr_val,
+ state->psscr_mask);
+ }
+#endif
+ else
+ continue;
+ nr_idle_states++;
+ }
+out:
+ return nr_idle_states;
+}
+
+/*
+ * powernv_idle_probe()
+ * Choose state table for shared versus dedicated partition
+ */
+static int powernv_idle_probe(void)
+{
+ if (cpuidle_disable != IDLE_NO_OVERRIDE)
+ return -ENODEV;
+
+ if (firmware_has_feature(FW_FEATURE_OPAL)) {
+ cpuidle_state_table = powernv_states;
+ /* Device tree can indicate more idle states */
+ max_idle_state = powernv_add_idle_states();
+ default_snooze_timeout = TICK_USEC * tb_ticks_per_usec;
+ if (max_idle_state > 1)
+ snooze_timeout_en = true;
+ } else
+ return -ENODEV;
+
+ return 0;
+}
+
+static int __init powernv_processor_idle_init(void)
+{
+ int retval;
+
+ retval = powernv_idle_probe();
+ if (retval)
+ return retval;
+
+ powernv_cpuidle_driver_init();
+ retval = cpuidle_register(&powernv_idle_driver, NULL);
+ if (retval) {
+ printk(KERN_DEBUG "Registration of powernv driver failed.\n");
+ return retval;
+ }
+
+ retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "cpuidle/powernv:online",
+ powernv_cpuidle_cpu_online, NULL);
+ WARN_ON(retval < 0);
+ retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
+ "cpuidle/powernv:dead", NULL,
+ powernv_cpuidle_cpu_dead);
+ WARN_ON(retval < 0);
+ printk(KERN_DEBUG "powernv_idle_driver registered\n");
+ return 0;
+}
+
+device_initcall(powernv_processor_idle_init);
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
new file mode 100644
index 000000000..74c247972
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * cpuidle-pseries - idle state cpuidle driver.
+ * Adapted from drivers/idle/intel_idle.c and
+ * drivers/acpi/processor_idle.c
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/moduleparam.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu.h>
+#include <linux/notifier.h>
+
+#include <asm/paca.h>
+#include <asm/reg.h>
+#include <asm/machdep.h>
+#include <asm/firmware.h>
+#include <asm/runlatch.h>
+#include <asm/plpar_wrappers.h>
+
+struct cpuidle_driver pseries_idle_driver = {
+ .name = "pseries_idle",
+ .owner = THIS_MODULE,
+};
+
+static int max_idle_state __read_mostly;
+static struct cpuidle_state *cpuidle_state_table __read_mostly;
+static u64 snooze_timeout __read_mostly;
+static bool snooze_timeout_en __read_mostly;
+
+static inline void idle_loop_prolog(unsigned long *in_purr)
+{
+ ppc64_runlatch_off();
+ *in_purr = mfspr(SPRN_PURR);
+ /*
+ * Indicate to the HV that we are idle. Now would be
+ * a good time to find other work to dispatch.
+ */
+ get_lppaca()->idle = 1;
+}
+
+static inline void idle_loop_epilog(unsigned long in_purr)
+{
+ u64 wait_cycles;
+
+ wait_cycles = be64_to_cpu(get_lppaca()->wait_state_cycles);
+ wait_cycles += mfspr(SPRN_PURR) - in_purr;
+ get_lppaca()->wait_state_cycles = cpu_to_be64(wait_cycles);
+ get_lppaca()->idle = 0;
+
+ ppc64_runlatch_on();
+}
+
+static int snooze_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ unsigned long in_purr;
+ u64 snooze_exit_time;
+
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
+ idle_loop_prolog(&in_purr);
+ local_irq_enable();
+ snooze_exit_time = get_tb() + snooze_timeout;
+
+ while (!need_resched()) {
+ HMT_low();
+ HMT_very_low();
+ if (likely(snooze_timeout_en) && get_tb() > snooze_exit_time) {
+ /*
+ * Task has not woken up but we are exiting the polling
+ * loop anyway. Require a barrier after polling is
+ * cleared to order subsequent test of need_resched().
+ */
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb();
+ break;
+ }
+ }
+
+ HMT_medium();
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+
+ local_irq_disable();
+
+ idle_loop_epilog(in_purr);
+
+ return index;
+}
+
+static void check_and_cede_processor(void)
+{
+ /*
+ * Ensure our interrupt state is properly tracked,
+ * also checks if no interrupt has occurred while we
+ * were soft-disabled
+ */
+ if (prep_irq_for_idle()) {
+ cede_processor();
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /* Ensure that H_CEDE returns with IRQs on */
+ if (WARN_ON(!(mfmsr() & MSR_EE)))
+ __hard_irq_enable();
+#endif
+ }
+}
+
+static int dedicated_cede_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ unsigned long in_purr;
+
+ idle_loop_prolog(&in_purr);
+ get_lppaca()->donate_dedicated_cpu = 1;
+
+ HMT_medium();
+ check_and_cede_processor();
+
+ local_irq_disable();
+ get_lppaca()->donate_dedicated_cpu = 0;
+
+ idle_loop_epilog(in_purr);
+
+ return index;
+}
+
+static int shared_cede_loop(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv,
+ int index)
+{
+ unsigned long in_purr;
+
+ idle_loop_prolog(&in_purr);
+
+ /*
+ * Yield the processor to the hypervisor. We return if
+ * an external interrupt occurs (which are driven prior
+ * to returning here) or if a prod occurs from another
+ * processor. When returning here, external interrupts
+ * are enabled.
+ */
+ check_and_cede_processor();
+
+ local_irq_disable();
+ idle_loop_epilog(in_purr);
+
+ return index;
+}
+
+/*
+ * States for dedicated partition case.
+ */
+static struct cpuidle_state dedicated_states[] = {
+ { /* Snooze */
+ .name = "snooze",
+ .desc = "snooze",
+ .exit_latency = 0,
+ .target_residency = 0,
+ .enter = &snooze_loop },
+ { /* CEDE */
+ .name = "CEDE",
+ .desc = "CEDE",
+ .exit_latency = 10,
+ .target_residency = 100,
+ .enter = &dedicated_cede_loop },
+};
+
+/*
+ * States for shared partition case.
+ */
+static struct cpuidle_state shared_states[] = {
+ { /* Snooze */
+ .name = "snooze",
+ .desc = "snooze",
+ .exit_latency = 0,
+ .target_residency = 0,
+ .enter = &snooze_loop },
+ { /* Shared Cede */
+ .name = "Shared Cede",
+ .desc = "Shared Cede",
+ .exit_latency = 10,
+ .target_residency = 100,
+ .enter = &shared_cede_loop },
+};
+
+static int pseries_cpuidle_cpu_online(unsigned int cpu)
+{
+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+
+ if (dev && cpuidle_get_driver()) {
+ cpuidle_pause_and_lock();
+ cpuidle_enable_device(dev);
+ cpuidle_resume_and_unlock();
+ }
+ return 0;
+}
+
+static int pseries_cpuidle_cpu_dead(unsigned int cpu)
+{
+ struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
+
+ if (dev && cpuidle_get_driver()) {
+ cpuidle_pause_and_lock();
+ cpuidle_disable_device(dev);
+ cpuidle_resume_and_unlock();
+ }
+ return 0;
+}
+
+/*
+ * pseries_cpuidle_driver_init()
+ */
+static int pseries_cpuidle_driver_init(void)
+{
+ int idle_state;
+ struct cpuidle_driver *drv = &pseries_idle_driver;
+
+ drv->state_count = 0;
+
+ for (idle_state = 0; idle_state < max_idle_state; ++idle_state) {
+ /* Is the state not enabled? */
+ if (cpuidle_state_table[idle_state].enter == NULL)
+ continue;
+
+ drv->states[drv->state_count] = /* structure copy */
+ cpuidle_state_table[idle_state];
+
+ drv->state_count += 1;
+ }
+
+ return 0;
+}
+
+/*
+ * pseries_idle_probe()
+ * Choose state table for shared versus dedicated partition
+ */
+static int pseries_idle_probe(void)
+{
+
+ if (cpuidle_disable != IDLE_NO_OVERRIDE)
+ return -ENODEV;
+
+ if (firmware_has_feature(FW_FEATURE_SPLPAR)) {
+ /*
+ * Use local_paca instead of get_lppaca() since
+ * preemption is not disabled, and it is not required in
+ * fact, since lppaca_ptr does not need to be the value
+ * associated to the current CPU, it can be from any CPU.
+ */
+ if (lppaca_shared_proc(local_paca->lppaca_ptr)) {
+ cpuidle_state_table = shared_states;
+ max_idle_state = ARRAY_SIZE(shared_states);
+ } else {
+ cpuidle_state_table = dedicated_states;
+ max_idle_state = ARRAY_SIZE(dedicated_states);
+ }
+ } else
+ return -ENODEV;
+
+ if (max_idle_state > 1) {
+ snooze_timeout_en = true;
+ snooze_timeout = cpuidle_state_table[1].target_residency *
+ tb_ticks_per_usec;
+ }
+ return 0;
+}
+
+static int __init pseries_processor_idle_init(void)
+{
+ int retval;
+
+ retval = pseries_idle_probe();
+ if (retval)
+ return retval;
+
+ pseries_cpuidle_driver_init();
+ retval = cpuidle_register(&pseries_idle_driver, NULL);
+ if (retval) {
+ printk(KERN_DEBUG "Registration of pseries driver failed.\n");
+ return retval;
+ }
+
+ retval = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "cpuidle/pseries:online",
+ pseries_cpuidle_cpu_online, NULL);
+ WARN_ON(retval < 0);
+ retval = cpuhp_setup_state_nocalls(CPUHP_CPUIDLE_DEAD,
+ "cpuidle/pseries:DEAD", NULL,
+ pseries_cpuidle_cpu_dead);
+ WARN_ON(retval < 0);
+ printk(KERN_DEBUG "pseries_idle_driver registered\n");
+ return 0;
+}
+
+device_initcall(pseries_processor_idle_init);
diff --git a/drivers/cpuidle/cpuidle-ux500.c b/drivers/cpuidle/cpuidle-ux500.c
new file mode 100644
index 000000000..7941a090b
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-ux500.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2012 Linaro : Daniel Lezcano <daniel.lezcano@linaro.org> (IBM)
+ *
+ * Based on the work of Rickard Andersson <rickard.andersson@stericsson.com>
+ * and Jonas Aaberg <jonas.aberg@stericsson.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/init.h>
+#include <linux/cpuidle.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/smp.h>
+#include <linux/mfd/dbx500-prcmu.h>
+#include <linux/platform_data/arm-ux500-pm.h>
+#include <linux/platform_device.h>
+
+#include <asm/cpuidle.h>
+
+static atomic_t master = ATOMIC_INIT(0);
+static DEFINE_SPINLOCK(master_lock);
+
+static inline int ux500_enter_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ int this_cpu = smp_processor_id();
+ bool recouple = false;
+
+ if (atomic_inc_return(&master) == num_online_cpus()) {
+
+ /* With this lock, we prevent the other cpu to exit and enter
+ * this function again and become the master */
+ if (!spin_trylock(&master_lock))
+ goto wfi;
+
+ /* decouple the gic from the A9 cores */
+ if (prcmu_gic_decouple()) {
+ spin_unlock(&master_lock);
+ goto out;
+ }
+
+ /* If an error occur, we will have to recouple the gic
+ * manually */
+ recouple = true;
+
+ /* At this state, as the gic is decoupled, if the other
+ * cpu is in WFI, we have the guarantee it won't be wake
+ * up, so we can safely go to retention */
+ if (!prcmu_is_cpu_in_wfi(this_cpu ? 0 : 1))
+ goto out;
+
+ /* The prcmu will be in charge of watching the interrupts
+ * and wake up the cpus */
+ if (prcmu_copy_gic_settings())
+ goto out;
+
+ /* Check in the meantime an interrupt did
+ * not occur on the gic ... */
+ if (prcmu_gic_pending_irq())
+ goto out;
+
+ /* ... and the prcmu */
+ if (prcmu_pending_irq())
+ goto out;
+
+ /* Go to the retention state, the prcmu will wait for the
+ * cpu to go WFI and this is what happens after exiting this
+ * 'master' critical section */
+ if (prcmu_set_power_state(PRCMU_AP_IDLE, true, true))
+ goto out;
+
+ /* When we switch to retention, the prcmu is in charge
+ * of recoupling the gic automatically */
+ recouple = false;
+
+ spin_unlock(&master_lock);
+ }
+wfi:
+ cpu_do_idle();
+out:
+ atomic_dec(&master);
+
+ if (recouple) {
+ prcmu_gic_recouple();
+ spin_unlock(&master_lock);
+ }
+
+ return index;
+}
+
+static struct cpuidle_driver ux500_idle_driver = {
+ .name = "ux500_idle",
+ .owner = THIS_MODULE,
+ .states = {
+ ARM_CPUIDLE_WFI_STATE,
+ {
+ .enter = ux500_enter_idle,
+ .exit_latency = 70,
+ .target_residency = 260,
+ .flags = CPUIDLE_FLAG_TIMER_STOP,
+ .name = "ApIdle",
+ .desc = "ARM Retention",
+ },
+ },
+ .safe_state_index = 0,
+ .state_count = 2,
+};
+
+static int dbx500_cpuidle_probe(struct platform_device *pdev)
+{
+ /* Configure wake up reasons */
+ prcmu_enable_wakeups(PRCMU_WAKEUP(ARM) | PRCMU_WAKEUP(RTC) |
+ PRCMU_WAKEUP(ABB));
+
+ return cpuidle_register(&ux500_idle_driver, NULL);
+}
+
+static struct platform_driver dbx500_cpuidle_plat_driver = {
+ .driver = {
+ .name = "cpuidle-dbx500",
+ },
+ .probe = dbx500_cpuidle_probe,
+};
+builtin_platform_driver(dbx500_cpuidle_plat_driver);
diff --git a/drivers/cpuidle/cpuidle-zynq.c b/drivers/cpuidle/cpuidle-zynq.c
new file mode 100644
index 000000000..6f4257fc5
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-zynq.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2012-2013 Xilinx
+ *
+ * CPU idle support for Xilinx Zynq
+ *
+ * based on arch/arm/mach-at91/cpuidle.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The cpu idle uses wait-for-interrupt and RAM self refresh in order
+ * to implement two idle states -
+ * #1 wait-for-interrupt
+ * #2 wait-for-interrupt and RAM self refresh
+ *
+ * Maintainer: Michal Simek <michal.simek@xilinx.com>
+ */
+
+#include <linux/init.h>
+#include <linux/cpuidle.h>
+#include <linux/platform_device.h>
+#include <asm/cpuidle.h>
+
+#define ZYNQ_MAX_STATES 2
+
+/* Actual code that puts the SoC in different idle states */
+static int zynq_enter_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ /* Add code for DDR self refresh start */
+ cpu_do_idle();
+
+ return index;
+}
+
+static struct cpuidle_driver zynq_idle_driver = {
+ .name = "zynq_idle",
+ .owner = THIS_MODULE,
+ .states = {
+ ARM_CPUIDLE_WFI_STATE,
+ {
+ .enter = zynq_enter_idle,
+ .exit_latency = 10,
+ .target_residency = 10000,
+ .name = "RAM_SR",
+ .desc = "WFI and RAM Self Refresh",
+ },
+ },
+ .safe_state_index = 0,
+ .state_count = ZYNQ_MAX_STATES,
+};
+
+/* Initialize CPU idle by registering the idle states */
+static int zynq_cpuidle_probe(struct platform_device *pdev)
+{
+ pr_info("Xilinx Zynq CpuIdle Driver started\n");
+
+ return cpuidle_register(&zynq_idle_driver, NULL);
+}
+
+static struct platform_driver zynq_cpuidle_driver = {
+ .driver = {
+ .name = "cpuidle-zynq",
+ },
+ .probe = zynq_cpuidle_probe,
+};
+builtin_platform_driver(zynq_cpuidle_driver);
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
new file mode 100644
index 000000000..2d182dc1b
--- /dev/null
+++ b/drivers/cpuidle/cpuidle.c
@@ -0,0 +1,706 @@
+/*
+ * cpuidle.c - core cpuidle infrastructure
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * Shaohua Li <shaohua.li@intel.com>
+ * Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/clockchips.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/notifier.h>
+#include <linux/pm_qos.h>
+#include <linux/cpu.h>
+#include <linux/cpuidle.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/module.h>
+#include <linux/suspend.h>
+#include <linux/tick.h>
+#include <trace/events/power.h>
+
+#include "cpuidle.h"
+
+DEFINE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
+DEFINE_PER_CPU(struct cpuidle_device, cpuidle_dev);
+
+DEFINE_MUTEX(cpuidle_lock);
+LIST_HEAD(cpuidle_detected_devices);
+
+static int enabled_devices;
+static int off __read_mostly;
+static int initialized __read_mostly;
+
+int cpuidle_disabled(void)
+{
+ return off;
+}
+void disable_cpuidle(void)
+{
+ off = 1;
+}
+
+bool cpuidle_not_available(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev)
+{
+ return off || !initialized || !drv || !dev || !dev->enabled;
+}
+
+/**
+ * cpuidle_play_dead - cpu off-lining
+ *
+ * Returns in case of an error or no driver
+ */
+int cpuidle_play_dead(void)
+{
+ struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices);
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+ int i;
+
+ if (!drv)
+ return -ENODEV;
+
+ /* Find lowest-power state that supports long-term idle */
+ for (i = drv->state_count - 1; i >= 0; i--)
+ if (drv->states[i].enter_dead)
+ return drv->states[i].enter_dead(dev, i);
+
+ return -ENODEV;
+}
+
+static int find_deepest_state(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev,
+ unsigned int max_latency,
+ unsigned int forbidden_flags,
+ bool s2idle)
+{
+ unsigned int latency_req = 0;
+ int i, ret = 0;
+
+ for (i = 1; i < drv->state_count; i++) {
+ struct cpuidle_state *s = &drv->states[i];
+ struct cpuidle_state_usage *su = &dev->states_usage[i];
+
+ if (s->disabled || su->disable || s->exit_latency <= latency_req
+ || s->exit_latency > max_latency
+ || (s->flags & forbidden_flags)
+ || (s2idle && !s->enter_s2idle))
+ continue;
+
+ latency_req = s->exit_latency;
+ ret = i;
+ }
+ return ret;
+}
+
+/**
+ * cpuidle_use_deepest_state - Set/clear governor override flag.
+ * @enable: New value of the flag.
+ *
+ * Set/unset the current CPU to use the deepest idle state (override governors
+ * going forward if set).
+ */
+void cpuidle_use_deepest_state(bool enable)
+{
+ struct cpuidle_device *dev;
+
+ preempt_disable();
+ dev = cpuidle_get_device();
+ if (dev)
+ dev->use_deepest_state = enable;
+ preempt_enable();
+}
+
+/**
+ * cpuidle_find_deepest_state - Find the deepest available idle state.
+ * @drv: cpuidle driver for the given CPU.
+ * @dev: cpuidle device for the given CPU.
+ */
+int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev)
+{
+ return find_deepest_state(drv, dev, UINT_MAX, 0, false);
+}
+
+#ifdef CONFIG_SUSPEND
+static void enter_s2idle_proper(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev, int index)
+{
+ ktime_t time_start, time_end;
+
+ time_start = ns_to_ktime(local_clock());
+
+ /*
+ * trace_suspend_resume() called by tick_freeze() for the last CPU
+ * executing it contains RCU usage regarded as invalid in the idle
+ * context, so tell RCU about that.
+ */
+ RCU_NONIDLE(tick_freeze());
+ /*
+ * The state used here cannot be a "coupled" one, because the "coupled"
+ * cpuidle mechanism enables interrupts and doing that with timekeeping
+ * suspended is generally unsafe.
+ */
+ stop_critical_timings();
+ drv->states[index].enter_s2idle(dev, drv, index);
+ if (WARN_ON_ONCE(!irqs_disabled()))
+ local_irq_disable();
+ /*
+ * timekeeping_resume() that will be called by tick_unfreeze() for the
+ * first CPU executing it calls functions containing RCU read-side
+ * critical sections, so tell RCU about that.
+ */
+ RCU_NONIDLE(tick_unfreeze());
+ start_critical_timings();
+
+ time_end = ns_to_ktime(local_clock());
+
+ dev->states_usage[index].s2idle_time += ktime_us_delta(time_end, time_start);
+ dev->states_usage[index].s2idle_usage++;
+}
+
+/**
+ * cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle.
+ * @drv: cpuidle driver for the given CPU.
+ * @dev: cpuidle device for the given CPU.
+ *
+ * If there are states with the ->enter_s2idle callback, find the deepest of
+ * them and enter it with frozen tick.
+ */
+int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+ int index;
+
+ /*
+ * Find the deepest state with ->enter_s2idle present, which guarantees
+ * that interrupts won't be enabled when it exits and allows the tick to
+ * be frozen safely.
+ */
+ index = find_deepest_state(drv, dev, UINT_MAX, 0, true);
+ if (index > 0)
+ enter_s2idle_proper(drv, dev, index);
+
+ return index;
+}
+#endif /* CONFIG_SUSPEND */
+
+/**
+ * cpuidle_enter_state - enter the state and update stats
+ * @dev: cpuidle device for this cpu
+ * @drv: cpuidle driver for this cpu
+ * @index: index into the states table in @drv of the state to enter
+ */
+int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
+ int index)
+{
+ int entered_state;
+
+ struct cpuidle_state *target_state = &drv->states[index];
+ bool broadcast = !!(target_state->flags & CPUIDLE_FLAG_TIMER_STOP);
+ ktime_t time_start, time_end;
+ s64 diff;
+
+ /*
+ * Tell the time framework to switch to a broadcast timer because our
+ * local timer will be shut down. If a local timer is used from another
+ * CPU as a broadcast timer, this call may fail if it is not available.
+ */
+ if (broadcast && tick_broadcast_enter()) {
+ index = find_deepest_state(drv, dev, target_state->exit_latency,
+ CPUIDLE_FLAG_TIMER_STOP, false);
+ if (index < 0) {
+ default_idle_call();
+ return -EBUSY;
+ }
+ target_state = &drv->states[index];
+ broadcast = false;
+ }
+
+ /* Take note of the planned idle state. */
+ sched_idle_set_state(target_state);
+
+ trace_cpu_idle_rcuidle(index, dev->cpu);
+ time_start = ns_to_ktime(local_clock());
+
+ stop_critical_timings();
+ entered_state = target_state->enter(dev, drv, index);
+ start_critical_timings();
+
+ sched_clock_idle_wakeup_event();
+ time_end = ns_to_ktime(local_clock());
+ trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, dev->cpu);
+
+ /* The cpu is no longer idle or about to enter idle. */
+ sched_idle_set_state(NULL);
+
+ if (broadcast) {
+ if (WARN_ON_ONCE(!irqs_disabled()))
+ local_irq_disable();
+
+ tick_broadcast_exit();
+ }
+
+ if (!cpuidle_state_is_coupled(drv, index))
+ local_irq_enable();
+
+ diff = ktime_us_delta(time_end, time_start);
+ if (diff > INT_MAX)
+ diff = INT_MAX;
+
+ dev->last_residency = (int) diff;
+
+ if (entered_state >= 0) {
+ /* Update cpuidle counters */
+ /* This can be moved to within driver enter routine
+ * but that results in multiple copies of same code.
+ */
+ dev->states_usage[entered_state].time += dev->last_residency;
+ dev->states_usage[entered_state].usage++;
+ } else {
+ dev->last_residency = 0;
+ }
+
+ return entered_state;
+}
+
+/**
+ * cpuidle_select - ask the cpuidle framework to choose an idle state
+ *
+ * @drv: the cpuidle driver
+ * @dev: the cpuidle device
+ * @stop_tick: indication on whether or not to stop the tick
+ *
+ * Returns the index of the idle state. The return value must not be negative.
+ *
+ * The memory location pointed to by @stop_tick is expected to be written the
+ * 'false' boolean value if the scheduler tick should not be stopped before
+ * entering the returned state.
+ */
+int cpuidle_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ bool *stop_tick)
+{
+ return cpuidle_curr_governor->select(drv, dev, stop_tick);
+}
+
+/**
+ * cpuidle_enter - enter into the specified idle state
+ *
+ * @drv: the cpuidle driver tied with the cpu
+ * @dev: the cpuidle device
+ * @index: the index in the idle state table
+ *
+ * Returns the index in the idle state, < 0 in case of error.
+ * The error code depends on the backend driver
+ */
+int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ int index)
+{
+ if (cpuidle_state_is_coupled(drv, index))
+ return cpuidle_enter_state_coupled(dev, drv, index);
+ return cpuidle_enter_state(dev, drv, index);
+}
+
+/**
+ * cpuidle_reflect - tell the underlying governor what was the state
+ * we were in
+ *
+ * @dev : the cpuidle device
+ * @index: the index in the idle state table
+ *
+ */
+void cpuidle_reflect(struct cpuidle_device *dev, int index)
+{
+ if (cpuidle_curr_governor->reflect && index >= 0)
+ cpuidle_curr_governor->reflect(dev, index);
+}
+
+/**
+ * cpuidle_install_idle_handler - installs the cpuidle idle loop handler
+ */
+void cpuidle_install_idle_handler(void)
+{
+ if (enabled_devices) {
+ /* Make sure all changes finished before we switch to new idle */
+ smp_wmb();
+ initialized = 1;
+ }
+}
+
+/**
+ * cpuidle_uninstall_idle_handler - uninstalls the cpuidle idle loop handler
+ */
+void cpuidle_uninstall_idle_handler(void)
+{
+ if (enabled_devices) {
+ initialized = 0;
+ wake_up_all_idle_cpus();
+ }
+
+ /*
+ * Make sure external observers (such as the scheduler)
+ * are done looking at pointed idle states.
+ */
+ synchronize_rcu();
+}
+
+/**
+ * cpuidle_pause_and_lock - temporarily disables CPUIDLE
+ */
+void cpuidle_pause_and_lock(void)
+{
+ mutex_lock(&cpuidle_lock);
+ cpuidle_uninstall_idle_handler();
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_pause_and_lock);
+
+/**
+ * cpuidle_resume_and_unlock - resumes CPUIDLE operation
+ */
+void cpuidle_resume_and_unlock(void)
+{
+ cpuidle_install_idle_handler();
+ mutex_unlock(&cpuidle_lock);
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock);
+
+/* Currently used in suspend/resume path to suspend cpuidle */
+void cpuidle_pause(void)
+{
+ mutex_lock(&cpuidle_lock);
+ cpuidle_uninstall_idle_handler();
+ mutex_unlock(&cpuidle_lock);
+}
+
+/* Currently used in suspend/resume path to resume cpuidle */
+void cpuidle_resume(void)
+{
+ mutex_lock(&cpuidle_lock);
+ cpuidle_install_idle_handler();
+ mutex_unlock(&cpuidle_lock);
+}
+
+/**
+ * cpuidle_enable_device - enables idle PM for a CPU
+ * @dev: the CPU
+ *
+ * This function must be called between cpuidle_pause_and_lock and
+ * cpuidle_resume_and_unlock when used externally.
+ */
+int cpuidle_enable_device(struct cpuidle_device *dev)
+{
+ int ret;
+ struct cpuidle_driver *drv;
+
+ if (!dev)
+ return -EINVAL;
+
+ if (dev->enabled)
+ return 0;
+
+ if (!cpuidle_curr_governor)
+ return -EIO;
+
+ drv = cpuidle_get_cpu_driver(dev);
+
+ if (!drv)
+ return -EIO;
+
+ if (!dev->registered)
+ return -EINVAL;
+
+ ret = cpuidle_add_device_sysfs(dev);
+ if (ret)
+ return ret;
+
+ if (cpuidle_curr_governor->enable) {
+ ret = cpuidle_curr_governor->enable(drv, dev);
+ if (ret)
+ goto fail_sysfs;
+ }
+
+ smp_wmb();
+
+ dev->enabled = 1;
+
+ enabled_devices++;
+ return 0;
+
+fail_sysfs:
+ cpuidle_remove_device_sysfs(dev);
+
+ return ret;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_enable_device);
+
+/**
+ * cpuidle_disable_device - disables idle PM for a CPU
+ * @dev: the CPU
+ *
+ * This function must be called between cpuidle_pause_and_lock and
+ * cpuidle_resume_and_unlock when used externally.
+ */
+void cpuidle_disable_device(struct cpuidle_device *dev)
+{
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+
+ if (!dev || !dev->enabled)
+ return;
+
+ if (!drv || !cpuidle_curr_governor)
+ return;
+
+ dev->enabled = 0;
+
+ if (cpuidle_curr_governor->disable)
+ cpuidle_curr_governor->disable(drv, dev);
+
+ cpuidle_remove_device_sysfs(dev);
+ enabled_devices--;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_disable_device);
+
+static void __cpuidle_unregister_device(struct cpuidle_device *dev)
+{
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+
+ list_del(&dev->device_list);
+ per_cpu(cpuidle_devices, dev->cpu) = NULL;
+ module_put(drv->owner);
+
+ dev->registered = 0;
+}
+
+static void __cpuidle_device_init(struct cpuidle_device *dev)
+{
+ memset(dev->states_usage, 0, sizeof(dev->states_usage));
+ dev->last_residency = 0;
+}
+
+/**
+ * __cpuidle_register_device - internal register function called before register
+ * and enable routines
+ * @dev: the cpu
+ *
+ * cpuidle_lock mutex must be held before this is called
+ */
+static int __cpuidle_register_device(struct cpuidle_device *dev)
+{
+ int ret;
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+
+ if (!try_module_get(drv->owner))
+ return -EINVAL;
+
+ per_cpu(cpuidle_devices, dev->cpu) = dev;
+ list_add(&dev->device_list, &cpuidle_detected_devices);
+
+ ret = cpuidle_coupled_register_device(dev);
+ if (ret)
+ __cpuidle_unregister_device(dev);
+ else
+ dev->registered = 1;
+
+ return ret;
+}
+
+/**
+ * cpuidle_register_device - registers a CPU's idle PM feature
+ * @dev: the cpu
+ */
+int cpuidle_register_device(struct cpuidle_device *dev)
+{
+ int ret = -EBUSY;
+
+ if (!dev)
+ return -EINVAL;
+
+ mutex_lock(&cpuidle_lock);
+
+ if (dev->registered)
+ goto out_unlock;
+
+ __cpuidle_device_init(dev);
+
+ ret = __cpuidle_register_device(dev);
+ if (ret)
+ goto out_unlock;
+
+ ret = cpuidle_add_sysfs(dev);
+ if (ret)
+ goto out_unregister;
+
+ ret = cpuidle_enable_device(dev);
+ if (ret)
+ goto out_sysfs;
+
+ cpuidle_install_idle_handler();
+
+out_unlock:
+ mutex_unlock(&cpuidle_lock);
+
+ return ret;
+
+out_sysfs:
+ cpuidle_remove_sysfs(dev);
+out_unregister:
+ __cpuidle_unregister_device(dev);
+ goto out_unlock;
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_register_device);
+
+/**
+ * cpuidle_unregister_device - unregisters a CPU's idle PM feature
+ * @dev: the cpu
+ */
+void cpuidle_unregister_device(struct cpuidle_device *dev)
+{
+ if (!dev || dev->registered == 0)
+ return;
+
+ cpuidle_pause_and_lock();
+
+ cpuidle_disable_device(dev);
+
+ cpuidle_remove_sysfs(dev);
+
+ __cpuidle_unregister_device(dev);
+
+ cpuidle_coupled_unregister_device(dev);
+
+ cpuidle_resume_and_unlock();
+}
+
+EXPORT_SYMBOL_GPL(cpuidle_unregister_device);
+
+/**
+ * cpuidle_unregister: unregister a driver and the devices. This function
+ * can be used only if the driver has been previously registered through
+ * the cpuidle_register function.
+ *
+ * @drv: a valid pointer to a struct cpuidle_driver
+ */
+void cpuidle_unregister(struct cpuidle_driver *drv)
+{
+ int cpu;
+ struct cpuidle_device *device;
+
+ for_each_cpu(cpu, drv->cpumask) {
+ device = &per_cpu(cpuidle_dev, cpu);
+ cpuidle_unregister_device(device);
+ }
+
+ cpuidle_unregister_driver(drv);
+}
+EXPORT_SYMBOL_GPL(cpuidle_unregister);
+
+/**
+ * cpuidle_register: registers the driver and the cpu devices with the
+ * coupled_cpus passed as parameter. This function is used for all common
+ * initialization pattern there are in the arch specific drivers. The
+ * devices is globally defined in this file.
+ *
+ * @drv : a valid pointer to a struct cpuidle_driver
+ * @coupled_cpus: a cpumask for the coupled states
+ *
+ * Returns 0 on success, < 0 otherwise
+ */
+int cpuidle_register(struct cpuidle_driver *drv,
+ const struct cpumask *const coupled_cpus)
+{
+ int ret, cpu;
+ struct cpuidle_device *device;
+
+ ret = cpuidle_register_driver(drv);
+ if (ret) {
+ pr_err("failed to register cpuidle driver\n");
+ return ret;
+ }
+
+ for_each_cpu(cpu, drv->cpumask) {
+ device = &per_cpu(cpuidle_dev, cpu);
+ device->cpu = cpu;
+
+#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
+ /*
+ * On multiplatform for ARM, the coupled idle states could be
+ * enabled in the kernel even if the cpuidle driver does not
+ * use it. Note, coupled_cpus is a struct copy.
+ */
+ if (coupled_cpus)
+ device->coupled_cpus = *coupled_cpus;
+#endif
+ ret = cpuidle_register_device(device);
+ if (!ret)
+ continue;
+
+ pr_err("Failed to register cpuidle device for cpu%d\n", cpu);
+
+ cpuidle_unregister(drv);
+ break;
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpuidle_register);
+
+#ifdef CONFIG_SMP
+
+/*
+ * This function gets called when a part of the kernel has a new latency
+ * requirement. This means we need to get all processors out of their C-state,
+ * and then recalculate a new suitable C-state. Just do a cross-cpu IPI; that
+ * wakes them all right up.
+ */
+static int cpuidle_latency_notify(struct notifier_block *b,
+ unsigned long l, void *v)
+{
+ wake_up_all_idle_cpus();
+ return NOTIFY_OK;
+}
+
+static struct notifier_block cpuidle_latency_notifier = {
+ .notifier_call = cpuidle_latency_notify,
+};
+
+static inline void latency_notifier_init(struct notifier_block *n)
+{
+ pm_qos_add_notifier(PM_QOS_CPU_DMA_LATENCY, n);
+}
+
+#else /* CONFIG_SMP */
+
+#define latency_notifier_init(x) do { } while (0)
+
+#endif /* CONFIG_SMP */
+
+/**
+ * cpuidle_init - core initializer
+ */
+static int __init cpuidle_init(void)
+{
+ int ret;
+
+ if (cpuidle_disabled())
+ return -ENODEV;
+
+ ret = cpuidle_add_interface(cpu_subsys.dev_root);
+ if (ret)
+ return ret;
+
+ latency_notifier_init(&cpuidle_latency_notifier);
+
+ return 0;
+}
+
+module_param(off, int, 0444);
+core_initcall(cpuidle_init);
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h
new file mode 100644
index 000000000..2965ab32a
--- /dev/null
+++ b/drivers/cpuidle/cpuidle.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * cpuidle.h - The internal header file
+ */
+
+#ifndef __DRIVER_CPUIDLE_H
+#define __DRIVER_CPUIDLE_H
+
+/* For internal use only */
+extern struct cpuidle_governor *cpuidle_curr_governor;
+extern struct list_head cpuidle_governors;
+extern struct list_head cpuidle_detected_devices;
+extern struct mutex cpuidle_lock;
+extern spinlock_t cpuidle_driver_lock;
+extern int cpuidle_disabled(void);
+extern int cpuidle_enter_state(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int next_state);
+
+/* idle loop */
+extern void cpuidle_install_idle_handler(void);
+extern void cpuidle_uninstall_idle_handler(void);
+
+/* governors */
+extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
+
+/* sysfs */
+
+struct device;
+
+extern int cpuidle_add_interface(struct device *dev);
+extern void cpuidle_remove_interface(struct device *dev);
+extern int cpuidle_add_device_sysfs(struct cpuidle_device *device);
+extern void cpuidle_remove_device_sysfs(struct cpuidle_device *device);
+extern int cpuidle_add_sysfs(struct cpuidle_device *dev);
+extern void cpuidle_remove_sysfs(struct cpuidle_device *dev);
+
+#ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED
+bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state);
+int cpuidle_coupled_state_verify(struct cpuidle_driver *drv);
+int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int next_state);
+int cpuidle_coupled_register_device(struct cpuidle_device *dev);
+void cpuidle_coupled_unregister_device(struct cpuidle_device *dev);
+#else
+static inline
+bool cpuidle_state_is_coupled(struct cpuidle_driver *drv, int state)
+{
+ return false;
+}
+
+static inline int cpuidle_coupled_state_verify(struct cpuidle_driver *drv)
+{
+ return 0;
+}
+
+static inline int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int next_state)
+{
+ return -1;
+}
+
+static inline int cpuidle_coupled_register_device(struct cpuidle_device *dev)
+{
+ return 0;
+}
+
+static inline void cpuidle_coupled_unregister_device(struct cpuidle_device *dev)
+{
+}
+#endif
+
+#endif /* __DRIVER_CPUIDLE_H */
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
new file mode 100644
index 000000000..01acd88c4
--- /dev/null
+++ b/drivers/cpuidle/driver.c
@@ -0,0 +1,355 @@
+/*
+ * driver.c - driver support
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * Shaohua Li <shaohua.li@intel.com>
+ * Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/sched/idle.h>
+#include <linux/cpuidle.h>
+#include <linux/cpumask.h>
+#include <linux/tick.h>
+#include <linux/cpu.h>
+
+#include "cpuidle.h"
+
+DEFINE_SPINLOCK(cpuidle_driver_lock);
+
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+
+static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers);
+
+/**
+ * __cpuidle_get_cpu_driver - return the cpuidle driver tied to a CPU.
+ * @cpu: the CPU handled by the driver
+ *
+ * Returns a pointer to struct cpuidle_driver or NULL if no driver has been
+ * registered for @cpu.
+ */
+static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
+{
+ return per_cpu(cpuidle_drivers, cpu);
+}
+
+/**
+ * __cpuidle_unset_driver - unset per CPU driver variables.
+ * @drv: a valid pointer to a struct cpuidle_driver
+ *
+ * For each CPU in the driver's CPU mask, unset the registered driver per CPU
+ * variable. If @drv is different from the registered driver, the corresponding
+ * variable is not cleared.
+ */
+static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv)
+{
+ int cpu;
+
+ for_each_cpu(cpu, drv->cpumask) {
+
+ if (drv != __cpuidle_get_cpu_driver(cpu))
+ continue;
+
+ per_cpu(cpuidle_drivers, cpu) = NULL;
+ }
+}
+
+/**
+ * __cpuidle_set_driver - set per CPU driver variables for the given driver.
+ * @drv: a valid pointer to a struct cpuidle_driver
+ *
+ * Returns 0 on success, -EBUSY if any CPU in the cpumask have a driver
+ * different from drv already.
+ */
+static inline int __cpuidle_set_driver(struct cpuidle_driver *drv)
+{
+ int cpu;
+
+ for_each_cpu(cpu, drv->cpumask) {
+ struct cpuidle_driver *old_drv;
+
+ old_drv = __cpuidle_get_cpu_driver(cpu);
+ if (old_drv && old_drv != drv)
+ return -EBUSY;
+ }
+
+ for_each_cpu(cpu, drv->cpumask)
+ per_cpu(cpuidle_drivers, cpu) = drv;
+
+ return 0;
+}
+
+#else
+
+static struct cpuidle_driver *cpuidle_curr_driver;
+
+/**
+ * __cpuidle_get_cpu_driver - return the global cpuidle driver pointer.
+ * @cpu: ignored without the multiple driver support
+ *
+ * Return a pointer to a struct cpuidle_driver object or NULL if no driver was
+ * previously registered.
+ */
+static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu)
+{
+ return cpuidle_curr_driver;
+}
+
+/**
+ * __cpuidle_set_driver - assign the global cpuidle driver variable.
+ * @drv: pointer to a struct cpuidle_driver object
+ *
+ * Returns 0 on success, -EBUSY if the driver is already registered.
+ */
+static inline int __cpuidle_set_driver(struct cpuidle_driver *drv)
+{
+ if (cpuidle_curr_driver)
+ return -EBUSY;
+
+ cpuidle_curr_driver = drv;
+
+ return 0;
+}
+
+/**
+ * __cpuidle_unset_driver - unset the global cpuidle driver variable.
+ * @drv: a pointer to a struct cpuidle_driver
+ *
+ * Reset the global cpuidle variable to NULL. If @drv does not match the
+ * registered driver, do nothing.
+ */
+static inline void __cpuidle_unset_driver(struct cpuidle_driver *drv)
+{
+ if (drv == cpuidle_curr_driver)
+ cpuidle_curr_driver = NULL;
+}
+
+#endif
+
+/**
+ * cpuidle_setup_broadcast_timer - enable/disable the broadcast timer on a cpu
+ * @arg: a void pointer used to match the SMP cross call API
+ *
+ * If @arg is NULL broadcast is disabled otherwise enabled
+ *
+ * This function is executed per CPU by an SMP cross call. It's not
+ * supposed to be called directly.
+ */
+static void cpuidle_setup_broadcast_timer(void *arg)
+{
+ if (arg)
+ tick_broadcast_enable();
+ else
+ tick_broadcast_disable();
+}
+
+/**
+ * __cpuidle_driver_init - initialize the driver's internal data
+ * @drv: a valid pointer to a struct cpuidle_driver
+ */
+static void __cpuidle_driver_init(struct cpuidle_driver *drv)
+{
+ int i;
+
+ drv->refcnt = 0;
+
+ /*
+ * Use all possible CPUs as the default, because if the kernel boots
+ * with some CPUs offline and then we online one of them, the CPU
+ * notifier has to know which driver to assign.
+ */
+ if (!drv->cpumask)
+ drv->cpumask = (struct cpumask *)cpu_possible_mask;
+
+ /*
+ * Look for the timer stop flag in the different states, so that we know
+ * if the broadcast timer has to be set up. The loop is in the reverse
+ * order, because usually one of the deeper states have this flag set.
+ */
+ for (i = drv->state_count - 1; i >= 0 ; i--) {
+ if (drv->states[i].flags & CPUIDLE_FLAG_TIMER_STOP) {
+ drv->bctimer = 1;
+ break;
+ }
+ }
+}
+
+/**
+ * __cpuidle_register_driver: register the driver
+ * @drv: a valid pointer to a struct cpuidle_driver
+ *
+ * Do some sanity checks, initialize the driver, assign the driver to the
+ * global cpuidle driver variable(s) and set up the broadcast timer if the
+ * cpuidle driver has some states that shut down the local timer.
+ *
+ * Returns 0 on success, a negative error code otherwise:
+ * * -EINVAL if the driver pointer is NULL or no idle states are available
+ * * -ENODEV if the cpuidle framework is disabled
+ * * -EBUSY if the driver is already assigned to the global variable(s)
+ */
+static int __cpuidle_register_driver(struct cpuidle_driver *drv)
+{
+ int ret;
+
+ if (!drv || !drv->state_count)
+ return -EINVAL;
+
+ ret = cpuidle_coupled_state_verify(drv);
+ if (ret)
+ return ret;
+
+ if (cpuidle_disabled())
+ return -ENODEV;
+
+ __cpuidle_driver_init(drv);
+
+ ret = __cpuidle_set_driver(drv);
+ if (ret)
+ return ret;
+
+ if (drv->bctimer)
+ on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
+ (void *)1, 1);
+
+ return 0;
+}
+
+/**
+ * __cpuidle_unregister_driver - unregister the driver
+ * @drv: a valid pointer to a struct cpuidle_driver
+ *
+ * Check if the driver is no longer in use, reset the global cpuidle driver
+ * variable(s) and disable the timer broadcast notification mechanism if it was
+ * in use.
+ *
+ */
+static void __cpuidle_unregister_driver(struct cpuidle_driver *drv)
+{
+ if (WARN_ON(drv->refcnt > 0))
+ return;
+
+ if (drv->bctimer) {
+ drv->bctimer = 0;
+ on_each_cpu_mask(drv->cpumask, cpuidle_setup_broadcast_timer,
+ NULL, 1);
+ }
+
+ __cpuidle_unset_driver(drv);
+}
+
+/**
+ * cpuidle_register_driver - registers a driver
+ * @drv: a pointer to a valid struct cpuidle_driver
+ *
+ * Register the driver under a lock to prevent concurrent attempts to
+ * [un]register the driver from occuring at the same time.
+ *
+ * Returns 0 on success, a negative error code (returned by
+ * __cpuidle_register_driver()) otherwise.
+ */
+int cpuidle_register_driver(struct cpuidle_driver *drv)
+{
+ int ret;
+
+ spin_lock(&cpuidle_driver_lock);
+ ret = __cpuidle_register_driver(drv);
+ spin_unlock(&cpuidle_driver_lock);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(cpuidle_register_driver);
+
+/**
+ * cpuidle_unregister_driver - unregisters a driver
+ * @drv: a pointer to a valid struct cpuidle_driver
+ *
+ * Unregisters the cpuidle driver under a lock to prevent concurrent attempts
+ * to [un]register the driver from occuring at the same time. @drv has to
+ * match the currently registered driver.
+ */
+void cpuidle_unregister_driver(struct cpuidle_driver *drv)
+{
+ spin_lock(&cpuidle_driver_lock);
+ __cpuidle_unregister_driver(drv);
+ spin_unlock(&cpuidle_driver_lock);
+}
+EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
+
+/**
+ * cpuidle_get_driver - return the driver tied to the current CPU.
+ *
+ * Returns a struct cpuidle_driver pointer, or NULL if no driver is registered.
+ */
+struct cpuidle_driver *cpuidle_get_driver(void)
+{
+ struct cpuidle_driver *drv;
+ int cpu;
+
+ cpu = get_cpu();
+ drv = __cpuidle_get_cpu_driver(cpu);
+ put_cpu();
+
+ return drv;
+}
+EXPORT_SYMBOL_GPL(cpuidle_get_driver);
+
+/**
+ * cpuidle_get_cpu_driver - return the driver registered for a CPU.
+ * @dev: a valid pointer to a struct cpuidle_device
+ *
+ * Returns a struct cpuidle_driver pointer, or NULL if no driver is registered
+ * for the CPU associated with @dev.
+ */
+struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev)
+{
+ if (!dev)
+ return NULL;
+
+ return __cpuidle_get_cpu_driver(dev->cpu);
+}
+EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver);
+
+/**
+ * cpuidle_driver_ref - get a reference to the driver.
+ *
+ * Increment the reference counter of the cpuidle driver associated with
+ * the current CPU.
+ *
+ * Returns a pointer to the driver, or NULL if the current CPU has no driver.
+ */
+struct cpuidle_driver *cpuidle_driver_ref(void)
+{
+ struct cpuidle_driver *drv;
+
+ spin_lock(&cpuidle_driver_lock);
+
+ drv = cpuidle_get_driver();
+ if (drv)
+ drv->refcnt++;
+
+ spin_unlock(&cpuidle_driver_lock);
+ return drv;
+}
+
+/**
+ * cpuidle_driver_unref - puts down the refcount for the driver
+ *
+ * Decrement the reference counter of the cpuidle driver associated with
+ * the current CPU.
+ */
+void cpuidle_driver_unref(void)
+{
+ struct cpuidle_driver *drv;
+
+ spin_lock(&cpuidle_driver_lock);
+
+ drv = cpuidle_get_driver();
+ if (drv && !WARN_ON(drv->refcnt <= 0))
+ drv->refcnt--;
+
+ spin_unlock(&cpuidle_driver_lock);
+}
diff --git a/drivers/cpuidle/dt_idle_states.c b/drivers/cpuidle/dt_idle_states.c
new file mode 100644
index 000000000..53342b7f1
--- /dev/null
+++ b/drivers/cpuidle/dt_idle_states.c
@@ -0,0 +1,229 @@
+/*
+ * DT idle states parsing code.
+ *
+ * Copyright (C) 2014 ARM Ltd.
+ * Author: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) "DT idle-states: " fmt
+
+#include <linux/cpuidle.h>
+#include <linux/cpumask.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+
+#include "dt_idle_states.h"
+
+static int init_state_node(struct cpuidle_state *idle_state,
+ const struct of_device_id *matches,
+ struct device_node *state_node)
+{
+ int err;
+ const struct of_device_id *match_id;
+ const char *desc;
+
+ match_id = of_match_node(matches, state_node);
+ if (!match_id)
+ return -ENODEV;
+ /*
+ * CPUidle drivers are expected to initialize the const void *data
+ * pointer of the passed in struct of_device_id array to the idle
+ * state enter function.
+ */
+ idle_state->enter = match_id->data;
+ /*
+ * Since this is not a "coupled" state, it's safe to assume interrupts
+ * won't be enabled when it exits allowing the tick to be frozen
+ * safely. So enter() can be also enter_s2idle() callback.
+ */
+ idle_state->enter_s2idle = match_id->data;
+
+ err = of_property_read_u32(state_node, "wakeup-latency-us",
+ &idle_state->exit_latency);
+ if (err) {
+ u32 entry_latency, exit_latency;
+
+ err = of_property_read_u32(state_node, "entry-latency-us",
+ &entry_latency);
+ if (err) {
+ pr_debug(" * %pOF missing entry-latency-us property\n",
+ state_node);
+ return -EINVAL;
+ }
+
+ err = of_property_read_u32(state_node, "exit-latency-us",
+ &exit_latency);
+ if (err) {
+ pr_debug(" * %pOF missing exit-latency-us property\n",
+ state_node);
+ return -EINVAL;
+ }
+ /*
+ * If wakeup-latency-us is missing, default to entry+exit
+ * latencies as defined in idle states bindings
+ */
+ idle_state->exit_latency = entry_latency + exit_latency;
+ }
+
+ err = of_property_read_u32(state_node, "min-residency-us",
+ &idle_state->target_residency);
+ if (err) {
+ pr_debug(" * %pOF missing min-residency-us property\n",
+ state_node);
+ return -EINVAL;
+ }
+
+ err = of_property_read_string(state_node, "idle-state-name", &desc);
+ if (err)
+ desc = state_node->name;
+
+ idle_state->flags = 0;
+ if (of_property_read_bool(state_node, "local-timer-stop"))
+ idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
+ /*
+ * TODO:
+ * replace with kstrdup and pointer assignment when name
+ * and desc become string pointers
+ */
+ strncpy(idle_state->name, state_node->name, CPUIDLE_NAME_LEN - 1);
+ strncpy(idle_state->desc, desc, CPUIDLE_DESC_LEN - 1);
+ return 0;
+}
+
+/*
+ * Check that the idle state is uniform across all CPUs in the CPUidle driver
+ * cpumask
+ */
+static bool idle_state_valid(struct device_node *state_node, unsigned int idx,
+ const cpumask_t *cpumask)
+{
+ int cpu;
+ struct device_node *cpu_node, *curr_state_node;
+ bool valid = true;
+
+ /*
+ * Compare idle state phandles for index idx on all CPUs in the
+ * CPUidle driver cpumask. Start from next logical cpu following
+ * cpumask_first(cpumask) since that's the CPU state_node was
+ * retrieved from. If a mismatch is found bail out straight
+ * away since we certainly hit a firmware misconfiguration.
+ */
+ for (cpu = cpumask_next(cpumask_first(cpumask), cpumask);
+ cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpumask)) {
+ cpu_node = of_cpu_device_node_get(cpu);
+ curr_state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
+ idx);
+ if (state_node != curr_state_node)
+ valid = false;
+
+ of_node_put(curr_state_node);
+ of_node_put(cpu_node);
+ if (!valid)
+ break;
+ }
+
+ return valid;
+}
+
+/**
+ * dt_init_idle_driver() - Parse the DT idle states and initialize the
+ * idle driver states array
+ * @drv: Pointer to CPU idle driver to be initialized
+ * @matches: Array of of_device_id match structures to search in for
+ * compatible idle state nodes. The data pointer for each valid
+ * struct of_device_id entry in the matches array must point to
+ * a function with the following signature, that corresponds to
+ * the CPUidle state enter function signature:
+ *
+ * int (*)(struct cpuidle_device *dev,
+ * struct cpuidle_driver *drv,
+ * int index);
+ *
+ * @start_idx: First idle state index to be initialized
+ *
+ * If DT idle states are detected and are valid the state count and states
+ * array entries in the cpuidle driver are initialized accordingly starting
+ * from index start_idx.
+ *
+ * Return: number of valid DT idle states parsed, <0 on failure
+ */
+int dt_init_idle_driver(struct cpuidle_driver *drv,
+ const struct of_device_id *matches,
+ unsigned int start_idx)
+{
+ struct cpuidle_state *idle_state;
+ struct device_node *state_node, *cpu_node;
+ int i, err = 0;
+ const cpumask_t *cpumask;
+ unsigned int state_idx = start_idx;
+
+ if (state_idx >= CPUIDLE_STATE_MAX)
+ return -EINVAL;
+ /*
+ * We get the idle states for the first logical cpu in the
+ * driver mask (or cpu_possible_mask if the driver cpumask is not set)
+ * and we check through idle_state_valid() if they are uniform
+ * across CPUs, otherwise we hit a firmware misconfiguration.
+ */
+ cpumask = drv->cpumask ? : cpu_possible_mask;
+ cpu_node = of_cpu_device_node_get(cpumask_first(cpumask));
+
+ for (i = 0; ; i++) {
+ state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);
+ if (!state_node)
+ break;
+
+ if (!of_device_is_available(state_node)) {
+ of_node_put(state_node);
+ continue;
+ }
+
+ if (!idle_state_valid(state_node, i, cpumask)) {
+ pr_warn("%pOF idle state not valid, bailing out\n",
+ state_node);
+ err = -EINVAL;
+ break;
+ }
+
+ if (state_idx == CPUIDLE_STATE_MAX) {
+ pr_warn("State index reached static CPU idle driver states array size\n");
+ break;
+ }
+
+ idle_state = &drv->states[state_idx++];
+ err = init_state_node(idle_state, matches, state_node);
+ if (err) {
+ pr_err("Parsing idle state node %pOF failed with err %d\n",
+ state_node, err);
+ err = -EINVAL;
+ break;
+ }
+ of_node_put(state_node);
+ }
+
+ of_node_put(state_node);
+ of_node_put(cpu_node);
+ if (err)
+ return err;
+ /*
+ * Update the driver state count only if some valid DT idle states
+ * were detected
+ */
+ if (i)
+ drv->state_count = state_idx;
+
+ /*
+ * Return the number of present and valid DT idle states, which can
+ * also be 0 on platforms with missing DT idle states or legacy DT
+ * configuration predating the DT idle states bindings.
+ */
+ return i;
+}
+EXPORT_SYMBOL_GPL(dt_init_idle_driver);
diff --git a/drivers/cpuidle/dt_idle_states.h b/drivers/cpuidle/dt_idle_states.h
new file mode 100644
index 000000000..14ae88cef
--- /dev/null
+++ b/drivers/cpuidle/dt_idle_states.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DT_IDLE_STATES
+#define __DT_IDLE_STATES
+
+int dt_init_idle_driver(struct cpuidle_driver *drv,
+ const struct of_device_id *matches,
+ unsigned int start_idx);
+#endif
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c
new file mode 100644
index 000000000..9fed1b829
--- /dev/null
+++ b/drivers/cpuidle/governor.c
@@ -0,0 +1,110 @@
+/*
+ * governor.c - governor support
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * Shaohua Li <shaohua.li@intel.com>
+ * Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/cpu.h>
+#include <linux/cpuidle.h>
+#include <linux/mutex.h>
+#include <linux/pm_qos.h>
+
+#include "cpuidle.h"
+
+LIST_HEAD(cpuidle_governors);
+struct cpuidle_governor *cpuidle_curr_governor;
+
+/**
+ * __cpuidle_find_governor - finds a governor of the specified name
+ * @str: the name
+ *
+ * Must be called with cpuidle_lock acquired.
+ */
+static struct cpuidle_governor * __cpuidle_find_governor(const char *str)
+{
+ struct cpuidle_governor *gov;
+
+ list_for_each_entry(gov, &cpuidle_governors, governor_list)
+ if (!strncasecmp(str, gov->name, CPUIDLE_NAME_LEN))
+ return gov;
+
+ return NULL;
+}
+
+/**
+ * cpuidle_switch_governor - changes the governor
+ * @gov: the new target governor
+ * Must be called with cpuidle_lock acquired.
+ */
+int cpuidle_switch_governor(struct cpuidle_governor *gov)
+{
+ struct cpuidle_device *dev;
+
+ if (!gov)
+ return -EINVAL;
+
+ if (gov == cpuidle_curr_governor)
+ return 0;
+
+ cpuidle_uninstall_idle_handler();
+
+ if (cpuidle_curr_governor) {
+ list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+ cpuidle_disable_device(dev);
+ }
+
+ cpuidle_curr_governor = gov;
+
+ if (gov) {
+ list_for_each_entry(dev, &cpuidle_detected_devices, device_list)
+ cpuidle_enable_device(dev);
+ cpuidle_install_idle_handler();
+ printk(KERN_INFO "cpuidle: using governor %s\n", gov->name);
+ }
+
+ return 0;
+}
+
+/**
+ * cpuidle_register_governor - registers a governor
+ * @gov: the governor
+ */
+int cpuidle_register_governor(struct cpuidle_governor *gov)
+{
+ int ret = -EEXIST;
+
+ if (!gov || !gov->select)
+ return -EINVAL;
+
+ if (cpuidle_disabled())
+ return -ENODEV;
+
+ mutex_lock(&cpuidle_lock);
+ if (__cpuidle_find_governor(gov->name) == NULL) {
+ ret = 0;
+ list_add_tail(&gov->governor_list, &cpuidle_governors);
+ if (!cpuidle_curr_governor ||
+ cpuidle_curr_governor->rating < gov->rating)
+ cpuidle_switch_governor(gov);
+ }
+ mutex_unlock(&cpuidle_lock);
+
+ return ret;
+}
+
+/**
+ * cpuidle_governor_latency_req - Compute a latency constraint for CPU
+ * @cpu: Target CPU
+ */
+int cpuidle_governor_latency_req(unsigned int cpu)
+{
+ int global_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
+ struct device *device = get_cpu_device(cpu);
+ int device_req = dev_pm_qos_raw_read_value(device);
+
+ return device_req < global_req ? device_req : global_req;
+}
diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile
new file mode 100644
index 000000000..1b5127226
--- /dev/null
+++ b/drivers/cpuidle/governors/Makefile
@@ -0,0 +1,6 @@
+#
+# Makefile for cpuidle governors.
+#
+
+obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
+obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
new file mode 100644
index 000000000..704880a66
--- /dev/null
+++ b/drivers/cpuidle/governors/ladder.c
@@ -0,0 +1,199 @@
+/*
+ * ladder.c - the residency ladder algorithm
+ *
+ * Copyright (C) 2001, 2002 Andy Grover <andrew.grover@intel.com>
+ * Copyright (C) 2001, 2002 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
+ * Copyright (C) 2004, 2005 Dominik Brodowski <linux@brodo.de>
+ *
+ * (C) 2006-2007 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * Shaohua Li <shaohua.li@intel.com>
+ * Adam Belay <abelay@novell.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/jiffies.h>
+#include <linux/tick.h>
+
+#include <asm/io.h>
+#include <linux/uaccess.h>
+
+#define PROMOTION_COUNT 4
+#define DEMOTION_COUNT 1
+
+struct ladder_device_state {
+ struct {
+ u32 promotion_count;
+ u32 demotion_count;
+ u32 promotion_time;
+ u32 demotion_time;
+ } threshold;
+ struct {
+ int promotion_count;
+ int demotion_count;
+ } stats;
+};
+
+struct ladder_device {
+ struct ladder_device_state states[CPUIDLE_STATE_MAX];
+ int last_state_idx;
+};
+
+static DEFINE_PER_CPU(struct ladder_device, ladder_devices);
+
+/**
+ * ladder_do_selection - prepares private data for a state change
+ * @ldev: the ladder device
+ * @old_idx: the current state index
+ * @new_idx: the new target state index
+ */
+static inline void ladder_do_selection(struct ladder_device *ldev,
+ int old_idx, int new_idx)
+{
+ ldev->states[old_idx].stats.promotion_count = 0;
+ ldev->states[old_idx].stats.demotion_count = 0;
+ ldev->last_state_idx = new_idx;
+}
+
+/**
+ * ladder_select_state - selects the next state to enter
+ * @drv: cpuidle driver
+ * @dev: the CPU
+ * @dummy: not used
+ */
+static int ladder_select_state(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev, bool *dummy)
+{
+ struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
+ struct ladder_device_state *last_state;
+ int last_residency, last_idx = ldev->last_state_idx;
+ int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
+ int latency_req = cpuidle_governor_latency_req(dev->cpu);
+
+ /* Special case when user has set very strict latency requirement */
+ if (unlikely(latency_req == 0)) {
+ ladder_do_selection(ldev, last_idx, 0);
+ return 0;
+ }
+
+ last_state = &ldev->states[last_idx];
+
+ last_residency = cpuidle_get_last_residency(dev) - drv->states[last_idx].exit_latency;
+
+ /* consider promotion */
+ if (last_idx < drv->state_count - 1 &&
+ !drv->states[last_idx + 1].disabled &&
+ !dev->states_usage[last_idx + 1].disable &&
+ last_residency > last_state->threshold.promotion_time &&
+ drv->states[last_idx + 1].exit_latency <= latency_req) {
+ last_state->stats.promotion_count++;
+ last_state->stats.demotion_count = 0;
+ if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) {
+ ladder_do_selection(ldev, last_idx, last_idx + 1);
+ return last_idx + 1;
+ }
+ }
+
+ /* consider demotion */
+ if (last_idx > first_idx &&
+ (drv->states[last_idx].disabled ||
+ dev->states_usage[last_idx].disable ||
+ drv->states[last_idx].exit_latency > latency_req)) {
+ int i;
+
+ for (i = last_idx - 1; i > first_idx; i--) {
+ if (drv->states[i].exit_latency <= latency_req)
+ break;
+ }
+ ladder_do_selection(ldev, last_idx, i);
+ return i;
+ }
+
+ if (last_idx > first_idx &&
+ last_residency < last_state->threshold.demotion_time) {
+ last_state->stats.demotion_count++;
+ last_state->stats.promotion_count = 0;
+ if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) {
+ ladder_do_selection(ldev, last_idx, last_idx - 1);
+ return last_idx - 1;
+ }
+ }
+
+ /* otherwise remain at the current state */
+ return last_idx;
+}
+
+/**
+ * ladder_enable_device - setup for the governor
+ * @drv: cpuidle driver
+ * @dev: the CPU
+ */
+static int ladder_enable_device(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev)
+{
+ int i;
+ int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0;
+ struct ladder_device *ldev = &per_cpu(ladder_devices, dev->cpu);
+ struct ladder_device_state *lstate;
+ struct cpuidle_state *state;
+
+ ldev->last_state_idx = first_idx;
+
+ for (i = first_idx; i < drv->state_count; i++) {
+ state = &drv->states[i];
+ lstate = &ldev->states[i];
+
+ lstate->stats.promotion_count = 0;
+ lstate->stats.demotion_count = 0;
+
+ lstate->threshold.promotion_count = PROMOTION_COUNT;
+ lstate->threshold.demotion_count = DEMOTION_COUNT;
+
+ if (i < drv->state_count - 1)
+ lstate->threshold.promotion_time = state->exit_latency;
+ if (i > first_idx)
+ lstate->threshold.demotion_time = state->exit_latency;
+ }
+
+ return 0;
+}
+
+/**
+ * ladder_reflect - update the correct last_state_idx
+ * @dev: the CPU
+ * @index: the index of actual state entered
+ */
+static void ladder_reflect(struct cpuidle_device *dev, int index)
+{
+ struct ladder_device *ldev = this_cpu_ptr(&ladder_devices);
+ if (index > 0)
+ ldev->last_state_idx = index;
+}
+
+static struct cpuidle_governor ladder_governor = {
+ .name = "ladder",
+ .rating = 10,
+ .enable = ladder_enable_device,
+ .select = ladder_select_state,
+ .reflect = ladder_reflect,
+};
+
+/**
+ * init_ladder - initializes the governor
+ */
+static int __init init_ladder(void)
+{
+ /*
+ * When NO_HZ is disabled, or when booting with nohz=off, the ladder
+ * governor is better so give it a higher rating than the menu
+ * governor.
+ */
+ if (!tick_nohz_enabled)
+ ladder_governor.rating = 25;
+
+ return cpuidle_register_governor(&ladder_governor);
+}
+
+postcore_initcall(init_ladder);
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
new file mode 100644
index 000000000..6d7f6b9bb
--- /dev/null
+++ b/drivers/cpuidle/governors/menu.c
@@ -0,0 +1,609 @@
+/*
+ * menu.c - the menu idle governor
+ *
+ * Copyright (C) 2006-2007 Adam Belay <abelay@novell.com>
+ * Copyright (C) 2009 Intel Corporation
+ * Author:
+ * Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This code is licenced under the GPL version 2 as described
+ * in the COPYING file that acompanies the Linux Kernel.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/time.h>
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+#include <linux/tick.h>
+#include <linux/sched.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/stat.h>
+#include <linux/math64.h>
+
+/*
+ * Please note when changing the tuning values:
+ * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of
+ * a scaling operation multiplication may overflow on 32 bit platforms.
+ * In that case, #define RESOLUTION as ULL to get 64 bit result:
+ * #define RESOLUTION 1024ULL
+ *
+ * The default values do not overflow.
+ */
+#define BUCKETS 12
+#define INTERVAL_SHIFT 3
+#define INTERVALS (1UL << INTERVAL_SHIFT)
+#define RESOLUTION 1024
+#define DECAY 8
+#define MAX_INTERESTING 50000
+
+
+/*
+ * Concepts and ideas behind the menu governor
+ *
+ * For the menu governor, there are 3 decision factors for picking a C
+ * state:
+ * 1) Energy break even point
+ * 2) Performance impact
+ * 3) Latency tolerance (from pmqos infrastructure)
+ * These these three factors are treated independently.
+ *
+ * Energy break even point
+ * -----------------------
+ * C state entry and exit have an energy cost, and a certain amount of time in
+ * the C state is required to actually break even on this cost. CPUIDLE
+ * provides us this duration in the "target_residency" field. So all that we
+ * need is a good prediction of how long we'll be idle. Like the traditional
+ * menu governor, we start with the actual known "next timer event" time.
+ *
+ * Since there are other source of wakeups (interrupts for example) than
+ * the next timer event, this estimation is rather optimistic. To get a
+ * more realistic estimate, a correction factor is applied to the estimate,
+ * that is based on historic behavior. For example, if in the past the actual
+ * duration always was 50% of the next timer tick, the correction factor will
+ * be 0.5.
+ *
+ * menu uses a running average for this correction factor, however it uses a
+ * set of factors, not just a single factor. This stems from the realization
+ * that the ratio is dependent on the order of magnitude of the expected
+ * duration; if we expect 500 milliseconds of idle time the likelihood of
+ * getting an interrupt very early is much higher than if we expect 50 micro
+ * seconds of idle time. A second independent factor that has big impact on
+ * the actual factor is if there is (disk) IO outstanding or not.
+ * (as a special twist, we consider every sleep longer than 50 milliseconds
+ * as perfect; there are no power gains for sleeping longer than this)
+ *
+ * For these two reasons we keep an array of 12 independent factors, that gets
+ * indexed based on the magnitude of the expected duration as well as the
+ * "is IO outstanding" property.
+ *
+ * Repeatable-interval-detector
+ * ----------------------------
+ * There are some cases where "next timer" is a completely unusable predictor:
+ * Those cases where the interval is fixed, for example due to hardware
+ * interrupt mitigation, but also due to fixed transfer rate devices such as
+ * mice.
+ * For this, we use a different predictor: We track the duration of the last 8
+ * intervals and if the stand deviation of these 8 intervals is below a
+ * threshold value, we use the average of these intervals as prediction.
+ *
+ * Limiting Performance Impact
+ * ---------------------------
+ * C states, especially those with large exit latencies, can have a real
+ * noticeable impact on workloads, which is not acceptable for most sysadmins,
+ * and in addition, less performance has a power price of its own.
+ *
+ * As a general rule of thumb, menu assumes that the following heuristic
+ * holds:
+ * The busier the system, the less impact of C states is acceptable
+ *
+ * This rule-of-thumb is implemented using a performance-multiplier:
+ * If the exit latency times the performance multiplier is longer than
+ * the predicted duration, the C state is not considered a candidate
+ * for selection due to a too high performance impact. So the higher
+ * this multiplier is, the longer we need to be idle to pick a deep C
+ * state, and thus the less likely a busy CPU will hit such a deep
+ * C state.
+ *
+ * Two factors are used in determing this multiplier:
+ * a value of 10 is added for each point of "per cpu load average" we have.
+ * a value of 5 points is added for each process that is waiting for
+ * IO on this CPU.
+ * (these values are experimentally determined)
+ *
+ * The load average factor gives a longer term (few seconds) input to the
+ * decision, while the iowait value gives a cpu local instantanious input.
+ * The iowait factor may look low, but realize that this is also already
+ * represented in the system load average.
+ *
+ */
+
+struct menu_device {
+ int last_state_idx;
+ int needs_update;
+ int tick_wakeup;
+
+ unsigned int next_timer_us;
+ unsigned int predicted_us;
+ unsigned int bucket;
+ unsigned int correction_factor[BUCKETS];
+ unsigned int intervals[INTERVALS];
+ int interval_ptr;
+};
+
+
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+
+static inline int get_loadavg(unsigned long load)
+{
+ return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10;
+}
+
+static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters)
+{
+ int bucket = 0;
+
+ /*
+ * We keep two groups of stats; one with no
+ * IO pending, one without.
+ * This allows us to calculate
+ * E(duration)|iowait
+ */
+ if (nr_iowaiters)
+ bucket = BUCKETS/2;
+
+ if (duration < 10)
+ return bucket;
+ if (duration < 100)
+ return bucket + 1;
+ if (duration < 1000)
+ return bucket + 2;
+ if (duration < 10000)
+ return bucket + 3;
+ if (duration < 100000)
+ return bucket + 4;
+ return bucket + 5;
+}
+
+/*
+ * Return a multiplier for the exit latency that is intended
+ * to take performance requirements into account.
+ * The more performance critical we estimate the system
+ * to be, the higher this multiplier, and thus the higher
+ * the barrier to go to an expensive C state.
+ */
+static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load)
+{
+ int mult = 1;
+
+ /* for higher loadavg, we are more reluctant */
+
+ mult += 2 * get_loadavg(load);
+
+ /* for IO wait tasks (per cpu!) we add 5x each */
+ mult += 10 * nr_iowaiters;
+
+ return mult;
+}
+
+static DEFINE_PER_CPU(struct menu_device, menu_devices);
+
+static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
+
+/*
+ * Try detecting repeating patterns by keeping track of the last 8
+ * intervals, and checking if the standard deviation of that set
+ * of points is below a threshold. If it is... then use the
+ * average of these 8 points as the estimated value.
+ */
+static unsigned int get_typical_interval(struct menu_device *data)
+{
+ int i, divisor;
+ unsigned int max, thresh, avg;
+ uint64_t sum, variance;
+
+ thresh = UINT_MAX; /* Discard outliers above this value */
+
+again:
+
+ /* First calculate the average of past intervals */
+ max = 0;
+ sum = 0;
+ divisor = 0;
+ for (i = 0; i < INTERVALS; i++) {
+ unsigned int value = data->intervals[i];
+ if (value <= thresh) {
+ sum += value;
+ divisor++;
+ if (value > max)
+ max = value;
+ }
+ }
+ if (divisor == INTERVALS)
+ avg = sum >> INTERVAL_SHIFT;
+ else
+ avg = div_u64(sum, divisor);
+
+ /* Then try to determine variance */
+ variance = 0;
+ for (i = 0; i < INTERVALS; i++) {
+ unsigned int value = data->intervals[i];
+ if (value <= thresh) {
+ int64_t diff = (int64_t)value - avg;
+ variance += diff * diff;
+ }
+ }
+ if (divisor == INTERVALS)
+ variance >>= INTERVAL_SHIFT;
+ else
+ do_div(variance, divisor);
+
+ /*
+ * The typical interval is obtained when standard deviation is
+ * small (stddev <= 20 us, variance <= 400 us^2) or standard
+ * deviation is small compared to the average interval (avg >
+ * 6*stddev, avg^2 > 36*variance). The average is smaller than
+ * UINT_MAX aka U32_MAX, so computing its square does not
+ * overflow a u64. We simply reject this candidate average if
+ * the standard deviation is greater than 715 s (which is
+ * rather unlikely).
+ *
+ * Use this result only if there is no timer to wake us up sooner.
+ */
+ if (likely(variance <= U64_MAX/36)) {
+ if ((((u64)avg*avg > variance*36) && (divisor * 4 >= INTERVALS * 3))
+ || variance <= 400) {
+ return avg;
+ }
+ }
+
+ /*
+ * If we have outliers to the upside in our distribution, discard
+ * those by setting the threshold to exclude these outliers, then
+ * calculate the average and standard deviation again. Once we get
+ * down to the bottom 3/4 of our samples, stop excluding samples.
+ *
+ * This can deal with workloads that have long pauses interspersed
+ * with sporadic activity with a bunch of short pauses.
+ */
+ if ((divisor * 4) <= INTERVALS * 3)
+ return UINT_MAX;
+
+ thresh = max - 1;
+ goto again;
+}
+
+/**
+ * menu_select - selects the next idle state to enter
+ * @drv: cpuidle driver containing state data
+ * @dev: the CPU
+ * @stop_tick: indication on whether or not to stop the tick
+ */
+static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+ bool *stop_tick)
+{
+ struct menu_device *data = this_cpu_ptr(&menu_devices);
+ int latency_req = cpuidle_governor_latency_req(dev->cpu);
+ int i;
+ int first_idx;
+ int idx;
+ unsigned int interactivity_req;
+ unsigned int expected_interval;
+ unsigned long nr_iowaiters, cpu_load;
+ ktime_t delta_next;
+
+ if (data->needs_update) {
+ menu_update(drv, dev);
+ data->needs_update = 0;
+ }
+
+ /* Special case when user has set very strict latency requirement */
+ if (unlikely(latency_req == 0)) {
+ *stop_tick = false;
+ return 0;
+ }
+
+ /* determine the expected residency time, round up */
+ data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
+
+ get_iowait_load(&nr_iowaiters, &cpu_load);
+ data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
+
+ /*
+ * Force the result of multiplication to be 64 bits even if both
+ * operands are 32 bits.
+ * Make sure to round up for half microseconds.
+ */
+ data->predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
+ data->correction_factor[data->bucket],
+ RESOLUTION * DECAY);
+
+ expected_interval = get_typical_interval(data);
+ expected_interval = min(expected_interval, data->next_timer_us);
+
+ first_idx = 0;
+ if (drv->states[0].flags & CPUIDLE_FLAG_POLLING) {
+ struct cpuidle_state *s = &drv->states[1];
+ unsigned int polling_threshold;
+
+ /*
+ * Default to a physical idle state, not to busy polling, unless
+ * a timer is going to trigger really really soon.
+ */
+ polling_threshold = max_t(unsigned int, 20, s->target_residency);
+ if (data->next_timer_us > polling_threshold &&
+ latency_req > s->exit_latency && !s->disabled &&
+ !dev->states_usage[1].disable)
+ first_idx = 1;
+ }
+
+ /*
+ * Use the lowest expected idle interval to pick the idle state.
+ */
+ data->predicted_us = min(data->predicted_us, expected_interval);
+
+ if (tick_nohz_tick_stopped()) {
+ /*
+ * If the tick is already stopped, the cost of possible short
+ * idle duration misprediction is much higher, because the CPU
+ * may be stuck in a shallow idle state for a long time as a
+ * result of it. In that case say we might mispredict and use
+ * the known time till the closest timer event for the idle
+ * state selection.
+ */
+ if (data->predicted_us < TICK_USEC)
+ data->predicted_us = ktime_to_us(delta_next);
+ } else {
+ /*
+ * Use the performance multiplier and the user-configurable
+ * latency_req to determine the maximum exit latency.
+ */
+ interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
+ if (latency_req > interactivity_req)
+ latency_req = interactivity_req;
+ }
+
+ expected_interval = data->predicted_us;
+ /*
+ * Find the idle state with the lowest power while satisfying
+ * our constraints.
+ */
+ idx = -1;
+ for (i = first_idx; i < drv->state_count; i++) {
+ struct cpuidle_state *s = &drv->states[i];
+ struct cpuidle_state_usage *su = &dev->states_usage[i];
+
+ if (s->disabled || su->disable)
+ continue;
+ if (idx == -1)
+ idx = i; /* first enabled state */
+ if (s->target_residency > data->predicted_us) {
+ if (data->predicted_us < TICK_USEC)
+ break;
+
+ if (!tick_nohz_tick_stopped()) {
+ /*
+ * If the state selected so far is shallow,
+ * waking up early won't hurt, so retain the
+ * tick in that case and let the governor run
+ * again in the next iteration of the loop.
+ */
+ expected_interval = drv->states[idx].target_residency;
+ break;
+ }
+
+ /*
+ * If the state selected so far is shallow and this
+ * state's target residency matches the time till the
+ * closest timer event, select this one to avoid getting
+ * stuck in the shallow one for too long.
+ */
+ if (drv->states[idx].target_residency < TICK_USEC &&
+ s->target_residency <= ktime_to_us(delta_next))
+ idx = i;
+
+ goto out;
+ }
+ if (s->exit_latency > latency_req) {
+ /*
+ * If we break out of the loop for latency reasons, use
+ * the target residency of the selected state as the
+ * expected idle duration so that the tick is retained
+ * as long as that target residency is low enough.
+ */
+ expected_interval = drv->states[idx].target_residency;
+ break;
+ }
+ idx = i;
+ }
+
+ if (idx == -1)
+ idx = 0; /* No states enabled. Must use 0. */
+
+ /*
+ * Don't stop the tick if the selected state is a polling one or if the
+ * expected idle duration is shorter than the tick period length.
+ */
+ if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
+ expected_interval < TICK_USEC) && !tick_nohz_tick_stopped()) {
+ unsigned int delta_next_us = ktime_to_us(delta_next);
+
+ *stop_tick = false;
+
+ if (idx > 0 && drv->states[idx].target_residency > delta_next_us) {
+ /*
+ * The tick is not going to be stopped and the target
+ * residency of the state to be returned is not within
+ * the time until the next timer event including the
+ * tick, so try to correct that.
+ */
+ for (i = idx - 1; i >= 0; i--) {
+ if (drv->states[i].disabled ||
+ dev->states_usage[i].disable)
+ continue;
+
+ idx = i;
+ if (drv->states[i].target_residency <= delta_next_us)
+ break;
+ }
+ }
+ }
+
+out:
+ data->last_state_idx = idx;
+
+ return data->last_state_idx;
+}
+
+/**
+ * menu_reflect - records that data structures need update
+ * @dev: the CPU
+ * @index: the index of actual entered state
+ *
+ * NOTE: it's important to be fast here because this operation will add to
+ * the overall exit latency.
+ */
+static void menu_reflect(struct cpuidle_device *dev, int index)
+{
+ struct menu_device *data = this_cpu_ptr(&menu_devices);
+
+ data->last_state_idx = index;
+ data->needs_update = 1;
+ data->tick_wakeup = tick_nohz_idle_got_tick();
+}
+
+/**
+ * menu_update - attempts to guess what happened after entry
+ * @drv: cpuidle driver containing state data
+ * @dev: the CPU
+ */
+static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+{
+ struct menu_device *data = this_cpu_ptr(&menu_devices);
+ int last_idx = data->last_state_idx;
+ struct cpuidle_state *target = &drv->states[last_idx];
+ unsigned int measured_us;
+ unsigned int new_factor;
+
+ /*
+ * Try to figure out how much time passed between entry to low
+ * power state and occurrence of the wakeup event.
+ *
+ * If the entered idle state didn't support residency measurements,
+ * we use them anyway if they are short, and if long,
+ * truncate to the whole expected time.
+ *
+ * Any measured amount of time will include the exit latency.
+ * Since we are interested in when the wakeup begun, not when it
+ * was completed, we must subtract the exit latency. However, if
+ * the measured amount of time is less than the exit latency,
+ * assume the state was never reached and the exit latency is 0.
+ */
+
+ if (data->tick_wakeup && data->next_timer_us > TICK_USEC) {
+ /*
+ * The nohz code said that there wouldn't be any events within
+ * the tick boundary (if the tick was stopped), but the idle
+ * duration predictor had a differing opinion. Since the CPU
+ * was woken up by a tick (that wasn't stopped after all), the
+ * predictor was not quite right, so assume that the CPU could
+ * have been idle long (but not forever) to help the idle
+ * duration predictor do a better job next time.
+ */
+ measured_us = 9 * MAX_INTERESTING / 10;
+ } else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) &&
+ dev->poll_time_limit) {
+ /*
+ * The CPU exited the "polling" state due to a time limit, so
+ * the idle duration prediction leading to the selection of that
+ * state was inaccurate. If a better prediction had been made,
+ * the CPU might have been woken up from idle by the next timer.
+ * Assume that to be the case.
+ */
+ measured_us = data->next_timer_us;
+ } else {
+ /* measured value */
+ measured_us = cpuidle_get_last_residency(dev);
+
+ /* Deduct exit latency */
+ if (measured_us > 2 * target->exit_latency)
+ measured_us -= target->exit_latency;
+ else
+ measured_us /= 2;
+ }
+
+ /* Make sure our coefficients do not exceed unity */
+ if (measured_us > data->next_timer_us)
+ measured_us = data->next_timer_us;
+
+ /* Update our correction ratio */
+ new_factor = data->correction_factor[data->bucket];
+ new_factor -= new_factor / DECAY;
+
+ if (data->next_timer_us > 0 && measured_us < MAX_INTERESTING)
+ new_factor += RESOLUTION * measured_us / data->next_timer_us;
+ else
+ /*
+ * we were idle so long that we count it as a perfect
+ * prediction
+ */
+ new_factor += RESOLUTION;
+
+ /*
+ * We don't want 0 as factor; we always want at least
+ * a tiny bit of estimated time. Fortunately, due to rounding,
+ * new_factor will stay nonzero regardless of measured_us values
+ * and the compiler can eliminate this test as long as DECAY > 1.
+ */
+ if (DECAY == 1 && unlikely(new_factor == 0))
+ new_factor = 1;
+
+ data->correction_factor[data->bucket] = new_factor;
+
+ /* update the repeating-pattern data */
+ data->intervals[data->interval_ptr++] = measured_us;
+ if (data->interval_ptr >= INTERVALS)
+ data->interval_ptr = 0;
+}
+
+/**
+ * menu_enable_device - scans a CPU's states and does setup
+ * @drv: cpuidle driver
+ * @dev: the CPU
+ */
+static int menu_enable_device(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev)
+{
+ struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
+ int i;
+
+ memset(data, 0, sizeof(struct menu_device));
+
+ /*
+ * if the correction factor is 0 (eg first time init or cpu hotplug
+ * etc), we actually want to start out with a unity factor.
+ */
+ for(i = 0; i < BUCKETS; i++)
+ data->correction_factor[i] = RESOLUTION * DECAY;
+
+ return 0;
+}
+
+static struct cpuidle_governor menu_governor = {
+ .name = "menu",
+ .rating = 20,
+ .enable = menu_enable_device,
+ .select = menu_select,
+ .reflect = menu_reflect,
+};
+
+/**
+ * init_menu - initializes the governor
+ */
+static int __init init_menu(void)
+{
+ return cpuidle_register_governor(&menu_governor);
+}
+
+postcore_initcall(init_menu);
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
new file mode 100644
index 000000000..36ff5a1d9
--- /dev/null
+++ b/drivers/cpuidle/poll_state.c
@@ -0,0 +1,56 @@
+/*
+ * poll_state.c - Polling idle state
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/cpuidle.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/idle.h>
+
+#define POLL_IDLE_TIME_LIMIT (TICK_NSEC / 16)
+#define POLL_IDLE_RELAX_COUNT 200
+
+static int __cpuidle poll_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ u64 time_start = local_clock();
+
+ dev->poll_time_limit = false;
+
+ local_irq_enable();
+ if (!current_set_polling_and_test()) {
+ unsigned int loop_count = 0;
+
+ while (!need_resched()) {
+ cpu_relax();
+ if (loop_count++ < POLL_IDLE_RELAX_COUNT)
+ continue;
+
+ loop_count = 0;
+ if (local_clock() - time_start > POLL_IDLE_TIME_LIMIT) {
+ dev->poll_time_limit = true;
+ break;
+ }
+ }
+ }
+ current_clr_polling();
+
+ return index;
+}
+
+void cpuidle_poll_state_init(struct cpuidle_driver *drv)
+{
+ struct cpuidle_state *state = &drv->states[0];
+
+ snprintf(state->name, CPUIDLE_NAME_LEN, "POLL");
+ snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE");
+ state->exit_latency = 0;
+ state->target_residency = 0;
+ state->power_usage = -1;
+ state->enter = poll_idle;
+ state->disabled = false;
+ state->flags = CPUIDLE_FLAG_POLLING;
+}
+EXPORT_SYMBOL_GPL(cpuidle_poll_state_init);
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
new file mode 100644
index 000000000..d9b917529
--- /dev/null
+++ b/drivers/cpuidle/sysfs.c
@@ -0,0 +1,718 @@
+/*
+ * sysfs.c - sysfs support
+ *
+ * (C) 2006-2007 Shaohua Li <shaohua.li@intel.com>
+ *
+ * This code is licenced under the GPL.
+ */
+
+#include <linux/kernel.h>
+#include <linux/cpuidle.h>
+#include <linux/sysfs.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/completion.h>
+#include <linux/capability.h>
+#include <linux/device.h>
+#include <linux/kobject.h>
+
+#include "cpuidle.h"
+
+static unsigned int sysfs_switch;
+static int __init cpuidle_sysfs_setup(char *unused)
+{
+ sysfs_switch = 1;
+ return 1;
+}
+__setup("cpuidle_sysfs_switch", cpuidle_sysfs_setup);
+
+static ssize_t show_available_governors(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t i = 0;
+ struct cpuidle_governor *tmp;
+
+ mutex_lock(&cpuidle_lock);
+ list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
+ if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) -
+ CPUIDLE_NAME_LEN - 2))
+ goto out;
+ i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
+ }
+
+out:
+ i+= sprintf(&buf[i], "\n");
+ mutex_unlock(&cpuidle_lock);
+ return i;
+}
+
+static ssize_t show_current_driver(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret;
+ struct cpuidle_driver *drv;
+
+ spin_lock(&cpuidle_driver_lock);
+ drv = cpuidle_get_driver();
+ if (drv)
+ ret = sprintf(buf, "%s\n", drv->name);
+ else
+ ret = sprintf(buf, "none\n");
+ spin_unlock(&cpuidle_driver_lock);
+
+ return ret;
+}
+
+static ssize_t show_current_governor(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ ssize_t ret;
+
+ mutex_lock(&cpuidle_lock);
+ if (cpuidle_curr_governor)
+ ret = sprintf(buf, "%s\n", cpuidle_curr_governor->name);
+ else
+ ret = sprintf(buf, "none\n");
+ mutex_unlock(&cpuidle_lock);
+
+ return ret;
+}
+
+static ssize_t store_current_governor(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ char gov_name[CPUIDLE_NAME_LEN];
+ int ret = -EINVAL;
+ size_t len = count;
+ struct cpuidle_governor *gov;
+
+ if (!len || len >= sizeof(gov_name))
+ return -EINVAL;
+
+ memcpy(gov_name, buf, len);
+ gov_name[len] = '\0';
+ if (gov_name[len - 1] == '\n')
+ gov_name[--len] = '\0';
+
+ mutex_lock(&cpuidle_lock);
+
+ list_for_each_entry(gov, &cpuidle_governors, governor_list) {
+ if (strlen(gov->name) == len && !strcmp(gov->name, gov_name)) {
+ ret = cpuidle_switch_governor(gov);
+ break;
+ }
+ }
+
+ mutex_unlock(&cpuidle_lock);
+
+ if (ret)
+ return ret;
+ else
+ return count;
+}
+
+static DEVICE_ATTR(current_driver, 0444, show_current_driver, NULL);
+static DEVICE_ATTR(current_governor_ro, 0444, show_current_governor, NULL);
+
+static struct attribute *cpuidle_default_attrs[] = {
+ &dev_attr_current_driver.attr,
+ &dev_attr_current_governor_ro.attr,
+ NULL
+};
+
+static DEVICE_ATTR(available_governors, 0444, show_available_governors, NULL);
+static DEVICE_ATTR(current_governor, 0644, show_current_governor,
+ store_current_governor);
+
+static struct attribute *cpuidle_switch_attrs[] = {
+ &dev_attr_available_governors.attr,
+ &dev_attr_current_driver.attr,
+ &dev_attr_current_governor.attr,
+ NULL
+};
+
+static struct attribute_group cpuidle_attr_group = {
+ .attrs = cpuidle_default_attrs,
+ .name = "cpuidle",
+};
+
+/**
+ * cpuidle_add_interface - add CPU global sysfs attributes
+ */
+int cpuidle_add_interface(struct device *dev)
+{
+ if (sysfs_switch)
+ cpuidle_attr_group.attrs = cpuidle_switch_attrs;
+
+ return sysfs_create_group(&dev->kobj, &cpuidle_attr_group);
+}
+
+/**
+ * cpuidle_remove_interface - remove CPU global sysfs attributes
+ */
+void cpuidle_remove_interface(struct device *dev)
+{
+ sysfs_remove_group(&dev->kobj, &cpuidle_attr_group);
+}
+
+struct cpuidle_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct cpuidle_device *, char *);
+ ssize_t (*store)(struct cpuidle_device *, const char *, size_t count);
+};
+
+#define define_one_ro(_name, show) \
+ static struct cpuidle_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
+#define define_one_rw(_name, show, store) \
+ static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store)
+
+#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr)
+
+struct cpuidle_device_kobj {
+ struct cpuidle_device *dev;
+ struct completion kobj_unregister;
+ struct kobject kobj;
+};
+
+static inline struct cpuidle_device *to_cpuidle_device(struct kobject *kobj)
+{
+ struct cpuidle_device_kobj *kdev =
+ container_of(kobj, struct cpuidle_device_kobj, kobj);
+
+ return kdev->dev;
+}
+
+static ssize_t cpuidle_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ int ret = -EIO;
+ struct cpuidle_device *dev = to_cpuidle_device(kobj);
+ struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr);
+
+ if (cattr->show) {
+ mutex_lock(&cpuidle_lock);
+ ret = cattr->show(dev, buf);
+ mutex_unlock(&cpuidle_lock);
+ }
+ return ret;
+}
+
+static ssize_t cpuidle_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ int ret = -EIO;
+ struct cpuidle_device *dev = to_cpuidle_device(kobj);
+ struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr);
+
+ if (cattr->store) {
+ mutex_lock(&cpuidle_lock);
+ ret = cattr->store(dev, buf, count);
+ mutex_unlock(&cpuidle_lock);
+ }
+ return ret;
+}
+
+static const struct sysfs_ops cpuidle_sysfs_ops = {
+ .show = cpuidle_show,
+ .store = cpuidle_store,
+};
+
+static void cpuidle_sysfs_release(struct kobject *kobj)
+{
+ struct cpuidle_device_kobj *kdev =
+ container_of(kobj, struct cpuidle_device_kobj, kobj);
+
+ complete(&kdev->kobj_unregister);
+}
+
+static struct kobj_type ktype_cpuidle = {
+ .sysfs_ops = &cpuidle_sysfs_ops,
+ .release = cpuidle_sysfs_release,
+};
+
+struct cpuidle_state_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct cpuidle_state *, \
+ struct cpuidle_state_usage *, char *);
+ ssize_t (*store)(struct cpuidle_state *, \
+ struct cpuidle_state_usage *, const char *, size_t);
+};
+
+#define define_one_state_ro(_name, show) \
+static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0444, show, NULL)
+
+#define define_one_state_rw(_name, show, store) \
+static struct cpuidle_state_attr attr_##_name = __ATTR(_name, 0644, show, store)
+
+#define define_show_state_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, \
+ struct cpuidle_state_usage *state_usage, char *buf) \
+{ \
+ return sprintf(buf, "%u\n", state->_name);\
+}
+
+#define define_store_state_ull_function(_name) \
+static ssize_t store_state_##_name(struct cpuidle_state *state, \
+ struct cpuidle_state_usage *state_usage, \
+ const char *buf, size_t size) \
+{ \
+ unsigned long long value; \
+ int err; \
+ if (!capable(CAP_SYS_ADMIN)) \
+ return -EPERM; \
+ err = kstrtoull(buf, 0, &value); \
+ if (err) \
+ return err; \
+ if (value) \
+ state_usage->_name = 1; \
+ else \
+ state_usage->_name = 0; \
+ return size; \
+}
+
+#define define_show_state_ull_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, \
+ struct cpuidle_state_usage *state_usage, \
+ char *buf) \
+{ \
+ return sprintf(buf, "%llu\n", state_usage->_name);\
+}
+
+#define define_show_state_str_function(_name) \
+static ssize_t show_state_##_name(struct cpuidle_state *state, \
+ struct cpuidle_state_usage *state_usage, \
+ char *buf) \
+{ \
+ if (state->_name[0] == '\0')\
+ return sprintf(buf, "<null>\n");\
+ return sprintf(buf, "%s\n", state->_name);\
+}
+
+define_show_state_function(exit_latency)
+define_show_state_function(target_residency)
+define_show_state_function(power_usage)
+define_show_state_ull_function(usage)
+define_show_state_ull_function(time)
+define_show_state_str_function(name)
+define_show_state_str_function(desc)
+define_show_state_ull_function(disable)
+define_store_state_ull_function(disable)
+
+define_one_state_ro(name, show_state_name);
+define_one_state_ro(desc, show_state_desc);
+define_one_state_ro(latency, show_state_exit_latency);
+define_one_state_ro(residency, show_state_target_residency);
+define_one_state_ro(power, show_state_power_usage);
+define_one_state_ro(usage, show_state_usage);
+define_one_state_ro(time, show_state_time);
+define_one_state_rw(disable, show_state_disable, store_state_disable);
+
+static struct attribute *cpuidle_state_default_attrs[] = {
+ &attr_name.attr,
+ &attr_desc.attr,
+ &attr_latency.attr,
+ &attr_residency.attr,
+ &attr_power.attr,
+ &attr_usage.attr,
+ &attr_time.attr,
+ &attr_disable.attr,
+ NULL
+};
+
+struct cpuidle_state_kobj {
+ struct cpuidle_state *state;
+ struct cpuidle_state_usage *state_usage;
+ struct completion kobj_unregister;
+ struct kobject kobj;
+};
+
+#ifdef CONFIG_SUSPEND
+#define define_show_state_s2idle_ull_function(_name) \
+static ssize_t show_state_s2idle_##_name(struct cpuidle_state *state, \
+ struct cpuidle_state_usage *state_usage, \
+ char *buf) \
+{ \
+ return sprintf(buf, "%llu\n", state_usage->s2idle_##_name);\
+}
+
+define_show_state_s2idle_ull_function(usage);
+define_show_state_s2idle_ull_function(time);
+
+#define define_one_state_s2idle_ro(_name, show) \
+static struct cpuidle_state_attr attr_s2idle_##_name = \
+ __ATTR(_name, 0444, show, NULL)
+
+define_one_state_s2idle_ro(usage, show_state_s2idle_usage);
+define_one_state_s2idle_ro(time, show_state_s2idle_time);
+
+static struct attribute *cpuidle_state_s2idle_attrs[] = {
+ &attr_s2idle_usage.attr,
+ &attr_s2idle_time.attr,
+ NULL
+};
+
+static const struct attribute_group cpuidle_state_s2idle_group = {
+ .name = "s2idle",
+ .attrs = cpuidle_state_s2idle_attrs,
+};
+
+static void cpuidle_add_s2idle_attr_group(struct cpuidle_state_kobj *kobj)
+{
+ int ret;
+
+ if (!kobj->state->enter_s2idle)
+ return;
+
+ ret = sysfs_create_group(&kobj->kobj, &cpuidle_state_s2idle_group);
+ if (ret)
+ pr_debug("%s: sysfs attribute group not created\n", __func__);
+}
+
+static void cpuidle_remove_s2idle_attr_group(struct cpuidle_state_kobj *kobj)
+{
+ if (kobj->state->enter_s2idle)
+ sysfs_remove_group(&kobj->kobj, &cpuidle_state_s2idle_group);
+}
+#else
+static inline void cpuidle_add_s2idle_attr_group(struct cpuidle_state_kobj *kobj) { }
+static inline void cpuidle_remove_s2idle_attr_group(struct cpuidle_state_kobj *kobj) { }
+#endif /* CONFIG_SUSPEND */
+
+#define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj)
+#define kobj_to_state(k) (kobj_to_state_obj(k)->state)
+#define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage)
+#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
+
+static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr,
+ char * buf)
+{
+ int ret = -EIO;
+ struct cpuidle_state *state = kobj_to_state(kobj);
+ struct cpuidle_state_usage *state_usage = kobj_to_state_usage(kobj);
+ struct cpuidle_state_attr * cattr = attr_to_stateattr(attr);
+
+ if (cattr->show)
+ ret = cattr->show(state, state_usage, buf);
+
+ return ret;
+}
+
+static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t size)
+{
+ int ret = -EIO;
+ struct cpuidle_state *state = kobj_to_state(kobj);
+ struct cpuidle_state_usage *state_usage = kobj_to_state_usage(kobj);
+ struct cpuidle_state_attr *cattr = attr_to_stateattr(attr);
+
+ if (cattr->store)
+ ret = cattr->store(state, state_usage, buf, size);
+
+ return ret;
+}
+
+static const struct sysfs_ops cpuidle_state_sysfs_ops = {
+ .show = cpuidle_state_show,
+ .store = cpuidle_state_store,
+};
+
+static void cpuidle_state_sysfs_release(struct kobject *kobj)
+{
+ struct cpuidle_state_kobj *state_obj = kobj_to_state_obj(kobj);
+
+ complete(&state_obj->kobj_unregister);
+}
+
+static struct kobj_type ktype_state_cpuidle = {
+ .sysfs_ops = &cpuidle_state_sysfs_ops,
+ .default_attrs = cpuidle_state_default_attrs,
+ .release = cpuidle_state_sysfs_release,
+};
+
+static inline void cpuidle_free_state_kobj(struct cpuidle_device *device, int i)
+{
+ cpuidle_remove_s2idle_attr_group(device->kobjs[i]);
+ kobject_put(&device->kobjs[i]->kobj);
+ wait_for_completion(&device->kobjs[i]->kobj_unregister);
+ kfree(device->kobjs[i]);
+ device->kobjs[i] = NULL;
+}
+
+/**
+ * cpuidle_add_state_sysfs - adds cpuidle states sysfs attributes
+ * @device: the target device
+ */
+static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
+{
+ int i, ret = -ENOMEM;
+ struct cpuidle_state_kobj *kobj;
+ struct cpuidle_device_kobj *kdev = device->kobj_dev;
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device);
+
+ /* state statistics */
+ for (i = 0; i < drv->state_count; i++) {
+ kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL);
+ if (!kobj) {
+ ret = -ENOMEM;
+ goto error_state;
+ }
+ kobj->state = &drv->states[i];
+ kobj->state_usage = &device->states_usage[i];
+ init_completion(&kobj->kobj_unregister);
+
+ ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle,
+ &kdev->kobj, "state%d", i);
+ if (ret) {
+ kobject_put(&kobj->kobj);
+ kfree(kobj);
+ goto error_state;
+ }
+ cpuidle_add_s2idle_attr_group(kobj);
+ kobject_uevent(&kobj->kobj, KOBJ_ADD);
+ device->kobjs[i] = kobj;
+ }
+
+ return 0;
+
+error_state:
+ for (i = i - 1; i >= 0; i--)
+ cpuidle_free_state_kobj(device, i);
+ return ret;
+}
+
+/**
+ * cpuidle_remove_driver_sysfs - removes the cpuidle states sysfs attributes
+ * @device: the target device
+ */
+static void cpuidle_remove_state_sysfs(struct cpuidle_device *device)
+{
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device);
+ int i;
+
+ for (i = 0; i < drv->state_count; i++)
+ cpuidle_free_state_kobj(device, i);
+}
+
+#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS
+#define kobj_to_driver_kobj(k) container_of(k, struct cpuidle_driver_kobj, kobj)
+#define attr_to_driver_attr(a) container_of(a, struct cpuidle_driver_attr, attr)
+
+#define define_one_driver_ro(_name, show) \
+ static struct cpuidle_driver_attr attr_driver_##_name = \
+ __ATTR(_name, 0444, show, NULL)
+
+struct cpuidle_driver_kobj {
+ struct cpuidle_driver *drv;
+ struct completion kobj_unregister;
+ struct kobject kobj;
+};
+
+struct cpuidle_driver_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct cpuidle_driver *, char *);
+ ssize_t (*store)(struct cpuidle_driver *, const char *, size_t);
+};
+
+static ssize_t show_driver_name(struct cpuidle_driver *drv, char *buf)
+{
+ ssize_t ret;
+
+ spin_lock(&cpuidle_driver_lock);
+ ret = sprintf(buf, "%s\n", drv ? drv->name : "none");
+ spin_unlock(&cpuidle_driver_lock);
+
+ return ret;
+}
+
+static void cpuidle_driver_sysfs_release(struct kobject *kobj)
+{
+ struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj);
+ complete(&driver_kobj->kobj_unregister);
+}
+
+static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ int ret = -EIO;
+ struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj);
+ struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr);
+
+ if (dattr->show)
+ ret = dattr->show(driver_kobj->drv, buf);
+
+ return ret;
+}
+
+static ssize_t cpuidle_driver_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t size)
+{
+ int ret = -EIO;
+ struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj);
+ struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr);
+
+ if (dattr->store)
+ ret = dattr->store(driver_kobj->drv, buf, size);
+
+ return ret;
+}
+
+define_one_driver_ro(name, show_driver_name);
+
+static const struct sysfs_ops cpuidle_driver_sysfs_ops = {
+ .show = cpuidle_driver_show,
+ .store = cpuidle_driver_store,
+};
+
+static struct attribute *cpuidle_driver_default_attrs[] = {
+ &attr_driver_name.attr,
+ NULL
+};
+
+static struct kobj_type ktype_driver_cpuidle = {
+ .sysfs_ops = &cpuidle_driver_sysfs_ops,
+ .default_attrs = cpuidle_driver_default_attrs,
+ .release = cpuidle_driver_sysfs_release,
+};
+
+/**
+ * cpuidle_add_driver_sysfs - adds the driver name sysfs attribute
+ * @device: the target device
+ */
+static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
+{
+ struct cpuidle_driver_kobj *kdrv;
+ struct cpuidle_device_kobj *kdev = dev->kobj_dev;
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+ int ret;
+
+ kdrv = kzalloc(sizeof(*kdrv), GFP_KERNEL);
+ if (!kdrv)
+ return -ENOMEM;
+
+ kdrv->drv = drv;
+ init_completion(&kdrv->kobj_unregister);
+
+ ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle,
+ &kdev->kobj, "driver");
+ if (ret) {
+ kobject_put(&kdrv->kobj);
+ kfree(kdrv);
+ return ret;
+ }
+
+ kobject_uevent(&kdrv->kobj, KOBJ_ADD);
+ dev->kobj_driver = kdrv;
+
+ return ret;
+}
+
+/**
+ * cpuidle_remove_driver_sysfs - removes the driver name sysfs attribute
+ * @device: the target device
+ */
+static void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev)
+{
+ struct cpuidle_driver_kobj *kdrv = dev->kobj_driver;
+ kobject_put(&kdrv->kobj);
+ wait_for_completion(&kdrv->kobj_unregister);
+ kfree(kdrv);
+}
+#else
+static inline int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
+{
+ return 0;
+}
+
+static inline void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev)
+{
+ ;
+}
+#endif
+
+/**
+ * cpuidle_add_device_sysfs - adds device specific sysfs attributes
+ * @device: the target device
+ */
+int cpuidle_add_device_sysfs(struct cpuidle_device *device)
+{
+ int ret;
+
+ ret = cpuidle_add_state_sysfs(device);
+ if (ret)
+ return ret;
+
+ ret = cpuidle_add_driver_sysfs(device);
+ if (ret)
+ cpuidle_remove_state_sysfs(device);
+ return ret;
+}
+
+/**
+ * cpuidle_remove_device_sysfs : removes device specific sysfs attributes
+ * @device : the target device
+ */
+void cpuidle_remove_device_sysfs(struct cpuidle_device *device)
+{
+ cpuidle_remove_driver_sysfs(device);
+ cpuidle_remove_state_sysfs(device);
+}
+
+/**
+ * cpuidle_add_sysfs - creates a sysfs instance for the target device
+ * @dev: the target device
+ */
+int cpuidle_add_sysfs(struct cpuidle_device *dev)
+{
+ struct cpuidle_device_kobj *kdev;
+ struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
+ int error;
+
+ /*
+ * Return if cpu_device is not setup for this CPU.
+ *
+ * This could happen if the arch did not set up cpu_device
+ * since this CPU is not in cpu_present mask and the
+ * driver did not send a correct CPU mask during registration.
+ * Without this check we would end up passing bogus
+ * value for &cpu_dev->kobj in kobject_init_and_add()
+ */
+ if (!cpu_dev)
+ return -ENODEV;
+
+ kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
+ if (!kdev)
+ return -ENOMEM;
+ kdev->dev = dev;
+
+ init_completion(&kdev->kobj_unregister);
+
+ error = kobject_init_and_add(&kdev->kobj, &ktype_cpuidle, &cpu_dev->kobj,
+ "cpuidle");
+ if (error) {
+ kobject_put(&kdev->kobj);
+ kfree(kdev);
+ return error;
+ }
+
+ dev->kobj_dev = kdev;
+ kobject_uevent(&kdev->kobj, KOBJ_ADD);
+
+ return 0;
+}
+
+/**
+ * cpuidle_remove_sysfs - deletes a sysfs instance on the target device
+ * @dev: the target device
+ */
+void cpuidle_remove_sysfs(struct cpuidle_device *dev)
+{
+ struct cpuidle_device_kobj *kdev = dev->kobj_dev;
+
+ kobject_put(&kdev->kobj);
+ wait_for_completion(&kdev->kobj_unregister);
+ kfree(kdev);
+}