author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:27:49 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:27:49 +0000
commit    ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree      b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/thermal/intel
parent    Initial commit. (diff)
Adding upstream version 6.6.15. (tag: upstream/6.6.15)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/thermal/intel')
-rw-r--r--  drivers/thermal/intel/Kconfig  118
-rw-r--r--  drivers/thermal/intel/Makefile  16
-rw-r--r--  drivers/thermal/intel/int340x_thermal/Kconfig  49
-rw-r--r--  drivers/thermal/intel/int340x_thermal/Makefile  14
-rw-r--r--  drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c  595
-rw-r--r--  drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.h  146
-rw-r--r--  drivers/thermal/intel/int340x_thermal/int3400_thermal.c  728
-rw-r--r--  drivers/thermal/intel/int340x_thermal/int3401_thermal.c  77
-rw-r--r--  drivers/thermal/intel/int340x_thermal/int3402_thermal.c  104
-rw-r--r--  drivers/thermal/intel/int340x_thermal/int3403_thermal.c  307
-rw-r--r--  drivers/thermal/intel/int340x_thermal/int3406_thermal.c  209
-rw-r--r--  drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c  260
-rw-r--r--  drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h  55
-rw-r--r--  drivers/thermal/intel/int340x_thermal/processor_thermal_device.c  385
-rw-r--r--  drivers/thermal/intel/int340x_thermal/processor_thermal_device.h  96
-rw-r--r--  drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c  373
-rw-r--r--  drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c  158
-rw-r--r--  drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c  241
-rw-r--r--  drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c  135
-rw-r--r--  drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c  386
-rw-r--r--  drivers/thermal/intel/intel_bxt_pmic_thermal.c  290
-rw-r--r--  drivers/thermal/intel/intel_hfi.c  640
-rw-r--r--  drivers/thermal/intel/intel_hfi.h  17
-rw-r--r--  drivers/thermal/intel/intel_pch_thermal.c  401
-rw-r--r--  drivers/thermal/intel/intel_powerclamp.c  849
-rw-r--r--  drivers/thermal/intel/intel_quark_dts_thermal.c  423
-rw-r--r--  drivers/thermal/intel/intel_soc_dts_iosf.c  404
-rw-r--r--  drivers/thermal/intel/intel_soc_dts_iosf.h  52
-rw-r--r--  drivers/thermal/intel/intel_soc_dts_thermal.c  109
-rw-r--r--  drivers/thermal/intel/intel_tcc.c  139
-rw-r--r--  drivers/thermal/intel/intel_tcc_cooling.c  124
-rw-r--r--  drivers/thermal/intel/therm_throt.c  815
-rw-r--r--  drivers/thermal/intel/thermal_interrupt.h  24
-rw-r--r--  drivers/thermal/intel/x86_pkg_temp_thermal.c  538
34 files changed, 9277 insertions, 0 deletions
diff --git a/drivers/thermal/intel/Kconfig b/drivers/thermal/intel/Kconfig
new file mode 100644
index 0000000000..ecd7e07eec
--- /dev/null
+++ b/drivers/thermal/intel/Kconfig
@@ -0,0 +1,118 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INTEL_POWERCLAMP
+ tristate "Intel PowerClamp idle injection driver"
+ depends on X86
+ depends on CPU_SUP_INTEL
+ depends on CPU_IDLE
+ select POWERCAP
+ select IDLE_INJECT
+ help
+ Enable this to build the Intel PowerClamp idle injection driver. It
+ enforces idle time, which results in more package C-state residency.
+ The user interface is exposed via the generic thermal framework.
+
+config X86_THERMAL_VECTOR
+ def_bool y
+ depends on X86 && CPU_SUP_INTEL && X86_LOCAL_APIC
+
+config INTEL_TCC
+ bool
+ depends on X86
+
+config X86_PKG_TEMP_THERMAL
+ tristate "X86 package temperature thermal driver"
+ depends on X86_THERMAL_VECTOR
+ select THERMAL_GOV_USER_SPACE
+ select THERMAL_WRITABLE_TRIPS
+ select INTEL_TCC
+ default m
+ help
+ Enable this to register the CPU digital temperature sensor as a
+ per-package thermal zone. Each package will have its own thermal zone.
+ There are two trip points which can be set by the user to get
+ notifications via thermal notification methods.
+
+config INTEL_SOC_DTS_IOSF_CORE
+ tristate
+ depends on X86 && PCI
+ select IOSF_MBI
+ select INTEL_TCC
+ help
+ Many Intel SoCs expose additional digital temperature sensors (DTSs)
+ over the IOSF sideband interface. This option implements the common
+ set of helper functions to register the DTSs, read their temperatures,
+ and get/set their thresholds.
+
+config INTEL_SOC_DTS_THERMAL
+ tristate "Intel SoCs DTS thermal driver"
+ depends on X86 && PCI && ACPI
+ select INTEL_SOC_DTS_IOSF_CORE
+ select THERMAL_WRITABLE_TRIPS
+ help
+ Enable this to register the platform digital temperature sensors (DTS)
+ on Intel SoCs (e.g. Bay Trail). These SoCs have two DTSs in addition
+ to the DTSs on the CPU cores. Each DTS will be registered as a thermal
+ zone. There are two trip points. One of the trip points can be set by
+ user mode programs to get notifications via Linux thermal notification
+ methods. The other trip is a critical trip point, which is set by the
+ driver based on the TJ MAX temperature.
+
+config INTEL_QUARK_DTS_THERMAL
+ tristate "Intel Quark DTS thermal driver"
+ depends on X86_INTEL_QUARK
+ help
+ Enable this to register the platform digital temperature sensor (DTS)
+ on the Intel Quark SoC (e.g. X1000). The X1000 SoC has one on-die DTS,
+ which will be registered as a thermal zone. There are two trip points:
+ hot & critical. The critical trip point default value is set by the
+ underlying BIOS/firmware.
+
+menu "ACPI INT340X thermal drivers"
+source "drivers/thermal/intel/int340x_thermal/Kconfig"
+endmenu
+
+config INTEL_BXT_PMIC_THERMAL
+ tristate "Intel Broxton PMIC thermal driver"
+ depends on X86 && INTEL_SOC_PMIC_BXTWC
+ select REGMAP
+ help
+ Select this driver for the Intel Broxton PMIC, whose ADC channels
+ monitor system temperatures and raise alerts.
+ The driver monitors the ADC channels of the PMIC, handles the alert
+ trip point interrupts, and notifies the thermal framework with the
+ trip point and temperature details of the zone.
+
+config INTEL_PCH_THERMAL
+ tristate "Intel PCH Thermal Reporting Driver"
+ depends on X86 && PCI
+ select THERMAL_ACPI if ACPI
+ help
+ Enable this to support thermal reporting on certain Intel PCHs.
+ The thermal reporting device provides temperature readings,
+ programmable trip points and other information.
+
+config INTEL_TCC_COOLING
+ tristate "Intel TCC offset cooling Driver"
+ depends on X86
+ select INTEL_TCC
+ help
+ Enable this to support system cooling by adjusting the effective TCC
+ activation temperature via the TCC Offset register, which is widely
+ supported on modern Intel platforms.
+ Note that platforms differ in how fast the setting takes effect
+ and in how much the CPU frequency is reduced.
+
+config INTEL_HFI_THERMAL
+ bool "Intel Hardware Feedback Interface"
+ depends on NET
+ depends on CPU_SUP_INTEL
+ depends on X86_THERMAL_VECTOR
+ select THERMAL_NETLINK
+ help
+ Select this option to enable the Hardware Feedback Interface. If
+ selected, hardware provides guidance to the operating system on
+ the performance and energy efficiency capabilities of each CPU.
+ These capabilities may change as a result of changes in the operating
+ conditions of the system, such as power and thermal limits. If selected,
+ the kernel relays updates in CPUs' capabilities to userspace.
diff --git a/drivers/thermal/intel/Makefile b/drivers/thermal/intel/Makefile
new file mode 100644
index 0000000000..182b341130
--- /dev/null
+++ b/drivers/thermal/intel/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for various Intel thermal drivers.
+
+obj-$(CONFIG_INTEL_TCC) += intel_tcc.o
+obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o
+obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o
+obj-$(CONFIG_INTEL_SOC_DTS_IOSF_CORE) += intel_soc_dts_iosf.o
+obj-$(CONFIG_INTEL_SOC_DTS_THERMAL) += intel_soc_dts_thermal.o
+obj-$(CONFIG_INTEL_QUARK_DTS_THERMAL) += intel_quark_dts_thermal.o
+obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/
+obj-$(CONFIG_INTEL_BXT_PMIC_THERMAL) += intel_bxt_pmic_thermal.o
+obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o
+obj-$(CONFIG_INTEL_TCC_COOLING) += intel_tcc_cooling.o
+obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
+obj-$(CONFIG_INTEL_HFI_THERMAL) += intel_hfi.o
diff --git a/drivers/thermal/intel/int340x_thermal/Kconfig b/drivers/thermal/intel/int340x_thermal/Kconfig
new file mode 100644
index 0000000000..300ea53e9b
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/Kconfig
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# ACPI INT340x thermal drivers configuration
+#
+
+config INT340X_THERMAL
+ tristate "ACPI INT340X thermal drivers"
+ depends on X86_64 && ACPI && PCI
+ select THERMAL_GOV_USER_SPACE
+ select ACPI_THERMAL_REL
+ select ACPI_FAN
+ select THERMAL_ACPI
+ select INTEL_SOC_DTS_IOSF_CORE
+ select INTEL_TCC
+ select PROC_THERMAL_MMIO_RAPL if POWERCAP
+ help
+ Newer laptops and tablets that use ACPI may have thermal sensors and
+ other devices with thermal control capabilities outside the core
+ CPU/SOC, for thermal safety reasons.
+ They are exposed for the OS to use via the INT3400 ACPI device object
+ as the master, and INT3401~INT340B ACPI device objects as the slaves.
+ Enable this to expose the temperature information and cooling ability
+ from these objects to userspace via the normal thermal framework.
+ This means that a wide range of applications and GUI widgets can show
+ the information to the user or use this information for making
+ decisions. For example, the Intel Thermal Daemon can use this
+ information to let the user run the laptop without turning on
+ the fans.
+
+config ACPI_THERMAL_REL
+ tristate
+ depends on ACPI
+
+if INT340X_THERMAL
+
+config INT3406_THERMAL
+ tristate "ACPI INT3406 display thermal driver"
+ depends on ACPI_VIDEO
+ help
+ The display thermal device represents the LED/LCD display panel
+ that may or may not include touch support. The main function of
+ the display thermal device is to allow control of the display
+ brightness in order to address a thermal condition or to reduce
+ power consumed by the display device.
+
+config PROC_THERMAL_MMIO_RAPL
+ tristate
+ select INTEL_RAPL_CORE
+endif
diff --git a/drivers/thermal/intel/int340x_thermal/Makefile b/drivers/thermal/intel/int340x_thermal/Makefile
new file mode 100644
index 0000000000..4e852ce4a5
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_INT340X_THERMAL) += int3400_thermal.o
+obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal_zone.o
+obj-$(CONFIG_INT340X_THERMAL) += int3402_thermal.o
+obj-$(CONFIG_INT340X_THERMAL) += int3403_thermal.o
+obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_device.o
+obj-$(CONFIG_INT340X_THERMAL) += int3401_thermal.o
+obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_device_pci_legacy.o
+obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_device_pci.o
+obj-$(CONFIG_PROC_THERMAL_MMIO_RAPL) += processor_thermal_rapl.o
+obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_rfim.o
+obj-$(CONFIG_INT340X_THERMAL) += processor_thermal_mbox.o
+obj-$(CONFIG_INT3406_THERMAL) += int3406_thermal.o
+obj-$(CONFIG_ACPI_THERMAL_REL) += acpi_thermal_rel.o
diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
new file mode 100644
index 0000000000..dc519a665c
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* acpi_thermal_rel.c driver for exporting ACPI thermal relationship
+ *
+ * Copyright (c) 2014 Intel Corp
+ */
+
+/*
+ * Two functionalities included:
+ * 1. Export _TRT and _ART via a misc device interface to userspace.
+ * 2. Provide parsing results to kernel drivers.
+ *
+ */
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/acpi.h>
+#include <linux/uaccess.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include "acpi_thermal_rel.h"
+
+static acpi_handle acpi_thermal_rel_handle;
+static DEFINE_SPINLOCK(acpi_thermal_rel_chrdev_lock);
+static int acpi_thermal_rel_chrdev_count; /* #times opened */
+static int acpi_thermal_rel_chrdev_exclu; /* already open exclusive? */
+
+static int acpi_thermal_rel_open(struct inode *inode, struct file *file)
+{
+ spin_lock(&acpi_thermal_rel_chrdev_lock);
+ if (acpi_thermal_rel_chrdev_exclu ||
+ (acpi_thermal_rel_chrdev_count && (file->f_flags & O_EXCL))) {
+ spin_unlock(&acpi_thermal_rel_chrdev_lock);
+ return -EBUSY;
+ }
+
+ if (file->f_flags & O_EXCL)
+ acpi_thermal_rel_chrdev_exclu = 1;
+ acpi_thermal_rel_chrdev_count++;
+
+ spin_unlock(&acpi_thermal_rel_chrdev_lock);
+
+ return nonseekable_open(inode, file);
+}
+
+static int acpi_thermal_rel_release(struct inode *inode, struct file *file)
+{
+ spin_lock(&acpi_thermal_rel_chrdev_lock);
+ acpi_thermal_rel_chrdev_count--;
+ acpi_thermal_rel_chrdev_exclu = 0;
+ spin_unlock(&acpi_thermal_rel_chrdev_lock);
+
+ return 0;
+}
+
+/**
+ * acpi_parse_trt - Parse the Thermal Relationship Table (_TRT) for passive cooling
+ *
+ * @handle: ACPI handle of the device that contains _TRT
+ * @trt_count: the number of valid entries resulting from parsing _TRT
+ * @trtp: pointer used to return the array of parsed _TRT entries
+ * @create_dev: whether to create platform devices for target and source
+ *
+ */
+int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trtp,
+ bool create_dev)
+{
+ acpi_status status;
+ int result = 0;
+ int i;
+ int nr_bad_entries = 0;
+ struct trt *trts;
+ union acpi_object *p;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct acpi_buffer element = { 0, NULL };
+ struct acpi_buffer trt_format = { sizeof("RRNNNNNN"), "RRNNNNNN" };
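+ /* trt_format: 'R' = device reference, 'N' = integer (see acpi_extract_package()) */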
+
+ status = acpi_evaluate_object(handle, "_TRT", NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ p = buffer.pointer;
+ if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
+ pr_err("Invalid _TRT data\n");
+ result = -EFAULT;
+ goto end;
+ }
+
+ *trt_count = p->package.count;
+ trts = kcalloc(*trt_count, sizeof(struct trt), GFP_KERNEL);
+ if (!trts) {
+ result = -ENOMEM;
+ goto end;
+ }
+
+ for (i = 0; i < *trt_count; i++) {
+ struct trt *trt = &trts[i - nr_bad_entries];
+
+ element.length = sizeof(struct trt);
+ element.pointer = trt;
+
+ status = acpi_extract_package(&(p->package.elements[i]),
+ &trt_format, &element);
+ if (ACPI_FAILURE(status)) {
+ nr_bad_entries++;
+ pr_warn("_TRT package %d is invalid, ignored\n", i);
+ continue;
+ }
+ if (!create_dev)
+ continue;
+
+ if (!acpi_fetch_acpi_dev(trt->source))
+ pr_warn("Failed to get source ACPI device\n");
+
+ if (!acpi_fetch_acpi_dev(trt->target))
+ pr_warn("Failed to get target ACPI device\n");
+ }
+
+ result = 0;
+
+ *trtp = trts;
+ /* don't count bad entries */
+ *trt_count -= nr_bad_entries;
+end:
+ kfree(buffer.pointer);
+ return result;
+}
+EXPORT_SYMBOL(acpi_parse_trt);
+
+/**
+ * acpi_parse_art - Parse Active Relationship Table _ART
+ *
+ * @handle: ACPI handle of the device that contains _ART
+ * @art_count: the number of valid entries resulting from parsing _ART
+ * @artp: pointer used to return the array of parsed _ART entries
+ * @create_dev: whether to create platform devices for target and source
+ *
+ */
+int acpi_parse_art(acpi_handle handle, int *art_count, struct art **artp,
+ bool create_dev)
+{
+ acpi_status status;
+ int result = 0;
+ int i;
+ int nr_bad_entries = 0;
+ struct art *arts;
+ union acpi_object *p;
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ struct acpi_buffer element = { 0, NULL };
+ struct acpi_buffer art_format = {
+ sizeof("RRNNNNNNNNNNN"), "RRNNNNNNNNNNN" };
+
+ status = acpi_evaluate_object(handle, "_ART", NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ p = buffer.pointer;
+ if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
+ pr_err("Invalid _ART data\n");
+ result = -EFAULT;
+ goto end;
+ }
+
+ /* ignore p->package.elements[0], as this is the _ART revision field */
+ *art_count = p->package.count - 1;
+ arts = kcalloc(*art_count, sizeof(struct art), GFP_KERNEL);
+ if (!arts) {
+ result = -ENOMEM;
+ goto end;
+ }
+
+ for (i = 0; i < *art_count; i++) {
+ struct art *art = &arts[i - nr_bad_entries];
+
+ element.length = sizeof(struct art);
+ element.pointer = art;
+
+ status = acpi_extract_package(&(p->package.elements[i + 1]),
+ &art_format, &element);
+ if (ACPI_FAILURE(status)) {
+ pr_warn("_ART package %d is invalid, ignored\n", i);
+ nr_bad_entries++;
+ continue;
+ }
+ if (!create_dev)
+ continue;
+
+ if (!acpi_fetch_acpi_dev(art->source))
+ pr_warn("Failed to get source ACPI device\n");
+
+ if (!acpi_fetch_acpi_dev(art->target))
+ pr_warn("Failed to get target ACPI device\n");
+ }
+
+ *artp = arts;
+ /* don't count bad entries */
+ *art_count -= nr_bad_entries;
+end:
+ kfree(buffer.pointer);
+ return result;
+}
+EXPORT_SYMBOL(acpi_parse_art);
+
+/*
+ * acpi_parse_psvt - Parse the Passive Table (PSVT) for passive cooling
+ *
+ * @handle: ACPI handle of the device which contains PSVT
+ * @psvt_count: the number of valid entries resulting from parsing PSVT
+ * @psvtp: pointer used to return the array of parsed PSVT entries
+ *
+ */
+static int acpi_parse_psvt(acpi_handle handle, int *psvt_count, struct psvt **psvtp)
+{
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ int nr_bad_entries = 0, revision = 0;
+ union acpi_object *p;
+ acpi_status status;
+ int i, result = 0;
+ struct psvt *psvts;
+
+ if (!acpi_has_method(handle, "PSVT"))
+ return -ENODEV;
+
+ status = acpi_evaluate_object(handle, "PSVT", NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ p = buffer.pointer;
+ if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
+ result = -EFAULT;
+ goto end;
+ }
+
+ /* the first element is the revision number */
+ if (p->package.count > 0) {
+ union acpi_object *prev = &(p->package.elements[0]);
+
+ if (prev->type == ACPI_TYPE_INTEGER)
+ revision = (int)prev->integer.value;
+ } else {
+ result = -EFAULT;
+ goto end;
+ }
+
+ /* Support only version 2 */
+ if (revision != 2) {
+ result = -EFAULT;
+ goto end;
+ }
+
+ *psvt_count = p->package.count - 1;
+ if (!*psvt_count) {
+ result = -EFAULT;
+ goto end;
+ }
+
+ psvts = kcalloc(*psvt_count, sizeof(*psvts), GFP_KERNEL);
+ if (!psvts) {
+ result = -ENOMEM;
+ goto end;
+ }
+
+ /* Start at index 1 because the first element is the revision number */
+ for (i = 1; i < p->package.count; i++) {
+ struct acpi_buffer psvt_int_format = { sizeof("RRNNNNNNNNNN"), "RRNNNNNNNNNN" };
+ struct acpi_buffer psvt_str_format = { sizeof("RRNNNNNSNNNN"), "RRNNNNNSNNNN" };
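+ /* PSVT entry: two references plus ten values; the limit (element 7) is an integer ('N') or a string ('S'), chosen below from the knob type */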
+ union acpi_object *package = &(p->package.elements[i]);
+ struct psvt *psvt = &psvts[i - 1 - nr_bad_entries];
+ struct acpi_buffer *psvt_format = &psvt_int_format;
+ struct acpi_buffer element = { 0, NULL };
+ union acpi_object *knob;
+ struct acpi_device *res;
+ struct psvt *psvt_ptr;
+
+ element.length = ACPI_ALLOCATE_BUFFER;
+ element.pointer = NULL;
+
+ if (package->package.count >= ACPI_NR_PSVT_ELEMENTS) {
+ knob = &(package->package.elements[ACPI_PSVT_CONTROL_KNOB]);
+ } else {
+ nr_bad_entries++;
+ pr_info("PSVT package %d is invalid, ignored\n", i);
+ continue;
+ }
+
+ if (knob->type == ACPI_TYPE_STRING) {
+ psvt_format = &psvt_str_format;
+ if (knob->string.length > ACPI_LIMIT_STR_MAX_LEN - 1) {
+ pr_info("PSVT package %d limit string len exceeds max\n", i);
+ knob->string.length = ACPI_LIMIT_STR_MAX_LEN - 1;
+ }
+ }
+
+ status = acpi_extract_package(&(p->package.elements[i]), psvt_format, &element);
+ if (ACPI_FAILURE(status)) {
+ nr_bad_entries++;
+ pr_info("PSVT package %d is invalid, ignored\n", i);
+ continue;
+ }
+
+ psvt_ptr = (struct psvt *)element.pointer;
+
+ memcpy(psvt, psvt_ptr, sizeof(*psvt));
+
+ /* The limit element can be string or U64 */
+ psvt->control_knob_type = (u64)knob->type;
+
+ if (knob->type == ACPI_TYPE_STRING) {
+ memset(&psvt->limit, 0, sizeof(u64));
+ strncpy(psvt->limit.string, psvt_ptr->limit.str_ptr, knob->string.length);
+ } else {
+ psvt->limit.integer = psvt_ptr->limit.integer;
+ }
+
+ kfree(element.pointer);
+
+ res = acpi_fetch_acpi_dev(psvt->source);
+ if (!res) {
+ nr_bad_entries++;
+ pr_info("Failed to get source ACPI device\n");
+ continue;
+ }
+
+ res = acpi_fetch_acpi_dev(psvt->target);
+ if (!res) {
+ nr_bad_entries++;
+ pr_info("Failed to get target ACPI device\n");
+ continue;
+ }
+ }
+
+ /* don't count bad entries */
+ *psvt_count -= nr_bad_entries;
+
+ if (!*psvt_count) {
+ result = -EFAULT;
+ kfree(psvts);
+ goto end;
+ }
+
+ *psvtp = psvts;
+
+ /* success: fall through to free the ACPI buffer */
+ result = 0;
+
+end:
+ kfree(buffer.pointer);
+ return result;
+}
+
+/* get device name from acpi handle */
+static void get_single_name(acpi_handle handle, char *name)
+{
+ struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER};
+
+ if (ACPI_FAILURE(acpi_get_name(handle, ACPI_SINGLE_NAME, &buffer)))
+ pr_warn("Failed to get device name from acpi handle\n");
+ else {
+ memcpy(name, buffer.pointer, ACPI_NAMESEG_SIZE);
+ kfree(buffer.pointer);
+ }
+}
+
+static int fill_art(char __user *ubuf)
+{
+ int i;
+ int ret;
+ int count;
+ int art_len;
+ struct art *arts = NULL;
+ union art_object *art_user;
+
+ ret = acpi_parse_art(acpi_thermal_rel_handle, &count, &arts, false);
+ if (ret)
+ goto free_art;
+ art_len = count * sizeof(union art_object);
+ art_user = kzalloc(art_len, GFP_KERNEL);
+ if (!art_user) {
+ ret = -ENOMEM;
+ goto free_art;
+ }
+ /* now fill in user art data */
+ for (i = 0; i < count; i++) {
+ /* userspace art needs device name instead of acpi reference */
+ get_single_name(arts[i].source, art_user[i].source_device);
+ get_single_name(arts[i].target, art_user[i].target_device);
+ /* copy the remaining integer data following source and target */
+ BUILD_BUG_ON(sizeof(art_user[i].data) !=
+ sizeof(u64) * (ACPI_NR_ART_ELEMENTS - 2));
+ memcpy(&art_user[i].data, &arts[i].data, sizeof(art_user[i].data));
+ }
+
+ if (copy_to_user(ubuf, art_user, art_len))
+ ret = -EFAULT;
+ kfree(art_user);
+free_art:
+ kfree(arts);
+ return ret;
+}
+
+static int fill_trt(char __user *ubuf)
+{
+ int i;
+ int ret;
+ int count;
+ int trt_len;
+ struct trt *trts = NULL;
+ union trt_object *trt_user;
+
+ ret = acpi_parse_trt(acpi_thermal_rel_handle, &count, &trts, false);
+ if (ret)
+ goto free_trt;
+ trt_len = count * sizeof(union trt_object);
+ trt_user = kzalloc(trt_len, GFP_KERNEL);
+ if (!trt_user) {
+ ret = -ENOMEM;
+ goto free_trt;
+ }
+ /* now fill in user trt data */
+ for (i = 0; i < count; i++) {
+ /* userspace trt needs device name instead of acpi reference */
+ get_single_name(trts[i].source, trt_user[i].source_device);
+ get_single_name(trts[i].target, trt_user[i].target_device);
+ trt_user[i].sample_period = trts[i].sample_period;
+ trt_user[i].influence = trts[i].influence;
+ }
+
+ if (copy_to_user(ubuf, trt_user, trt_len))
+ ret = -EFAULT;
+ kfree(trt_user);
+free_trt:
+ kfree(trts);
+ return ret;
+}
+
+static int fill_psvt(char __user *ubuf)
+{
+ int i, ret, count, psvt_len;
+ union psvt_object *psvt_user;
+ struct psvt *psvts;
+
+ ret = acpi_parse_psvt(acpi_thermal_rel_handle, &count, &psvts);
+ if (ret)
+ return ret;
+
+ psvt_len = count * sizeof(*psvt_user);
+
+ psvt_user = kzalloc(psvt_len, GFP_KERNEL);
+ if (!psvt_user) {
+ ret = -ENOMEM;
+ goto free_psvt;
+ }
+
+ /* now fill in user psvt data */
+ for (i = 0; i < count; i++) {
+ /* userspace psvt needs device name instead of acpi reference */
+ get_single_name(psvts[i].source, psvt_user[i].source_device);
+ get_single_name(psvts[i].target, psvt_user[i].target_device);
+
+ psvt_user[i].priority = psvts[i].priority;
+ psvt_user[i].sample_period = psvts[i].sample_period;
+ psvt_user[i].passive_temp = psvts[i].passive_temp;
+ psvt_user[i].source_domain = psvts[i].source_domain;
+ psvt_user[i].control_knob = psvts[i].control_knob;
+ psvt_user[i].step_size = psvts[i].step_size;
+ psvt_user[i].limit_coeff = psvts[i].limit_coeff;
+ psvt_user[i].unlimit_coeff = psvts[i].unlimit_coeff;
+ psvt_user[i].control_knob_type = psvts[i].control_knob_type;
+ if (psvt_user[i].control_knob_type == ACPI_TYPE_STRING)
+ strncpy(psvt_user[i].limit.string, psvts[i].limit.string,
+ ACPI_LIMIT_STR_MAX_LEN);
+ else
+ psvt_user[i].limit.integer = psvts[i].limit.integer;
+
+ }
+
+ if (copy_to_user(ubuf, psvt_user, psvt_len))
+ ret = -EFAULT;
+
+ kfree(psvt_user);
+
+free_psvt:
+ kfree(psvts);
+ return ret;
+}
+
+static long acpi_thermal_rel_ioctl(struct file *f, unsigned int cmd,
+ unsigned long __arg)
+{
+ int ret = 0;
+ unsigned long length = 0;
+ int count = 0;
+ char __user *arg = (void __user *)__arg;
+ struct trt *trts = NULL;
+ struct art *arts = NULL;
+ struct psvt *psvts;
+
+ switch (cmd) {
+ case ACPI_THERMAL_GET_TRT_COUNT:
+ ret = acpi_parse_trt(acpi_thermal_rel_handle, &count,
+ &trts, false);
+ kfree(trts);
+ if (!ret)
+ return put_user(count, (unsigned long __user *)__arg);
+ return ret;
+ case ACPI_THERMAL_GET_TRT_LEN:
+ ret = acpi_parse_trt(acpi_thermal_rel_handle, &count,
+ &trts, false);
+ kfree(trts);
+ length = count * sizeof(union trt_object);
+ if (!ret)
+ return put_user(length, (unsigned long __user *)__arg);
+ return ret;
+ case ACPI_THERMAL_GET_TRT:
+ return fill_trt(arg);
+ case ACPI_THERMAL_GET_ART_COUNT:
+ ret = acpi_parse_art(acpi_thermal_rel_handle, &count,
+ &arts, false);
+ kfree(arts);
+ if (!ret)
+ return put_user(count, (unsigned long __user *)__arg);
+ return ret;
+ case ACPI_THERMAL_GET_ART_LEN:
+ ret = acpi_parse_art(acpi_thermal_rel_handle, &count,
+ &arts, false);
+ kfree(arts);
+ length = count * sizeof(union art_object);
+ if (!ret)
+ return put_user(length, (unsigned long __user *)__arg);
+ return ret;
+
+ case ACPI_THERMAL_GET_ART:
+ return fill_art(arg);
+
+ case ACPI_THERMAL_GET_PSVT_COUNT:
+ ret = acpi_parse_psvt(acpi_thermal_rel_handle, &count, &psvts);
+ if (!ret) {
+ kfree(psvts);
+ return put_user(count, (unsigned long __user *)__arg);
+ }
+ return ret;
+
+ case ACPI_THERMAL_GET_PSVT_LEN:
+ /* total length of the data retrieved (count * PSVT entry size) */
+ ret = acpi_parse_psvt(acpi_thermal_rel_handle, &count, &psvts);
+ length = count * sizeof(union psvt_object);
+ if (!ret) {
+ kfree(psvts);
+ return put_user(length, (unsigned long __user *)__arg);
+ }
+ return ret;
+
+ case ACPI_THERMAL_GET_PSVT:
+ return fill_psvt(arg);
+
+ default:
+ return -ENOTTY;
+ }
+}
+
+static const struct file_operations acpi_thermal_rel_fops = {
+ .owner = THIS_MODULE,
+ .open = acpi_thermal_rel_open,
+ .release = acpi_thermal_rel_release,
+ .unlocked_ioctl = acpi_thermal_rel_ioctl,
+ .llseek = no_llseek,
+};
+
+static struct miscdevice acpi_thermal_rel_misc_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ "acpi_thermal_rel",
+ &acpi_thermal_rel_fops
+};
+
+int acpi_thermal_rel_misc_device_add(acpi_handle handle)
+{
+ acpi_thermal_rel_handle = handle;
+
+ return misc_register(&acpi_thermal_rel_misc_device);
+}
+EXPORT_SYMBOL(acpi_thermal_rel_misc_device_add);
+
+int acpi_thermal_rel_misc_device_remove(acpi_handle handle)
+{
+ misc_deregister(&acpi_thermal_rel_misc_device);
+
+ return 0;
+}
+EXPORT_SYMBOL(acpi_thermal_rel_misc_device_remove);
+
+MODULE_AUTHOR("Zhang Rui <rui.zhang@intel.com>");
+MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com");
+MODULE_DESCRIPTION("Intel acpi thermal rel misc dev driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.h b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.h
new file mode 100644
index 0000000000..ac376d8f9e
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/acpi_thermal_rel.h
@@ -0,0 +1,146 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ACPI_ACPI_THERMAL_H
+#define __ACPI_ACPI_THERMAL_H
+
+#include <asm/ioctl.h>
+
+#define ACPI_THERMAL_MAGIC 's'
+
+#define ACPI_THERMAL_GET_TRT_LEN _IOR(ACPI_THERMAL_MAGIC, 1, unsigned long)
+#define ACPI_THERMAL_GET_ART_LEN _IOR(ACPI_THERMAL_MAGIC, 2, unsigned long)
+#define ACPI_THERMAL_GET_TRT_COUNT _IOR(ACPI_THERMAL_MAGIC, 3, unsigned long)
+#define ACPI_THERMAL_GET_ART_COUNT _IOR(ACPI_THERMAL_MAGIC, 4, unsigned long)
+
+#define ACPI_THERMAL_GET_TRT _IOR(ACPI_THERMAL_MAGIC, 5, unsigned long)
+#define ACPI_THERMAL_GET_ART _IOR(ACPI_THERMAL_MAGIC, 6, unsigned long)
+
+/*
+ * ACPI_THERMAL_GET_PSVT_COUNT = Number of PSVT entries
+ * ACPI_THERMAL_GET_PSVT_LEN = Total return data size (PSVT count x each
+ * PSVT entry size)
+ * ACPI_THERMAL_GET_PSVT = Get the data as an array of psvt_objects
+ */
+#define ACPI_THERMAL_GET_PSVT_LEN _IOR(ACPI_THERMAL_MAGIC, 7, unsigned long)
+#define ACPI_THERMAL_GET_PSVT_COUNT _IOR(ACPI_THERMAL_MAGIC, 8, unsigned long)
+#define ACPI_THERMAL_GET_PSVT _IOR(ACPI_THERMAL_MAGIC, 9, unsigned long)
+
+struct art {
+ acpi_handle source;
+ acpi_handle target;
+ struct_group(data,
+ u64 weight;
+ u64 ac0_max;
+ u64 ac1_max;
+ u64 ac2_max;
+ u64 ac3_max;
+ u64 ac4_max;
+ u64 ac5_max;
+ u64 ac6_max;
+ u64 ac7_max;
+ u64 ac8_max;
+ u64 ac9_max;
+ );
+} __packed;
+
+struct trt {
+ acpi_handle source;
+ acpi_handle target;
+ u64 influence;
+ u64 sample_period;
+ u64 reserved1;
+ u64 reserved2;
+ u64 reserved3;
+ u64 reserved4;
+} __packed;
+
+#define ACPI_NR_PSVT_ELEMENTS 12
+#define ACPI_PSVT_CONTROL_KNOB 7
+#define ACPI_LIMIT_STR_MAX_LEN 8
+
+struct psvt {
+ acpi_handle source;
+ acpi_handle target;
+ u64 priority;
+ u64 sample_period;
+ u64 passive_temp;
+ u64 source_domain;
+ u64 control_knob;
+ union {
+ /* For limit_type = ACPI_TYPE_INTEGER */
+ u64 integer;
+ /* For limit_type = ACPI_TYPE_STRING */
+ char string[ACPI_LIMIT_STR_MAX_LEN];
+ char *str_ptr;
+ } limit;
+ u64 step_size;
+ u64 limit_coeff;
+ u64 unlimit_coeff;
+ /* Spec calls this field reserved, so we borrow it for type info */
+ u64 control_knob_type; /* ACPI_TYPE_STRING or ACPI_TYPE_INTEGER */
+} __packed;
+
+#define ACPI_NR_ART_ELEMENTS 13
+/* for userspace */
+union art_object {
+ struct {
+ char source_device[8]; /* ACPI single name */
+ char target_device[8]; /* ACPI single name */
+ struct_group(data,
+ u64 weight;
+ u64 ac0_max_level;
+ u64 ac1_max_level;
+ u64 ac2_max_level;
+ u64 ac3_max_level;
+ u64 ac4_max_level;
+ u64 ac5_max_level;
+ u64 ac6_max_level;
+ u64 ac7_max_level;
+ u64 ac8_max_level;
+ u64 ac9_max_level;
+ );
+ };
+ u64 __data[ACPI_NR_ART_ELEMENTS];
+};
+
+union trt_object {
+ struct {
+ char source_device[8]; /* ACPI single name */
+ char target_device[8]; /* ACPI single name */
+ u64 influence;
+ u64 sample_period;
+ u64 reserved[4];
+ };
+ u64 __data[8];
+};
+
+union psvt_object {
+ struct {
+ char source_device[8];
+ char target_device[8];
+ u64 priority;
+ u64 sample_period;
+ u64 passive_temp;
+ u64 source_domain;
+ u64 control_knob;
+ union {
+ u64 integer;
+ char string[ACPI_LIMIT_STR_MAX_LEN];
+ } limit;
+ u64 step_size;
+ u64 limit_coeff;
+ u64 unlimit_coeff;
+ u64 control_knob_type;
+ };
+ u64 __data[ACPI_NR_PSVT_ELEMENTS];
+};
+
+#ifdef __KERNEL__
+int acpi_thermal_rel_misc_device_add(acpi_handle handle);
+int acpi_thermal_rel_misc_device_remove(acpi_handle handle);
+int acpi_parse_art(acpi_handle handle, int *art_count, struct art **arts,
+ bool create_dev);
+int acpi_parse_trt(acpi_handle handle, int *trt_count, struct trt **trts,
+ bool create_dev);
+#endif
+
+#endif /* __ACPI_ACPI_THERMAL_H */
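For reference, the ioctl interface above follows a three-step protocol per table: query the entry count, query the total data length, then fetch the table into a buffer of that length. Below is a minimal userspace sketch of that flow for _TRT; it is illustrative only and not part of this patch. It assumes the definitions above are usable from userspace (the kernel u64 type would need to map to uint64_t) and that udev names the node /dev/acpi_thermal_rel after the miscdevice registered by acpi_thermal_rel_misc_device_add().

	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include "acpi_thermal_rel.h"	/* ioctl numbers and union trt_object */

	int main(void)
	{
		unsigned long count = 0, len = 0, i;
		union trt_object *trt;
		int fd, ret = 1;

		fd = open("/dev/acpi_thermal_rel", O_RDONLY);
		if (fd < 0)
			return 1;

		/* Step 1: number of valid _TRT entries */
		if (ioctl(fd, ACPI_THERMAL_GET_TRT_COUNT, &count))
			goto out;

		/* Step 2: total size of the data to be returned */
		if (ioctl(fd, ACPI_THERMAL_GET_TRT_LEN, &len))
			goto out;

		trt = malloc(len);
		if (!trt)
			goto out;

		/* Step 3: fetch the table; the driver fills the user buffer */
		if (!ioctl(fd, ACPI_THERMAL_GET_TRT, trt)) {
			for (i = 0; i < count; i++)
				printf("%.8s -> %.8s (influence %llu)\n",
				       trt[i].source_device, trt[i].target_device,
				       (unsigned long long)trt[i].influence);
			ret = 0;
		}
		free(trt);
	out:
		close(fd);
		return ret;
	}

_ART and PSVT follow the same count/len/get pattern with their own ioctls and object layouts.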
diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
new file mode 100644
index 0000000000..ffc2871a02
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c
@@ -0,0 +1,728 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * INT3400 thermal driver
+ *
+ * Copyright (C) 2014, Intel Corporation
+ * Authors: Zhang Rui <rui.zhang@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/thermal.h>
+#include "acpi_thermal_rel.h"
+
+#define INT3400_THERMAL_TABLE_CHANGED 0x83
+#define INT3400_ODVP_CHANGED 0x88
+#define INT3400_KEEP_ALIVE 0xA0
+#define INT3400_FAKE_TEMP (20 * 1000) /* faked temp sensor reporting 20C */
+
+enum int3400_thermal_uuid {
+ INT3400_THERMAL_ACTIVE = 0,
+ INT3400_THERMAL_PASSIVE_1,
+ INT3400_THERMAL_CRITICAL,
+ INT3400_THERMAL_ADAPTIVE_PERFORMANCE,
+ INT3400_THERMAL_EMERGENCY_CALL_MODE,
+ INT3400_THERMAL_PASSIVE_2,
+ INT3400_THERMAL_POWER_BOSS,
+ INT3400_THERMAL_VIRTUAL_SENSOR,
+ INT3400_THERMAL_COOLING_MODE,
+ INT3400_THERMAL_HARDWARE_DUTY_CYCLING,
+ INT3400_THERMAL_MAXIMUM_UUID,
+};
+
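+/* Keep this table in the same order as enum int3400_thermal_uuid above */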
+static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = {
+ "3A95C389-E4B8-4629-A526-C52C88626BAE",
+ "42A441D6-AE6A-462b-A84B-4A8CE79027D3",
+ "97C68AE7-15FA-499c-B8C9-5DA81D606E0A",
+ "63BE270F-1C11-48FD-A6F7-3AF253FF3E2D",
+ "5349962F-71E6-431D-9AE8-0A635B710AEE",
+ "9E04115A-AE87-4D1C-9500-0F3E340BFE75",
+ "F5A35014-C209-46A4-993A-EB56DE7530A1",
+ "6ED722A7-9240-48A5-B479-31EEF723D7CF",
+ "16CAF1B7-DD38-40ED-B1C1-1B8A1913D531",
+ "BE84BABF-C4D4-403D-B495-3128FD44dAC1",
+};
+
+struct odvp_attr;
+
+struct int3400_thermal_priv {
+ struct acpi_device *adev;
+ struct platform_device *pdev;
+ struct thermal_zone_device *thermal;
+ int art_count;
+ struct art *arts;
+ int trt_count;
+ struct trt *trts;
+ u32 uuid_bitmap;
+ int rel_misc_dev_res;
+ int current_uuid_index;
+ char *data_vault;
+ int odvp_count;
+ int *odvp;
+ u32 os_uuid_mask;
+ int production_mode;
+ struct odvp_attr *odvp_attrs;
+};
+
+static int evaluate_odvp(struct int3400_thermal_priv *priv);
+
+struct odvp_attr {
+ int odvp;
+ struct int3400_thermal_priv *priv;
+ struct device_attribute attr;
+};
+
+static ssize_t data_vault_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *attr, char *buf, loff_t off, size_t count)
+{
+ memcpy(buf, attr->private + off, count);
+ return count;
+}
+
+static BIN_ATTR_RO(data_vault, 0);
+
+static struct bin_attribute *data_attributes[] = {
+ &bin_attr_data_vault,
+ NULL,
+};
+
+static ssize_t imok_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
+ acpi_status status;
+ int input, ret;
+
+ ret = kstrtouint(buf, 10, &input);
+ if (ret)
+ return ret;
+ status = acpi_execute_simple_method(priv->adev->handle, "IMOK", input);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+
+ return count;
+}
+
+static DEVICE_ATTR_WO(imok);
+
+static struct attribute *imok_attr[] = {
+ &dev_attr_imok.attr,
+ NULL
+};
+
+static const struct attribute_group imok_attribute_group = {
+ .attrs = imok_attr,
+};
+
+static const struct attribute_group data_attribute_group = {
+ .bin_attrs = data_attributes,
+};
+
+static ssize_t available_uuids_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
+ int i;
+ int length = 0;
+
+ if (!priv->uuid_bitmap)
+ return sprintf(buf, "UNKNOWN\n");
+
+ for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; i++) {
+ if (priv->uuid_bitmap & (1 << i))
+ length += sysfs_emit_at(buf, length, "%s\n", int3400_thermal_uuids[i]);
+ }
+
+ return length;
+}
+
+static ssize_t current_uuid_show(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
+ int i, length = 0;
+
+ if (priv->current_uuid_index >= 0)
+ return sprintf(buf, "%s\n",
+ int3400_thermal_uuids[priv->current_uuid_index]);
+
+ for (i = 0; i <= INT3400_THERMAL_CRITICAL; i++) {
+ if (priv->os_uuid_mask & BIT(i))
+ length += sysfs_emit_at(buf, length, "%s\n", int3400_thermal_uuids[i]);
+ }
+
+ if (length)
+ return length;
+
+ return sprintf(buf, "INVALID\n");
+}
+
+static int int3400_thermal_run_osc(acpi_handle handle, char *uuid_str, int *enable)
+{
+ u32 ret, buf[2];
+ acpi_status status;
+ int result = 0;
+ struct acpi_osc_context context = {
+ .uuid_str = uuid_str,
+ .rev = 1,
+ .cap.length = 8,
+ .cap.pointer = buf,
+ };
+
+ buf[OSC_QUERY_DWORD] = 0;
+ buf[OSC_SUPPORT_DWORD] = *enable;
+
+ status = acpi_run_osc(handle, &context);
+ if (ACPI_SUCCESS(status)) {
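+ /* The second DWORD of the _OSC return buffer holds the capabilities */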
+ ret = *((u32 *)(context.ret.pointer + 4));
+ if (ret != *enable)
+ result = -EPERM;
+
+ kfree(context.ret.pointer);
+ } else
+ result = -EPERM;
+
+ return result;
+}
+
+static int set_os_uuid_mask(struct int3400_thermal_priv *priv, u32 mask)
+{
+ int cap = 0;
+
+ /*
+ * Capability bits:
+ * Bit 0: set to 1 to indicate DPTF is active
+ * Bit 1: set to 1 if active cooling is supported by the user space daemon
+ * Bit 2: set to 1 if passive cooling is supported by the user space daemon
+ * Bit 3: set to 1 if the critical trip is handled by the user space daemon
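+ *
+ * For example, a mask with active and passive cooling support set (0x3)
+ * yields cap = (0x3 << 1) | 0x1 = 0x7.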
+ */
+ if (mask)
+ cap = (priv->os_uuid_mask << 1) | 0x01;
+
+ return int3400_thermal_run_osc(priv->adev->handle,
+ "b23ba85d-c8b7-3542-88de-8de2ffcfd698",
+ &cap);
+}
+
+static ssize_t current_uuid_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
+ int ret, i;
+
+ for (i = 0; i < INT3400_THERMAL_MAXIMUM_UUID; ++i) {
+ if (!strncmp(buf, int3400_thermal_uuids[i],
+ sizeof(int3400_thermal_uuids[i]) - 1)) {
+ /*
+ * If we have a list of supported UUIDs, make sure
+ * this one is supported.
+ */
+ if (priv->uuid_bitmap & BIT(i)) {
+ priv->current_uuid_index = i;
+ return count;
+ }
+
+ /*
+ * Only 3 policies are supported via the new _OSC
+ * used to inform the OS capability:
+ * INT3400_THERMAL_ACTIVE
+ * INT3400_THERMAL_PASSIVE_1
+ * INT3400_THERMAL_CRITICAL
+ */
+
+ if (i > INT3400_THERMAL_CRITICAL)
+ return -EINVAL;
+
+ priv->os_uuid_mask |= BIT(i);
+
+ break;
+ }
+ }
+
+ if (priv->os_uuid_mask) {
+ ret = set_os_uuid_mask(priv, priv->os_uuid_mask);
+ if (ret)
+ return ret;
+ }
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(current_uuid);
+static DEVICE_ATTR_RO(available_uuids);
+static struct attribute *uuid_attrs[] = {
+ &dev_attr_available_uuids.attr,
+ &dev_attr_current_uuid.attr,
+ NULL
+};
+
+static const struct attribute_group uuid_attribute_group = {
+ .attrs = uuid_attrs,
+ .name = "uuids"
+};
+
+static int int3400_thermal_get_uuids(struct int3400_thermal_priv *priv)
+{
+ struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL};
+ union acpi_object *obja, *objb;
+ int i, j;
+ int result = 0;
+ acpi_status status;
+
+ status = acpi_evaluate_object(priv->adev->handle, "IDSP", NULL, &buf);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ obja = (union acpi_object *)buf.pointer;
+ if (obja->type != ACPI_TYPE_PACKAGE) {
+ result = -EINVAL;
+ goto end;
+ }
+
+ for (i = 0; i < obja->package.count; i++) {
+ objb = &obja->package.elements[i];
+ if (objb->type != ACPI_TYPE_BUFFER) {
+ result = -EINVAL;
+ goto end;
+ }
+
+ /* UUID must be 16 bytes */
+ if (objb->buffer.length != 16) {
+ result = -EINVAL;
+ goto end;
+ }
+
+ for (j = 0; j < INT3400_THERMAL_MAXIMUM_UUID; j++) {
+ guid_t guid;
+
+ guid_parse(int3400_thermal_uuids[j], &guid);
+ if (guid_equal((guid_t *)objb->buffer.pointer, &guid)) {
+ priv->uuid_bitmap |= (1 << j);
+ break;
+ }
+ }
+ }
+
+end:
+ kfree(buf.pointer);
+ return result;
+}
+
+static ssize_t production_mode_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct int3400_thermal_priv *priv = dev_get_drvdata(dev);
+
+ return sysfs_emit(buf, "%d\n", priv->production_mode);
+}
+
+static DEVICE_ATTR_RO(production_mode);
+
+static int production_mode_init(struct int3400_thermal_priv *priv)
+{
+ unsigned long long mode;
+ acpi_status status;
+ int ret;
+
+ priv->production_mode = -1;
+
+ status = acpi_evaluate_integer(priv->adev->handle, "DCFG", NULL, &mode);
+ /* If the method is not present, this is not an error */
+ if (ACPI_FAILURE(status))
+ return 0;
+
+ ret = sysfs_create_file(&priv->pdev->dev.kobj, &dev_attr_production_mode.attr);
+ if (ret)
+ return ret;
+
+ priv->production_mode = mode;
+
+ return 0;
+}
+
+static void production_mode_exit(struct int3400_thermal_priv *priv)
+{
+ if (priv->production_mode >= 0)
+ sysfs_remove_file(&priv->pdev->dev.kobj, &dev_attr_production_mode.attr);
+}
+
+static ssize_t odvp_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct odvp_attr *odvp_attr;
+
+ odvp_attr = container_of(attr, struct odvp_attr, attr);
+
+ return sprintf(buf, "%d\n", odvp_attr->priv->odvp[odvp_attr->odvp]);
+}
+
+static void cleanup_odvp(struct int3400_thermal_priv *priv)
+{
+ int i;
+
+ if (priv->odvp_attrs) {
+ for (i = 0; i < priv->odvp_count; i++) {
+ sysfs_remove_file(&priv->pdev->dev.kobj,
+ &priv->odvp_attrs[i].attr.attr);
+ kfree(priv->odvp_attrs[i].attr.attr.name);
+ }
+ kfree(priv->odvp_attrs);
+ }
+ kfree(priv->odvp);
+ priv->odvp_count = 0;
+}
+
+static int evaluate_odvp(struct int3400_thermal_priv *priv)
+{
+ struct acpi_buffer odvp = { ACPI_ALLOCATE_BUFFER, NULL };
+ union acpi_object *obj = NULL;
+ acpi_status status;
+ int i, ret;
+
+ status = acpi_evaluate_object(priv->adev->handle, "ODVP", NULL, &odvp);
+ if (ACPI_FAILURE(status)) {
+ ret = -EINVAL;
+ goto out_err;
+ }
+
+ obj = odvp.pointer;
+ if (obj->type != ACPI_TYPE_PACKAGE) {
+ ret = -EINVAL;
+ goto out_err;
+ }
+
+ if (priv->odvp == NULL) {
+ priv->odvp_count = obj->package.count;
+ priv->odvp = kmalloc_array(priv->odvp_count, sizeof(int),
+ GFP_KERNEL);
+ if (!priv->odvp) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ }
+
+ if (priv->odvp_attrs == NULL) {
+ priv->odvp_attrs = kcalloc(priv->odvp_count,
+ sizeof(struct odvp_attr),
+ GFP_KERNEL);
+ if (!priv->odvp_attrs) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ for (i = 0; i < priv->odvp_count; i++) {
+ struct odvp_attr *odvp = &priv->odvp_attrs[i];
+
+ sysfs_attr_init(&odvp->attr.attr);
+ odvp->priv = priv;
+ odvp->odvp = i;
+ odvp->attr.attr.name = kasprintf(GFP_KERNEL,
+ "odvp%d", i);
+
+ if (!odvp->attr.attr.name) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+ odvp->attr.attr.mode = 0444;
+ odvp->attr.show = odvp_show;
+ odvp->attr.store = NULL;
+ ret = sysfs_create_file(&priv->pdev->dev.kobj,
+ &odvp->attr.attr);
+ if (ret)
+ goto out_err;
+ }
+ }
+
+ for (i = 0; i < obj->package.count; i++) {
+ if (obj->package.elements[i].type == ACPI_TYPE_INTEGER)
+ priv->odvp[i] = obj->package.elements[i].integer.value;
+ }
+
+ kfree(obj);
+ return 0;
+
+out_err:
+ cleanup_odvp(priv);
+ kfree(obj);
+ return ret;
+}
+
+static void int3400_notify(acpi_handle handle,
+ u32 event,
+ void *data)
+{
+ struct int3400_thermal_priv *priv = data;
+ struct device *dev;
+ char *thermal_prop[5];
+ int therm_event;
+
+ if (!priv)
+ return;
+
+ switch (event) {
+ case INT3400_THERMAL_TABLE_CHANGED:
+ therm_event = THERMAL_TABLE_CHANGED;
+ break;
+ case INT3400_KEEP_ALIVE:
+ therm_event = THERMAL_EVENT_KEEP_ALIVE;
+ break;
+ case INT3400_ODVP_CHANGED:
+ evaluate_odvp(priv);
+ therm_event = THERMAL_DEVICE_POWER_CAPABILITY_CHANGED;
+ break;
+ default:
+ /* Ignore unknown notification codes sent to INT3400 device */
+ return;
+ }
+
+ dev = thermal_zone_device(priv->thermal);
+
+ thermal_prop[0] = kasprintf(GFP_KERNEL, "NAME=%s", thermal_zone_device_type(priv->thermal));
+ thermal_prop[1] = kasprintf(GFP_KERNEL, "TEMP=%d", INT3400_FAKE_TEMP);
+ thermal_prop[2] = kasprintf(GFP_KERNEL, "TRIP=");
+ thermal_prop[3] = kasprintf(GFP_KERNEL, "EVENT=%d", therm_event);
+ thermal_prop[4] = NULL;
+ kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, thermal_prop);
+ kfree(thermal_prop[0]);
+ kfree(thermal_prop[1]);
+ kfree(thermal_prop[2]);
+ kfree(thermal_prop[3]);
+}
+
+static int int3400_thermal_get_temp(struct thermal_zone_device *thermal,
+ int *temp)
+{
+ *temp = INT3400_FAKE_TEMP;
+ return 0;
+}
+
+static int int3400_thermal_change_mode(struct thermal_zone_device *thermal,
+ enum thermal_device_mode mode)
+{
+ struct int3400_thermal_priv *priv = thermal_zone_device_priv(thermal);
+ int result = 0;
+ int enabled;
+
+ if (!priv)
+ return -EINVAL;
+
+ enabled = mode == THERMAL_DEVICE_ENABLED;
+
+ if (priv->os_uuid_mask) {
+ if (!enabled) {
+ priv->os_uuid_mask = 0;
+ result = set_os_uuid_mask(priv, priv->os_uuid_mask);
+ }
+ goto eval_odvp;
+ }
+
+ if (priv->current_uuid_index < 0 ||
+ priv->current_uuid_index >= INT3400_THERMAL_MAXIMUM_UUID)
+ return -EINVAL;
+
+ result = int3400_thermal_run_osc(priv->adev->handle,
+ int3400_thermal_uuids[priv->current_uuid_index],
+ &enabled);
+eval_odvp:
+ evaluate_odvp(priv);
+
+ return result;
+}
+
+static struct thermal_zone_device_ops int3400_thermal_ops = {
+ .get_temp = int3400_thermal_get_temp,
+ .change_mode = int3400_thermal_change_mode,
+};
+
+static struct thermal_zone_params int3400_thermal_params = {
+ .governor_name = "user_space",
+ .no_hwmon = true,
+};
+
+static void int3400_setup_gddv(struct int3400_thermal_priv *priv)
+{
+ struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
+ union acpi_object *obj;
+ acpi_status status;
+
+ status = acpi_evaluate_object(priv->adev->handle, "GDDV", NULL,
+ &buffer);
+ if (ACPI_FAILURE(status) || !buffer.length)
+ return;
+
+ obj = buffer.pointer;
+ if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count != 1
+ || obj->package.elements[0].type != ACPI_TYPE_BUFFER)
+ goto out_free;
+
+ priv->data_vault = kmemdup(obj->package.elements[0].buffer.pointer,
+ obj->package.elements[0].buffer.length,
+ GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(priv->data_vault))
+ goto out_free;
+
+ bin_attr_data_vault.private = priv->data_vault;
+ bin_attr_data_vault.size = obj->package.elements[0].buffer.length;
+out_free:
+ kfree(buffer.pointer);
+}
+
+static int int3400_thermal_probe(struct platform_device *pdev)
+{
+ struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+ struct int3400_thermal_priv *priv;
+ int result;
+
+ if (!adev)
+ return -ENODEV;
+
+ priv = kzalloc(sizeof(struct int3400_thermal_priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->pdev = pdev;
+ priv->adev = adev;
+
+ result = int3400_thermal_get_uuids(priv);
+
+ /* Missing IDSP isn't fatal */
+ if (result && result != -ENODEV)
+ goto free_priv;
+
+ priv->current_uuid_index = -1;
+
+ result = acpi_parse_art(priv->adev->handle, &priv->art_count,
+ &priv->arts, true);
+ if (result)
+ dev_dbg(&pdev->dev, "_ART table parsing error\n");
+
+ result = acpi_parse_trt(priv->adev->handle, &priv->trt_count,
+ &priv->trts, true);
+ if (result)
+ dev_dbg(&pdev->dev, "_TRT table parsing error\n");
+
+ platform_set_drvdata(pdev, priv);
+
+ int3400_setup_gddv(priv);
+
+ evaluate_odvp(priv);
+
+ priv->thermal = thermal_tripless_zone_device_register("INT3400 Thermal", priv,
+ &int3400_thermal_ops,
+ &int3400_thermal_params);
+ if (IS_ERR(priv->thermal)) {
+ result = PTR_ERR(priv->thermal);
+ goto free_art_trt;
+ }
+
+ priv->rel_misc_dev_res = acpi_thermal_rel_misc_device_add(
+ priv->adev->handle);
+
+ result = sysfs_create_group(&pdev->dev.kobj, &uuid_attribute_group);
+ if (result)
+ goto free_rel_misc;
+
+ if (acpi_has_method(priv->adev->handle, "IMOK")) {
+ result = sysfs_create_group(&pdev->dev.kobj, &imok_attribute_group);
+ if (result)
+ goto free_uuid;
+ }
+
+ if (!ZERO_OR_NULL_PTR(priv->data_vault)) {
+ result = sysfs_create_group(&pdev->dev.kobj,
+ &data_attribute_group);
+ if (result)
+ goto free_imok;
+ }
+
+ result = acpi_install_notify_handler(
+ priv->adev->handle, ACPI_DEVICE_NOTIFY, int3400_notify,
+ (void *)priv);
+ if (result)
+ goto free_sysfs;
+
+ result = production_mode_init(priv);
+ if (result)
+ goto free_notify;
+
+ return 0;
+
+free_notify:
+ acpi_remove_notify_handler(priv->adev->handle, ACPI_DEVICE_NOTIFY,
+ int3400_notify);
+free_sysfs:
+ cleanup_odvp(priv);
+ if (!ZERO_OR_NULL_PTR(priv->data_vault)) {
+ sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group);
+ kfree(priv->data_vault);
+ }
+free_imok:
+ sysfs_remove_group(&pdev->dev.kobj, &imok_attribute_group);
+free_uuid:
+ sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group);
+free_rel_misc:
+ if (!priv->rel_misc_dev_res)
+ acpi_thermal_rel_misc_device_remove(priv->adev->handle);
+ thermal_zone_device_unregister(priv->thermal);
+free_art_trt:
+ kfree(priv->trts);
+ kfree(priv->arts);
+free_priv:
+ kfree(priv);
+ return result;
+}
+
+static int int3400_thermal_remove(struct platform_device *pdev)
+{
+ struct int3400_thermal_priv *priv = platform_get_drvdata(pdev);
+
+ production_mode_exit(priv);
+
+ acpi_remove_notify_handler(
+ priv->adev->handle, ACPI_DEVICE_NOTIFY,
+ int3400_notify);
+
+ cleanup_odvp(priv);
+
+ if (!priv->rel_misc_dev_res)
+ acpi_thermal_rel_misc_device_remove(priv->adev->handle);
+
+ if (!ZERO_OR_NULL_PTR(priv->data_vault))
+ sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group);
+ sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group);
+ sysfs_remove_group(&pdev->dev.kobj, &imok_attribute_group);
+ thermal_zone_device_unregister(priv->thermal);
+ kfree(priv->data_vault);
+ kfree(priv->trts);
+ kfree(priv->arts);
+ kfree(priv);
+ return 0;
+}
+
+static const struct acpi_device_id int3400_thermal_match[] = {
+ {"INT3400", 0},
+ {"INTC1040", 0},
+ {"INTC1041", 0},
+ {"INTC1042", 0},
+ {"INTC10A0", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(acpi, int3400_thermal_match);
+
+static struct platform_driver int3400_thermal_driver = {
+ .probe = int3400_thermal_probe,
+ .remove = int3400_thermal_remove,
+ .driver = {
+ .name = "int3400 thermal",
+ .acpi_match_table = ACPI_PTR(int3400_thermal_match),
+ },
+};
+
+module_platform_driver(int3400_thermal_driver);
+
+MODULE_DESCRIPTION("INT3400 Thermal driver");
+MODULE_AUTHOR("Zhang Rui <rui.zhang@intel.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/intel/int340x_thermal/int3401_thermal.c b/drivers/thermal/intel/int340x_thermal/int3401_thermal.c
new file mode 100644
index 0000000000..c93a28eec4
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/int3401_thermal.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * INT3401 processor thermal device
+ * Copyright (c) 2020, Intel Corporation.
+ */
+#include <linux/acpi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#include "int340x_thermal_zone.h"
+#include "processor_thermal_device.h"
+
+static const struct acpi_device_id int3401_device_ids[] = {
+ {"INT3401", 0},
+ {"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, int3401_device_ids);
+
+static int int3401_add(struct platform_device *pdev)
+{
+ struct proc_thermal_device *proc_priv;
+ int ret;
+
+ proc_priv = devm_kzalloc(&pdev->dev, sizeof(*proc_priv), GFP_KERNEL);
+ if (!proc_priv)
+ return -ENOMEM;
+
+ ret = proc_thermal_add(&pdev->dev, proc_priv);
+ if (ret)
+ return ret;
+
+ platform_set_drvdata(pdev, proc_priv);
+
+ return ret;
+}
+
+static int int3401_remove(struct platform_device *pdev)
+{
+ proc_thermal_remove(platform_get_drvdata(pdev));
+
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int int3401_thermal_suspend(struct device *dev)
+{
+ return proc_thermal_suspend(dev);
+}
+
+static int int3401_thermal_resume(struct device *dev)
+{
+ return proc_thermal_resume(dev);
+}
+#else
+#define int3401_thermal_suspend NULL
+#define int3401_thermal_resume NULL
+#endif
+
+static SIMPLE_DEV_PM_OPS(int3401_proc_thermal_pm, int3401_thermal_suspend,
+ int3401_thermal_resume);
+
+static struct platform_driver int3401_driver = {
+ .probe = int3401_add,
+ .remove = int3401_remove,
+ .driver = {
+ .name = "int3401 thermal",
+ .acpi_match_table = int3401_device_ids,
+ .pm = &int3401_proc_thermal_pm,
+ },
+};
+
+module_platform_driver(int3401_driver);
+
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_DESCRIPTION("Processor Thermal Reporting Device Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/int3402_thermal.c b/drivers/thermal/intel/int340x_thermal/int3402_thermal.c
new file mode 100644
index 0000000000..43fa351e2b
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/int3402_thermal.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * INT3402 thermal driver for memory temperature reporting
+ *
+ * Copyright (C) 2014, Intel Corporation
+ * Authors: Aaron Lu <aaron.lu@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/thermal.h>
+#include "int340x_thermal_zone.h"
+
+#define INT3402_PERF_CHANGED_EVENT 0x80
+#define INT3402_THERMAL_EVENT 0x90
+
+struct int3402_thermal_data {
+ acpi_handle *handle;
+ struct int34x_thermal_zone *int340x_zone;
+};
+
+static void int3402_notify(acpi_handle handle, u32 event, void *data)
+{
+ struct int3402_thermal_data *priv = data;
+
+ if (!priv)
+ return;
+
+ switch (event) {
+ case INT3402_PERF_CHANGED_EVENT:
+ break;
+ case INT3402_THERMAL_EVENT:
+ int340x_thermal_zone_device_update(priv->int340x_zone,
+ THERMAL_TRIP_VIOLATED);
+ break;
+ default:
+ break;
+ }
+}
+
+static int int3402_thermal_probe(struct platform_device *pdev)
+{
+ struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+ struct int3402_thermal_data *d;
+ int ret;
+
+ if (!acpi_has_method(adev->handle, "_TMP"))
+ return -ENODEV;
+
+ d = devm_kzalloc(&pdev->dev, sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+
+ d->int340x_zone = int340x_thermal_zone_add(adev, NULL);
+ if (IS_ERR(d->int340x_zone))
+ return PTR_ERR(d->int340x_zone);
+
+ ret = acpi_install_notify_handler(adev->handle,
+ ACPI_DEVICE_NOTIFY,
+ int3402_notify,
+ d);
+ if (ret) {
+ int340x_thermal_zone_remove(d->int340x_zone);
+ return ret;
+ }
+
+ d->handle = adev->handle;
+ platform_set_drvdata(pdev, d);
+
+ return 0;
+}
+
+static int int3402_thermal_remove(struct platform_device *pdev)
+{
+ struct int3402_thermal_data *d = platform_get_drvdata(pdev);
+
+ acpi_remove_notify_handler(d->handle,
+ ACPI_DEVICE_NOTIFY, int3402_notify);
+ int340x_thermal_zone_remove(d->int340x_zone);
+
+ return 0;
+}
+
+static const struct acpi_device_id int3402_thermal_match[] = {
+ {"INT3402", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(acpi, int3402_thermal_match);
+
+static struct platform_driver int3402_thermal_driver = {
+ .probe = int3402_thermal_probe,
+ .remove = int3402_thermal_remove,
+ .driver = {
+ .name = "int3402 thermal",
+ .acpi_match_table = int3402_thermal_match,
+ },
+};
+
+module_platform_driver(int3402_thermal_driver);
+
+MODULE_DESCRIPTION("INT3402 Thermal driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/thermal/intel/int340x_thermal/int3403_thermal.c b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c
new file mode 100644
index 0000000000..e418d270bc
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/int3403_thermal.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ACPI INT3403 thermal driver
+ * Copyright (c) 2013, Intel Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/acpi.h>
+#include <linux/thermal.h>
+#include <linux/platform_device.h>
+#include "int340x_thermal_zone.h"
+
+#define INT3403_TYPE_SENSOR 0x03
+#define INT3403_TYPE_CHARGER 0x0B
+#define INT3403_TYPE_BATTERY 0x0C
+#define INT3403_PERF_CHANGED_EVENT 0x80
+#define INT3403_PERF_TRIP_POINT_CHANGED 0x81
+#define INT3403_THERMAL_EVENT 0x90
+
+/* Preserved structure for future expandability */
+struct int3403_sensor {
+ struct int34x_thermal_zone *int340x_zone;
+};
+
+struct int3403_performance_state {
+ u64 performance;
+ u64 power;
+ u64 latency;
+ u64 linear;
+ u64 control;
+ u64 raw_performance;
+ char *raw_unit;
+ int reserved;
+};
+
+struct int3403_cdev {
+ struct thermal_cooling_device *cdev;
+ unsigned long max_state;
+};
+
+struct int3403_priv {
+ struct platform_device *pdev;
+ struct acpi_device *adev;
+ unsigned long long type;
+ void *priv;
+};
+
+static void int3403_notify(acpi_handle handle,
+ u32 event, void *data)
+{
+ struct int3403_priv *priv = data;
+ struct int3403_sensor *obj;
+
+ if (!priv)
+ return;
+
+ obj = priv->priv;
+ if (priv->type != INT3403_TYPE_SENSOR || !obj)
+ return;
+
+ switch (event) {
+ case INT3403_PERF_CHANGED_EVENT:
+ break;
+ case INT3403_THERMAL_EVENT:
+ int340x_thermal_zone_device_update(obj->int340x_zone,
+ THERMAL_TRIP_VIOLATED);
+ break;
+ case INT3403_PERF_TRIP_POINT_CHANGED:
+ int340x_thermal_update_trips(obj->int340x_zone);
+ int340x_thermal_zone_device_update(obj->int340x_zone,
+ THERMAL_TRIP_CHANGED);
+ break;
+ default:
+ dev_dbg(&priv->pdev->dev, "Unsupported event [0x%x]\n", event);
+ break;
+ }
+}
+
+static int int3403_sensor_add(struct int3403_priv *priv)
+{
+ int result = 0;
+ struct int3403_sensor *obj;
+
+ obj = devm_kzalloc(&priv->pdev->dev, sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ priv->priv = obj;
+
+ obj->int340x_zone = int340x_thermal_zone_add(priv->adev, NULL);
+ if (IS_ERR(obj->int340x_zone))
+ return PTR_ERR(obj->int340x_zone);
+
+ result = acpi_install_notify_handler(priv->adev->handle,
+ ACPI_DEVICE_NOTIFY, int3403_notify,
+ (void *)priv);
+ if (result)
+ goto err_free_obj;
+
+ return 0;
+
+ err_free_obj:
+ int340x_thermal_zone_remove(obj->int340x_zone);
+ return result;
+}
+
+static int int3403_sensor_remove(struct int3403_priv *priv)
+{
+ struct int3403_sensor *obj = priv->priv;
+
+ acpi_remove_notify_handler(priv->adev->handle,
+ ACPI_DEVICE_NOTIFY, int3403_notify);
+ int340x_thermal_zone_remove(obj->int340x_zone);
+
+ return 0;
+}
+
+/* INT3403 Cooling devices */
+static int int3403_get_max_state(struct thermal_cooling_device *cdev,
+ unsigned long *state)
+{
+ struct int3403_priv *priv = cdev->devdata;
+ struct int3403_cdev *obj = priv->priv;
+
+ *state = obj->max_state;
+ return 0;
+}
+
+static int int3403_get_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long *state)
+{
+ struct int3403_priv *priv = cdev->devdata;
+ unsigned long long level;
+ acpi_status status;
+
+ status = acpi_evaluate_integer(priv->adev->handle, "PPPC", NULL, &level);
+ if (ACPI_FAILURE(status))
+ return -EINVAL;
+
+ *state = level;
+ return 0;
+}
+
+static int
+int3403_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state)
+{
+ struct int3403_priv *priv = cdev->devdata;
+ acpi_status status;
+
+ status = acpi_execute_simple_method(priv->adev->handle, "SPPC", state);
+ if (ACPI_FAILURE(status))
+ return -EINVAL;
+
+ return 0;
+}
+
+static const struct thermal_cooling_device_ops int3403_cooling_ops = {
+ .get_max_state = int3403_get_max_state,
+ .get_cur_state = int3403_get_cur_state,
+ .set_cur_state = int3403_set_cur_state,
+};
+
+static int int3403_cdev_add(struct int3403_priv *priv)
+{
+ int result = 0;
+ acpi_status status;
+ struct int3403_cdev *obj;
+ struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+ union acpi_object *p;
+
+ obj = devm_kzalloc(&priv->pdev->dev, sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
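+ /*
+ * PPSS returns a package of performance-state entries; cooling states
+ * map 1:1 onto them, so the deepest cooling state is count - 1.
+ */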
+ status = acpi_evaluate_object(priv->adev->handle, "PPSS", NULL, &buf);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ p = buf.pointer;
+ if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
+ pr_warn("Invalid PPSS data\n");
+ kfree(buf.pointer);
+ return -EFAULT;
+ }
+
+ priv->priv = obj;
+ obj->max_state = p->package.count - 1;
+ obj->cdev =
+ thermal_cooling_device_register(acpi_device_bid(priv->adev),
+ priv, &int3403_cooling_ops);
+ if (IS_ERR(obj->cdev))
+ result = PTR_ERR(obj->cdev);
+
+ kfree(buf.pointer);
+ /* TODO: add ACPI notification support */
+
+ return result;
+}
+
+static int int3403_cdev_remove(struct int3403_priv *priv)
+{
+ struct int3403_cdev *obj = priv->priv;
+
+ thermal_cooling_device_unregister(obj->cdev);
+ return 0;
+}
+
+static int int3403_add(struct platform_device *pdev)
+{
+ struct int3403_priv *priv;
+ int result = 0;
+ unsigned long long tmp;
+ acpi_status status;
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(struct int3403_priv),
+ GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->pdev = pdev;
+ priv->adev = ACPI_COMPANION(&(pdev->dev));
+ if (!priv->adev) {
+ result = -EINVAL;
+ goto err;
+ }
+
+ status = acpi_evaluate_integer(priv->adev->handle, "_TMP",
+ NULL, &tmp);
+ if (ACPI_FAILURE(status)) {
+ status = acpi_evaluate_integer(priv->adev->handle, "PTYP",
+ NULL, &priv->type);
+ if (ACPI_FAILURE(status)) {
+ result = -EINVAL;
+ goto err;
+ }
+ } else {
+ priv->type = INT3403_TYPE_SENSOR;
+ }
+
+ platform_set_drvdata(pdev, priv);
+ switch (priv->type) {
+ case INT3403_TYPE_SENSOR:
+ result = int3403_sensor_add(priv);
+ break;
+ case INT3403_TYPE_CHARGER:
+ case INT3403_TYPE_BATTERY:
+ result = int3403_cdev_add(priv);
+ break;
+ default:
+ result = -EINVAL;
+ }
+
+err:
+ return result;
+}
+
+static int int3403_remove(struct platform_device *pdev)
+{
+ struct int3403_priv *priv = platform_get_drvdata(pdev);
+
+ switch (priv->type) {
+ case INT3403_TYPE_SENSOR:
+ int3403_sensor_remove(priv);
+ break;
+ case INT3403_TYPE_CHARGER:
+ case INT3403_TYPE_BATTERY:
+ int3403_cdev_remove(priv);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static const struct acpi_device_id int3403_device_ids[] = {
+ {"INT3403", 0},
+ {"INTC1043", 0},
+ {"INTC1046", 0},
+ {"INTC1062", 0},
+ {"INTC10A1", 0},
+ {"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, int3403_device_ids);
+
+static struct platform_driver int3403_driver = {
+ .probe = int3403_add,
+ .remove = int3403_remove,
+ .driver = {
+ .name = "int3403 thermal",
+ .acpi_match_table = int3403_device_ids,
+ },
+};
+
+module_platform_driver(int3403_driver);
+
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("ACPI INT3403 thermal driver");
diff --git a/drivers/thermal/intel/int340x_thermal/int3406_thermal.c b/drivers/thermal/intel/int340x_thermal/int3406_thermal.c
new file mode 100644
index 0000000000..f5e42fc2ac
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/int3406_thermal.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * INT3406 thermal driver for display participant device
+ *
+ * Copyright (C) 2016, Intel Corporation
+ * Authors: Aaron Lu <aaron.lu@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+#include <linux/backlight.h>
+#include <linux/thermal.h>
+#include <acpi/video.h>
+
+#define INT3406_BRIGHTNESS_LIMITS_CHANGED 0x80
+
+struct int3406_thermal_data {
+ int upper_limit;
+ int lower_limit;
+ acpi_handle handle;
+ struct acpi_video_device_brightness *br;
+ struct backlight_device *raw_bd;
+ struct thermal_cooling_device *cooling_dev;
+};
+
+/*
+ * According to the ACPI spec,
+ * "Each brightness level is represented by a number between 0 and 100,
+ * and can be thought of as a percentage. For example, 50 can be 50%
+ * power consumption or 50% brightness, as defined by the OEM."
+ *
+ * As the INT3406 device uses this value to communicate with the native
+ * graphics driver, we assume that it represents the percentage of
+ * brightness only.
+ */
+#define ACPI_TO_RAW(v, d) ((d)->raw_bd->props.max_brightness * (v) / 100)
+#define RAW_TO_ACPI(v, d) ((v) * 100 / (d)->raw_bd->props.max_brightness)
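+/*
+ * Example: with max_brightness 255, ACPI level 50 maps to raw level 127,
+ * and back to ACPI level 49 (integer division makes the mapping lossy).
+ */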
+
+static int
+int3406_thermal_get_max_state(struct thermal_cooling_device *cooling_dev,
+ unsigned long *state)
+{
+ struct int3406_thermal_data *d = cooling_dev->devdata;
+
+ *state = d->upper_limit - d->lower_limit;
+ return 0;
+}
+
+static int
+int3406_thermal_set_cur_state(struct thermal_cooling_device *cooling_dev,
+ unsigned long state)
+{
+ struct int3406_thermal_data *d = cooling_dev->devdata;
+ int acpi_level, raw_level;
+
+ if (state > d->upper_limit - d->lower_limit)
+ return -EINVAL;
+
+ acpi_level = d->br->levels[d->upper_limit - state];
+
+ raw_level = ACPI_TO_RAW(acpi_level, d);
+
+ return backlight_device_set_brightness(d->raw_bd, raw_level);
+}
+
+static int
+int3406_thermal_get_cur_state(struct thermal_cooling_device *cooling_dev,
+ unsigned long *state)
+{
+ struct int3406_thermal_data *d = cooling_dev->devdata;
+ int acpi_level;
+ int index;
+
+ acpi_level = RAW_TO_ACPI(d->raw_bd->props.brightness, d);
+
+ /*
+ * There is no 1:1 mapping between the firmware interface levels
+ * and the raw interface levels, so find the first firmware level
+ * at or above the current raw brightness.
+ */
+ for (index = d->lower_limit; index < d->upper_limit; index++) {
+ if (acpi_level <= d->br->levels[index])
+ break;
+ }
+
+ *state = d->upper_limit - index;
+ return 0;
+}
+
+static const struct thermal_cooling_device_ops video_cooling_ops = {
+ .get_max_state = int3406_thermal_get_max_state,
+ .get_cur_state = int3406_thermal_get_cur_state,
+ .set_cur_state = int3406_thermal_set_cur_state,
+};
+
+static int int3406_thermal_get_index(int *array, int nr, int value)
+{
+ int i;
+
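+ /*
+ * Entries 0 and 1 of the _BCL-derived table hold the full-power
+ * levels on AC and battery, not selectable brightness points, so
+ * start the search at index 2.
+ */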
+ for (i = 2; i < nr; i++) {
+ if (array[i] == value)
+ break;
+ }
+ return i == nr ? -ENOENT : i;
+}
+
+static void int3406_thermal_get_limit(struct int3406_thermal_data *d)
+{
+ acpi_status status;
+ unsigned long long lower_limit, upper_limit;
+
+ status = acpi_evaluate_integer(d->handle, "DDDL", NULL, &lower_limit);
+ if (ACPI_SUCCESS(status))
+ d->lower_limit = int3406_thermal_get_index(d->br->levels,
+ d->br->count, lower_limit);
+
+ status = acpi_evaluate_integer(d->handle, "DDPC", NULL, &upper_limit);
+ if (ACPI_SUCCESS(status))
+ d->upper_limit = int3406_thermal_get_index(d->br->levels,
+ d->br->count, upper_limit);
+
+ /* lower_limit and upper_limit should always be set */
+ d->lower_limit = d->lower_limit > 0 ? d->lower_limit : 2;
+ d->upper_limit = d->upper_limit > 0 ? d->upper_limit : d->br->count - 1;
+}
+
+static void int3406_notify(acpi_handle handle, u32 event, void *data)
+{
+ if (event == INT3406_BRIGHTNESS_LIMITS_CHANGED)
+ int3406_thermal_get_limit(data);
+}
+
+static int int3406_thermal_probe(struct platform_device *pdev)
+{
+ struct acpi_device *adev = ACPI_COMPANION(&pdev->dev);
+ struct int3406_thermal_data *d;
+ struct backlight_device *bd;
+ int ret;
+
+ if (!ACPI_HANDLE(&pdev->dev))
+ return -ENODEV;
+
+ d = devm_kzalloc(&pdev->dev, sizeof(*d), GFP_KERNEL);
+ if (!d)
+ return -ENOMEM;
+ d->handle = ACPI_HANDLE(&pdev->dev);
+
+ bd = backlight_device_get_by_type(BACKLIGHT_RAW);
+ if (!bd)
+ return -ENODEV;
+ d->raw_bd = bd;
+
+ ret = acpi_video_get_levels(ACPI_COMPANION(&pdev->dev), &d->br, NULL);
+ if (ret)
+ return ret;
+
+ int3406_thermal_get_limit(d);
+
+ d->cooling_dev = thermal_cooling_device_register(acpi_device_bid(adev),
+ d, &video_cooling_ops);
+ if (IS_ERR(d->cooling_dev))
+ goto err;
+
+ ret = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY,
+ int3406_notify, d);
+ if (ret)
+ goto err_cdev;
+
+ platform_set_drvdata(pdev, d);
+
+ return 0;
+
+err_cdev:
+ thermal_cooling_device_unregister(d->cooling_dev);
+err:
+ kfree(d->br);
+ return -ENODEV;
+}
+
+static int int3406_thermal_remove(struct platform_device *pdev)
+{
+ struct int3406_thermal_data *d = platform_get_drvdata(pdev);
+
+ thermal_cooling_device_unregister(d->cooling_dev);
+ kfree(d->br);
+ return 0;
+}
+
+static const struct acpi_device_id int3406_thermal_match[] = {
+ {"INT3406", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(acpi, int3406_thermal_match);
+
+static struct platform_driver int3406_thermal_driver = {
+ .probe = int3406_thermal_probe,
+ .remove = int3406_thermal_remove,
+ .driver = {
+ .name = "int3406 thermal",
+ .acpi_match_table = int3406_thermal_match,
+ },
+};
+
+module_platform_driver(int3406_thermal_driver);
+
+MODULE_DESCRIPTION("INT3406 Thermal driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
new file mode 100644
index 0000000000..89cf007146
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * int340x_thermal_zone.c
+ * Copyright (c) 2015, Intel Corporation.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/acpi.h>
+#include <linux/thermal.h>
+#include <linux/units.h>
+#include "int340x_thermal_zone.h"
+
+static int int340x_thermal_get_zone_temp(struct thermal_zone_device *zone,
+ int *temp)
+{
+ struct int34x_thermal_zone *d = thermal_zone_device_priv(zone);
+ unsigned long long tmp;
+ acpi_status status;
+
+ status = acpi_evaluate_integer(d->adev->handle, "_TMP", NULL, &tmp);
+ if (ACPI_FAILURE(status))
+ return -EIO;
+
+ if (d->lpat_table) {
+ int conv_temp;
+
+ conv_temp = acpi_lpat_raw_to_temp(d->lpat_table, (int)tmp);
+ if (conv_temp < 0)
+ return conv_temp;
+
+ *temp = conv_temp * 10;
+ } else {
+ /* _TMP returns the temperature in tenths of degrees Kelvin */
+ *temp = deci_kelvin_to_millicelsius(tmp);
+ }
+
+ return 0;
+}
+
+static int int340x_thermal_set_trip_temp(struct thermal_zone_device *zone,
+ int trip, int temp)
+{
+ struct int34x_thermal_zone *d = thermal_zone_device_priv(zone);
+ char name[] = {'P', 'A', 'T', '0' + trip, '\0'};
+ acpi_status status;
+
+ if (trip > 9)
+ return -EINVAL;
+
+ status = acpi_execute_simple_method(d->adev->handle, name,
+ millicelsius_to_deci_kelvin(temp));
+ if (ACPI_FAILURE(status))
+ return -EIO;
+
+ return 0;
+}
+
+static void int340x_thermal_critical(struct thermal_zone_device *zone)
+{
+ dev_dbg(&zone->device, "%s: critical temperature reached\n", zone->type);
+}
+
+static struct thermal_zone_device_ops int340x_thermal_zone_ops = {
+ .get_temp = int340x_thermal_get_zone_temp,
+ .set_trip_temp = int340x_thermal_set_trip_temp,
+ .critical = int340x_thermal_critical,
+};
+
+static int int340x_thermal_read_trips(struct acpi_device *zone_adev,
+ struct thermal_trip *zone_trips,
+ int trip_cnt)
+{
+ int i, ret;
+
+ ret = thermal_acpi_critical_trip_temp(zone_adev,
+ &zone_trips[trip_cnt].temperature);
+ if (!ret) {
+ zone_trips[trip_cnt].type = THERMAL_TRIP_CRITICAL;
+ trip_cnt++;
+ }
+
+ ret = thermal_acpi_hot_trip_temp(zone_adev,
+ &zone_trips[trip_cnt].temperature);
+ if (!ret) {
+ zone_trips[trip_cnt].type = THERMAL_TRIP_HOT;
+ trip_cnt++;
+ }
+
+ ret = thermal_acpi_passive_trip_temp(zone_adev,
+ &zone_trips[trip_cnt].temperature);
+ if (!ret) {
+ zone_trips[trip_cnt].type = THERMAL_TRIP_PASSIVE;
+ trip_cnt++;
+ }
+
+ for (i = 0; i < INT340X_THERMAL_MAX_ACT_TRIP_COUNT; i++) {
+ ret = thermal_acpi_active_trip_temp(zone_adev, i,
+ &zone_trips[trip_cnt].temperature);
+ if (ret)
+ break;
+
+ zone_trips[trip_cnt].type = THERMAL_TRIP_ACTIVE;
+ trip_cnt++;
+ }
+
+ return trip_cnt;
+}
+
+static struct thermal_zone_params int340x_thermal_params = {
+ .governor_name = "user_space",
+ .no_hwmon = true,
+};
+
+struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *adev,
+ int (*get_temp) (struct thermal_zone_device *, int *))
+{
+ struct int34x_thermal_zone *int34x_zone;
+ struct thermal_trip *zone_trips;
+ unsigned long long trip_cnt = 0;
+ unsigned long long hyst;
+ int trip_mask = 0;
+ acpi_status status;
+ int i, ret;
+
+ int34x_zone = kzalloc(sizeof(*int34x_zone), GFP_KERNEL);
+ if (!int34x_zone)
+ return ERR_PTR(-ENOMEM);
+
+ int34x_zone->adev = adev;
+
+ int34x_zone->ops = kmemdup(&int340x_thermal_zone_ops,
+ sizeof(int340x_thermal_zone_ops), GFP_KERNEL);
+ if (!int34x_zone->ops) {
+ ret = -ENOMEM;
+ goto err_ops_alloc;
+ }
+
+ if (get_temp)
+ int34x_zone->ops->get_temp = get_temp;
+
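+ /*
+ * PATC reports how many auxiliary (user-programmable) trip points
+ * the firmware supports; make exactly those low-order trips writable.
+ */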
+ status = acpi_evaluate_integer(adev->handle, "PATC", NULL, &trip_cnt);
+ if (ACPI_SUCCESS(status)) {
+ int34x_zone->aux_trip_nr = trip_cnt;
+ trip_mask = BIT(trip_cnt) - 1;
+ }
+
+ zone_trips = kcalloc(trip_cnt + INT340X_THERMAL_MAX_TRIP_COUNT,
+ sizeof(*zone_trips), GFP_KERNEL);
+ if (!zone_trips) {
+ ret = -ENOMEM;
+ goto err_trips_alloc;
+ }
+
+ for (i = 0; i < trip_cnt; i++) {
+ zone_trips[i].type = THERMAL_TRIP_PASSIVE;
+ zone_trips[i].temperature = THERMAL_TEMP_INVALID;
+ }
+
+ trip_cnt = int340x_thermal_read_trips(adev, zone_trips, trip_cnt);
+
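+ /* GTSH reports hysteresis in tenths of a degree; scale to millidegrees. */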
+ status = acpi_evaluate_integer(adev->handle, "GTSH", NULL, &hyst);
+ if (ACPI_SUCCESS(status))
+ hyst *= 100;
+ else
+ hyst = 0;
+
+ for (i = 0; i < trip_cnt; ++i)
+ zone_trips[i].hysteresis = hyst;
+
+ int34x_zone->trips = zone_trips;
+
+ int34x_zone->lpat_table = acpi_lpat_get_conversion_table(adev->handle);
+
+ int34x_zone->zone = thermal_zone_device_register_with_trips(
+ acpi_device_bid(adev),
+ zone_trips, trip_cnt,
+ trip_mask, int34x_zone,
+ int34x_zone->ops,
+ &int340x_thermal_params,
+ 0, 0);
+ if (IS_ERR(int34x_zone->zone)) {
+ ret = PTR_ERR(int34x_zone->zone);
+ goto err_thermal_zone;
+ }
+ ret = thermal_zone_device_enable(int34x_zone->zone);
+ if (ret)
+ goto err_enable;
+
+ return int34x_zone;
+
+err_enable:
+ thermal_zone_device_unregister(int34x_zone->zone);
+err_thermal_zone:
+ kfree(int34x_zone->trips);
+ acpi_lpat_free_conversion_table(int34x_zone->lpat_table);
+err_trips_alloc:
+ kfree(int34x_zone->ops);
+err_ops_alloc:
+ kfree(int34x_zone);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(int340x_thermal_zone_add);
+
+void int340x_thermal_zone_remove(struct int34x_thermal_zone *int34x_zone)
+{
+ thermal_zone_device_unregister(int34x_zone->zone);
+ acpi_lpat_free_conversion_table(int34x_zone->lpat_table);
+ kfree(int34x_zone->trips);
+ kfree(int34x_zone->ops);
+ kfree(int34x_zone);
+}
+EXPORT_SYMBOL_GPL(int340x_thermal_zone_remove);
+
+void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone)
+{
+ struct acpi_device *zone_adev = int34x_zone->adev;
+ struct thermal_trip *zone_trips = int34x_zone->trips;
+ int trip_cnt = int34x_zone->zone->num_trips;
+ int act_trip_nr = 0;
+ int i;
+
+ mutex_lock(&int34x_zone->zone->lock);
+
+ for (i = int34x_zone->aux_trip_nr; i < trip_cnt; i++) {
+ int temp, err;
+
+ switch (zone_trips[i].type) {
+ case THERMAL_TRIP_CRITICAL:
+ err = thermal_acpi_critical_trip_temp(zone_adev, &temp);
+ break;
+ case THERMAL_TRIP_HOT:
+ err = thermal_acpi_hot_trip_temp(zone_adev, &temp);
+ break;
+ case THERMAL_TRIP_PASSIVE:
+ err = thermal_acpi_passive_trip_temp(zone_adev, &temp);
+ break;
+ case THERMAL_TRIP_ACTIVE:
+ err = thermal_acpi_active_trip_temp(zone_adev, act_trip_nr++,
+ &temp);
+ break;
+ default:
+ err = -ENODEV;
+ }
+ if (err) {
+ zone_trips[i].temperature = THERMAL_TEMP_INVALID;
+ continue;
+ }
+
+ zone_trips[i].temperature = temp;
+ }
+
+ mutex_unlock(&int34x_zone->zone->lock);
+}
+EXPORT_SYMBOL_GPL(int340x_thermal_update_trips);
+
+MODULE_AUTHOR("Aaron Lu <aaron.lu@intel.com>");
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_DESCRIPTION("Intel INT340x common thermal zone handler");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
new file mode 100644
index 0000000000..e0df6271fa
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/int340x_thermal_zone.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * int340x_thermal_zone.h
+ * Copyright (c) 2015, Intel Corporation.
+ */
+
+#ifndef __INT340X_THERMAL_ZONE_H__
+#define __INT340X_THERMAL_ZONE_H__
+
+#include <acpi/acpi_lpat.h>
+
+#define INT340X_THERMAL_MAX_ACT_TRIP_COUNT 10
+#define INT340X_THERMAL_MAX_TRIP_COUNT (INT340X_THERMAL_MAX_ACT_TRIP_COUNT + 3)
+
+struct active_trip {
+ int temp;
+ int id;
+ bool valid;
+};
+
+struct int34x_thermal_zone {
+ struct acpi_device *adev;
+ struct thermal_trip *trips;
+ int aux_trip_nr;
+ struct thermal_zone_device *zone;
+ struct thermal_zone_device_ops *ops;
+ void *priv_data;
+ struct acpi_lpat_conversion_table *lpat_table;
+};
+
+struct int34x_thermal_zone *int340x_thermal_zone_add(struct acpi_device *,
+ int (*get_temp) (struct thermal_zone_device *, int *));
+void int340x_thermal_zone_remove(struct int34x_thermal_zone *);
+void int340x_thermal_update_trips(struct int34x_thermal_zone *int34x_zone);
+
+static inline void int340x_thermal_zone_set_priv_data(
+ struct int34x_thermal_zone *tzone, void *priv_data)
+{
+ tzone->priv_data = priv_data;
+}
+
+static inline void *int340x_thermal_zone_get_priv_data(
+ struct int34x_thermal_zone *tzone)
+{
+ return tzone->priv_data;
+}
+
+static inline void int340x_thermal_zone_device_update(
+ struct int34x_thermal_zone *tzone,
+ enum thermal_notify_event event)
+{
+ thermal_zone_device_update(tzone->zone, event);
+}
+
+#endif
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
new file mode 100644
index 0000000000..3ca0a2f593
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.c
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * processor_thermal_device.c
+ * Copyright (c) 2014, Intel Corporation.
+ */
+#include <linux/acpi.h>
+#include <linux/intel_tcc.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/thermal.h>
+#include "int340x_thermal_zone.h"
+#include "processor_thermal_device.h"
+#include "../intel_soc_dts_iosf.h"
+
+#define DRV_NAME "proc_thermal"
+
+#define POWER_LIMIT_SHOW(index, suffix) \
+static ssize_t power_limit_##index##_##suffix##_show(struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct proc_thermal_device *proc_dev = dev_get_drvdata(dev); \
+ \
+ return sprintf(buf, "%lu\n",\
+ (unsigned long)proc_dev->power_limits[index].suffix * 1000); \
+}
+
+POWER_LIMIT_SHOW(0, min_uw)
+POWER_LIMIT_SHOW(0, max_uw)
+POWER_LIMIT_SHOW(0, step_uw)
+POWER_LIMIT_SHOW(0, tmin_us)
+POWER_LIMIT_SHOW(0, tmax_us)
+
+POWER_LIMIT_SHOW(1, min_uw)
+POWER_LIMIT_SHOW(1, max_uw)
+POWER_LIMIT_SHOW(1, step_uw)
+POWER_LIMIT_SHOW(1, tmin_us)
+POWER_LIMIT_SHOW(1, tmax_us)
+
+static DEVICE_ATTR_RO(power_limit_0_min_uw);
+static DEVICE_ATTR_RO(power_limit_0_max_uw);
+static DEVICE_ATTR_RO(power_limit_0_step_uw);
+static DEVICE_ATTR_RO(power_limit_0_tmin_us);
+static DEVICE_ATTR_RO(power_limit_0_tmax_us);
+
+static DEVICE_ATTR_RO(power_limit_1_min_uw);
+static DEVICE_ATTR_RO(power_limit_1_max_uw);
+static DEVICE_ATTR_RO(power_limit_1_step_uw);
+static DEVICE_ATTR_RO(power_limit_1_tmin_us);
+static DEVICE_ATTR_RO(power_limit_1_tmax_us);
+
+static struct attribute *power_limit_attrs[] = {
+ &dev_attr_power_limit_0_min_uw.attr,
+ &dev_attr_power_limit_1_min_uw.attr,
+ &dev_attr_power_limit_0_max_uw.attr,
+ &dev_attr_power_limit_1_max_uw.attr,
+ &dev_attr_power_limit_0_step_uw.attr,
+ &dev_attr_power_limit_1_step_uw.attr,
+ &dev_attr_power_limit_0_tmin_us.attr,
+ &dev_attr_power_limit_1_tmin_us.attr,
+ &dev_attr_power_limit_0_tmax_us.attr,
+ &dev_attr_power_limit_1_tmax_us.attr,
+ NULL
+};
+
+static const struct attribute_group power_limit_attribute_group = {
+ .attrs = power_limit_attrs,
+ .name = "power_limits"
+};
+
+static ssize_t tcc_offset_degree_celsius_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int offset;
+
+ offset = intel_tcc_get_offset(-1);
+ if (offset < 0)
+ return offset;
+
+ return sprintf(buf, "%d\n", offset);
+}
+
+static ssize_t tcc_offset_degree_celsius_store(struct device *dev,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ unsigned int tcc;
+ u64 val;
+ int err;
+
+ err = rdmsrl_safe(MSR_PLATFORM_INFO, &val);
+ if (err)
+ return err;
+
+ if (!(val & BIT(30)))
+ return -EACCES;
+
+ if (kstrtouint(buf, 0, &tcc))
+ return -EINVAL;
+
+ err = intel_tcc_set_offset(-1, tcc);
+ if (err)
+ return err;
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(tcc_offset_degree_celsius);
+
+static int proc_thermal_get_zone_temp(struct thermal_zone_device *zone,
+ int *temp)
+{
+ int cpu;
+ int curr_temp;
+
+ *temp = 0;
+
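+ /* Report the hottest temperature seen across all online CPUs. */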
+ for_each_online_cpu(cpu) {
+ curr_temp = intel_tcc_get_temp(cpu, false);
+ if (curr_temp < 0)
+ return curr_temp;
+ if (!*temp || curr_temp > *temp)
+ *temp = curr_temp;
+ }
+
+ *temp *= 1000;
+
+ return 0;
+}
+
+static int proc_thermal_read_ppcc(struct proc_thermal_device *proc_priv)
+{
+ int i;
+ acpi_status status;
+ struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+ union acpi_object *elements, *ppcc;
+ union acpi_object *p;
+ int ret = 0;
+
+ status = acpi_evaluate_object(proc_priv->adev->handle, "PPCC",
+ NULL, &buf);
+ if (ACPI_FAILURE(status))
+ return -ENODEV;
+
+ p = buf.pointer;
+ if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
+ dev_err(proc_priv->dev, "Invalid PPCC data\n");
+ ret = -EFAULT;
+ goto free_buffer;
+ }
+
+ if (!p->package.count) {
+ dev_err(proc_priv->dev, "Invalid PPCC package size\n");
+ ret = -EFAULT;
+ goto free_buffer;
+ }
+
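+ /*
+ * Element 0 of the PPCC package is the revision; each following
+ * element describes one power limit as a six-integer package.
+ */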
+ for (i = 0; i < min((int)p->package.count - 1, 2); ++i) {
+ elements = &(p->package.elements[i+1]);
+ if (elements->type != ACPI_TYPE_PACKAGE ||
+ elements->package.count != 6) {
+ ret = -EFAULT;
+ goto free_buffer;
+ }
+ ppcc = elements->package.elements;
+ proc_priv->power_limits[i].index = ppcc[0].integer.value;
+ proc_priv->power_limits[i].min_uw = ppcc[1].integer.value;
+ proc_priv->power_limits[i].max_uw = ppcc[2].integer.value;
+ proc_priv->power_limits[i].tmin_us = ppcc[3].integer.value;
+ proc_priv->power_limits[i].tmax_us = ppcc[4].integer.value;
+ proc_priv->power_limits[i].step_uw = ppcc[5].integer.value;
+ }
+
+free_buffer:
+ kfree(buf.pointer);
+
+ return ret;
+}
+
+#define PROC_POWER_CAPABILITY_CHANGED 0x83
+static void proc_thermal_notify(acpi_handle handle, u32 event, void *data)
+{
+ struct proc_thermal_device *proc_priv = data;
+
+ if (!proc_priv)
+ return;
+
+ switch (event) {
+ case PROC_POWER_CAPABILITY_CHANGED:
+ proc_thermal_read_ppcc(proc_priv);
+ int340x_thermal_zone_device_update(proc_priv->int340x_zone,
+ THERMAL_DEVICE_POWER_CAPABILITY_CHANGED);
+ break;
+ default:
+ dev_dbg(proc_priv->dev, "Unsupported event [0x%x]\n", event);
+ break;
+ }
+}
+
+int proc_thermal_add(struct device *dev, struct proc_thermal_device *proc_priv)
+{
+ struct acpi_device *adev;
+ acpi_status status;
+ unsigned long long tmp;
+ int (*get_temp) (struct thermal_zone_device *, int *) = NULL;
+ int ret;
+
+ adev = ACPI_COMPANION(dev);
+ if (!adev)
+ return -ENODEV;
+
+ proc_priv->dev = dev;
+ proc_priv->adev = adev;
+
+ ret = proc_thermal_read_ppcc(proc_priv);
+ if (ret)
+ return ret;
+
+ status = acpi_evaluate_integer(adev->handle, "_TMP", NULL, &tmp);
+ if (ACPI_FAILURE(status)) {
+ /* there is no _TMP method, add local method */
+ if (intel_tcc_get_tjmax(-1) > 0)
+ get_temp = proc_thermal_get_zone_temp;
+ }
+
+ proc_priv->int340x_zone = int340x_thermal_zone_add(adev, get_temp);
+ if (IS_ERR(proc_priv->int340x_zone))
+ return PTR_ERR(proc_priv->int340x_zone);
+
+ ret = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY,
+ proc_thermal_notify,
+ (void *)proc_priv);
+ if (ret)
+ goto remove_zone;
+
+ ret = sysfs_create_file(&dev->kobj, &dev_attr_tcc_offset_degree_celsius.attr);
+ if (ret)
+ goto remove_notify;
+
+ ret = sysfs_create_group(&dev->kobj, &power_limit_attribute_group);
+ if (ret) {
+ sysfs_remove_file(&dev->kobj, &dev_attr_tcc_offset_degree_celsius.attr);
+ goto remove_notify;
+ }
+
+ return 0;
+
+remove_notify:
+ acpi_remove_notify_handler(adev->handle,
+ ACPI_DEVICE_NOTIFY, proc_thermal_notify);
+remove_zone:
+ int340x_thermal_zone_remove(proc_priv->int340x_zone);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(proc_thermal_add);
+
+void proc_thermal_remove(struct proc_thermal_device *proc_priv)
+{
+ acpi_remove_notify_handler(proc_priv->adev->handle,
+ ACPI_DEVICE_NOTIFY, proc_thermal_notify);
+ int340x_thermal_zone_remove(proc_priv->int340x_zone);
+ sysfs_remove_file(&proc_priv->dev->kobj, &dev_attr_tcc_offset_degree_celsius.attr);
+ sysfs_remove_group(&proc_priv->dev->kobj,
+ &power_limit_attribute_group);
+}
+EXPORT_SYMBOL_GPL(proc_thermal_remove);
+
+static int tcc_offset_save = -1;
+
+int proc_thermal_suspend(struct device *dev)
+{
+ tcc_offset_save = intel_tcc_get_offset(-1);
+ if (tcc_offset_save < 0)
+ dev_warn(dev, "failed to save offset (%d)\n", tcc_offset_save);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(proc_thermal_suspend);
+
+int proc_thermal_resume(struct device *dev)
+{
+ struct proc_thermal_device *proc_dev;
+
+ proc_dev = dev_get_drvdata(dev);
+ proc_thermal_read_ppcc(proc_dev);
+
+ /* Do not update if saving failed */
+ if (tcc_offset_save >= 0)
+ intel_tcc_set_offset(-1, tcc_offset_save);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(proc_thermal_resume);
+
+#define MCHBAR 0
+
+static int proc_thermal_set_mmio_base(struct pci_dev *pdev, struct proc_thermal_device *proc_priv)
+{
+ int ret;
+
+ ret = pcim_iomap_regions(pdev, 1 << MCHBAR, DRV_NAME);
+ if (ret) {
+ dev_err(&pdev->dev, "cannot reserve PCI memory region\n");
+ return -ENOMEM;
+ }
+
+ proc_priv->mmio_base = pcim_iomap_table(pdev)[MCHBAR];
+
+ return 0;
+}
+
+int proc_thermal_mmio_add(struct pci_dev *pdev,
+ struct proc_thermal_device *proc_priv,
+ kernel_ulong_t feature_mask)
+{
+ int ret;
+
+ proc_priv->mmio_feature_mask = feature_mask;
+
+ if (feature_mask) {
+ ret = proc_thermal_set_mmio_base(pdev, proc_priv);
+ if (ret)
+ return ret;
+ }
+
+ if (feature_mask & PROC_THERMAL_FEATURE_RAPL) {
+ ret = proc_thermal_rapl_add(pdev, proc_priv);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to add RAPL MMIO interface\n");
+ return ret;
+ }
+ }
+
+ if (feature_mask & PROC_THERMAL_FEATURE_FIVR ||
+ feature_mask & PROC_THERMAL_FEATURE_DVFS ||
+ feature_mask & PROC_THERMAL_FEATURE_DLVR) {
+ ret = proc_thermal_rfim_add(pdev, proc_priv);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to add RFIM interface\n");
+ goto err_rem_rapl;
+ }
+ }
+
+ if (feature_mask & PROC_THERMAL_FEATURE_MBOX) {
+ ret = proc_thermal_mbox_add(pdev, proc_priv);
+ if (ret) {
+ dev_err(&pdev->dev, "failed to add MBOX interface\n");
+ goto err_rem_rfim;
+ }
+ }
+
+ return 0;
+
+err_rem_rfim:
+ proc_thermal_rfim_remove(pdev);
+err_rem_rapl:
+ proc_thermal_rapl_remove();
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(proc_thermal_mmio_add);
+
+void proc_thermal_mmio_remove(struct pci_dev *pdev, struct proc_thermal_device *proc_priv)
+{
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_RAPL)
+ proc_thermal_rapl_remove();
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_FIVR ||
+ proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DVFS ||
+ proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DLVR)
+ proc_thermal_rfim_remove(pdev);
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_MBOX)
+ proc_thermal_mbox_remove(pdev);
+}
+EXPORT_SYMBOL_GPL(proc_thermal_mmio_remove);
+
+MODULE_IMPORT_NS(INTEL_TCC);
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_DESCRIPTION("Processor Thermal Reporting Device Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
new file mode 100644
index 0000000000..7acaa8f1b8
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * processor_thermal_device.h
+ * Copyright (c) 2020, Intel Corporation.
+ */
+
+#ifndef __PROCESSOR_THERMAL_DEVICE_H__
+#define __PROCESSOR_THERMAL_DEVICE_H__
+
+#include <linux/intel_rapl.h>
+
+#define PCI_DEVICE_ID_INTEL_ADL_THERMAL 0x461d
+#define PCI_DEVICE_ID_INTEL_BDW_THERMAL 0x1603
+#define PCI_DEVICE_ID_INTEL_BSW_THERMAL 0x22DC
+
+#define PCI_DEVICE_ID_INTEL_BXT0_THERMAL 0x0A8C
+#define PCI_DEVICE_ID_INTEL_BXT1_THERMAL 0x1A8C
+#define PCI_DEVICE_ID_INTEL_BXTX_THERMAL 0x4A8C
+#define PCI_DEVICE_ID_INTEL_BXTP_THERMAL 0x5A8C
+
+#define PCI_DEVICE_ID_INTEL_CNL_THERMAL 0x5a03
+#define PCI_DEVICE_ID_INTEL_CFL_THERMAL 0x3E83
+#define PCI_DEVICE_ID_INTEL_GLK_THERMAL 0x318C
+#define PCI_DEVICE_ID_INTEL_HSB_THERMAL 0x0A03
+#define PCI_DEVICE_ID_INTEL_ICL_THERMAL 0x8a03
+#define PCI_DEVICE_ID_INTEL_JSL_THERMAL 0x4E03
+#define PCI_DEVICE_ID_INTEL_MTLP_THERMAL 0x7D03
+#define PCI_DEVICE_ID_INTEL_RPL_THERMAL 0xA71D
+#define PCI_DEVICE_ID_INTEL_SKL_THERMAL 0x1903
+#define PCI_DEVICE_ID_INTEL_TGL_THERMAL 0x9A03
+
+struct power_config {
+ u32 index;
+ u32 min_uw;
+ u32 max_uw;
+ u32 tmin_us;
+ u32 tmax_us;
+ u32 step_uw;
+};
+
+struct proc_thermal_device {
+ struct device *dev;
+ struct acpi_device *adev;
+ struct power_config power_limits[2];
+ struct int34x_thermal_zone *int340x_zone;
+ struct intel_soc_dts_sensors *soc_dts;
+ u32 mmio_feature_mask;
+ void __iomem *mmio_base;
+ void *priv_data;
+};
+
+struct rapl_mmio_regs {
+ u64 reg_unit;
+ u64 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
+ int limits[RAPL_DOMAIN_MAX];
+};
+
+#define PROC_THERMAL_FEATURE_NONE 0x00
+#define PROC_THERMAL_FEATURE_RAPL 0x01
+#define PROC_THERMAL_FEATURE_FIVR 0x02
+#define PROC_THERMAL_FEATURE_DVFS 0x04
+#define PROC_THERMAL_FEATURE_MBOX 0x08
+#define PROC_THERMAL_FEATURE_DLVR 0x10
+
+#if IS_ENABLED(CONFIG_PROC_THERMAL_MMIO_RAPL)
+int proc_thermal_rapl_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
+void proc_thermal_rapl_remove(void);
+#else
+static int __maybe_unused proc_thermal_rapl_add(struct pci_dev *pdev,
+ struct proc_thermal_device *proc_priv)
+{
+ return 0;
+}
+
+static void __maybe_unused proc_thermal_rapl_remove(void)
+{
+}
+#endif
+
+int proc_thermal_rfim_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
+void proc_thermal_rfim_remove(struct pci_dev *pdev);
+
+int proc_thermal_mbox_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
+void proc_thermal_mbox_remove(struct pci_dev *pdev);
+
+int processor_thermal_send_mbox_read_cmd(struct pci_dev *pdev, u16 id, u64 *resp);
+int processor_thermal_send_mbox_write_cmd(struct pci_dev *pdev, u16 id, u32 data);
+int proc_thermal_add(struct device *dev, struct proc_thermal_device *priv);
+void proc_thermal_remove(struct proc_thermal_device *proc_priv);
+int proc_thermal_suspend(struct device *dev);
+int proc_thermal_resume(struct device *dev);
+int proc_thermal_mmio_add(struct pci_dev *pdev,
+ struct proc_thermal_device *proc_priv,
+ kernel_ulong_t feature_mask);
+void proc_thermal_mmio_remove(struct pci_dev *pdev, struct proc_thermal_device *proc_priv);
+#endif
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
new file mode 100644
index 0000000000..0d1e980072
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Processor thermal device for newer processors
+ * Copyright (c) 2020, Intel Corporation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/thermal.h>
+
+#include "int340x_thermal_zone.h"
+#include "processor_thermal_device.h"
+
+#define DRV_NAME "proc_thermal_pci"
+
+struct proc_thermal_pci {
+ struct pci_dev *pdev;
+ struct proc_thermal_device *proc_priv;
+ struct thermal_zone_device *tzone;
+ struct delayed_work work;
+ int stored_thres;
+ int no_legacy;
+};
+
+enum proc_thermal_mmio_type {
+ PROC_THERMAL_MMIO_TJMAX,
+ PROC_THERMAL_MMIO_PP0_TEMP,
+ PROC_THERMAL_MMIO_PP1_TEMP,
+ PROC_THERMAL_MMIO_PKG_TEMP,
+ PROC_THERMAL_MMIO_THRES_0,
+ PROC_THERMAL_MMIO_THRES_1,
+ PROC_THERMAL_MMIO_INT_ENABLE_0,
+ PROC_THERMAL_MMIO_INT_ENABLE_1,
+ PROC_THERMAL_MMIO_INT_STATUS_0,
+ PROC_THERMAL_MMIO_INT_STATUS_1,
+ PROC_THERMAL_MMIO_MAX
+};
+
+struct proc_thermal_mmio_info {
+ enum proc_thermal_mmio_type mmio_type;
+ u64 mmio_addr;
+ u64 shift;
+ u64 mask;
+};
+
+static struct proc_thermal_mmio_info proc_thermal_mmio_info[] = {
+ { PROC_THERMAL_MMIO_TJMAX, 0x599c, 16, 0xff },
+ { PROC_THERMAL_MMIO_PP0_TEMP, 0x597c, 0, 0xff },
+ { PROC_THERMAL_MMIO_PP1_TEMP, 0x5980, 0, 0xff },
+ { PROC_THERMAL_MMIO_PKG_TEMP, 0x5978, 0, 0xff },
+ { PROC_THERMAL_MMIO_THRES_0, 0x5820, 8, 0x7F },
+ { PROC_THERMAL_MMIO_THRES_1, 0x5820, 16, 0x7F },
+ { PROC_THERMAL_MMIO_INT_ENABLE_0, 0x5820, 15, 0x01 },
+ { PROC_THERMAL_MMIO_INT_ENABLE_1, 0x5820, 23, 0x01 },
+ { PROC_THERMAL_MMIO_INT_STATUS_0, 0x7200, 6, 0x01 },
+ { PROC_THERMAL_MMIO_INT_STATUS_1, 0x7200, 8, 0x01 },
+};
+
+#define B0D4_THERMAL_NOTIFY_DELAY 1000
+static int notify_delay_ms = B0D4_THERMAL_NOTIFY_DELAY;
+
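+/*
+ * Each field lives inside a 32-bit MMIO register; extract or update it
+ * using the offset, shift and mask from proc_thermal_mmio_info[].
+ */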
+static void proc_thermal_mmio_read(struct proc_thermal_pci *pci_info,
+ enum proc_thermal_mmio_type type,
+ u32 *value)
+{
+ *value = ioread32(((u8 __iomem *)pci_info->proc_priv->mmio_base +
+ proc_thermal_mmio_info[type].mmio_addr));
+ *value >>= proc_thermal_mmio_info[type].shift;
+ *value &= proc_thermal_mmio_info[type].mask;
+}
+
+static void proc_thermal_mmio_write(struct proc_thermal_pci *pci_info,
+ enum proc_thermal_mmio_type type,
+ u32 value)
+{
+ u32 current_val;
+ u32 mask;
+
+ current_val = ioread32(((u8 __iomem *)pci_info->proc_priv->mmio_base +
+ proc_thermal_mmio_info[type].mmio_addr));
+ mask = proc_thermal_mmio_info[type].mask << proc_thermal_mmio_info[type].shift;
+ current_val &= ~mask;
+
+ value &= proc_thermal_mmio_info[type].mask;
+ value <<= proc_thermal_mmio_info[type].shift;
+
+ current_val |= value;
+ iowrite32(current_val, ((u8 __iomem *)pci_info->proc_priv->mmio_base +
+ proc_thermal_mmio_info[type].mmio_addr));
+}
+
+/*
+ * To avoid sending too many messages to user space, notifications are
+ * rate limited to one per second: the interrupt handler disables the
+ * interrupt and this delayed work function re-enables it afterwards.
+ */
+static void proc_thermal_threshold_work_fn(struct work_struct *work)
+{
+ struct delayed_work *delayed_work = to_delayed_work(work);
+ struct proc_thermal_pci *pci_info = container_of(delayed_work,
+ struct proc_thermal_pci, work);
+ struct thermal_zone_device *tzone = pci_info->tzone;
+
+ if (tzone)
+ thermal_zone_device_update(tzone, THERMAL_TRIP_VIOLATED);
+
+ /* Enable interrupt flag */
+ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 1);
+}
+
+static void pkg_thermal_schedule_work(struct delayed_work *work)
+{
+ unsigned long ms = msecs_to_jiffies(notify_delay_ms);
+
+ schedule_delayed_work(work, ms);
+}
+
+static irqreturn_t proc_thermal_irq_handler(int irq, void *devid)
+{
+ struct proc_thermal_pci *pci_info = devid;
+ u32 status;
+
+ proc_thermal_mmio_read(pci_info, PROC_THERMAL_MMIO_INT_STATUS_0, &status);
+
+ /* Clear the interrupt enable flag */
+ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0);
+ pci_write_config_byte(pci_info->pdev, 0xdc, 0x01);
+
+ pkg_thermal_schedule_work(&pci_info->work);
+
+ return IRQ_HANDLED;
+}
+
+static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
+{
+ struct proc_thermal_pci *pci_info = thermal_zone_device_priv(tzd);
+ u32 _temp;
+
+ proc_thermal_mmio_read(pci_info, PROC_THERMAL_MMIO_PKG_TEMP, &_temp);
+ *temp = (unsigned long)_temp * 1000;
+
+ return 0;
+}
+
+static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
+{
+ struct proc_thermal_pci *pci_info = thermal_zone_device_priv(tzd);
+ int tjmax, _temp;
+
+ if (temp <= 0) {
+ cancel_delayed_work_sync(&pci_info->work);
+ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0);
+ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0);
+ pci_info->stored_thres = 0;
+ return 0;
+ }
+
+ proc_thermal_mmio_read(pci_info, PROC_THERMAL_MMIO_TJMAX, &tjmax);
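+ /* The threshold register holds the offset below TjMax, in degrees C. */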
+ _temp = tjmax - (temp / 1000);
+ if (_temp < 0)
+ return -EINVAL;
+
+ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, _temp);
+ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 1);
+
+ pci_info->stored_thres = temp;
+
+ return 0;
+}
+
+static int get_trip_temp(struct proc_thermal_pci *pci_info)
+{
+ int temp, tjmax;
+
+ proc_thermal_mmio_read(pci_info, PROC_THERMAL_MMIO_THRES_0, &temp);
+ if (!temp)
+ return THERMAL_TEMP_INVALID;
+
+ proc_thermal_mmio_read(pci_info, PROC_THERMAL_MMIO_TJMAX, &tjmax);
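+ /* Convert the "degrees below TjMax" threshold back to millidegrees. */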
+ temp = (tjmax - temp) * 1000;
+
+ return temp;
+}
+
+static struct thermal_trip psv_trip = {
+ .type = THERMAL_TRIP_PASSIVE,
+};
+
+static struct thermal_zone_device_ops tzone_ops = {
+ .get_temp = sys_get_curr_temp,
+ .set_trip_temp = sys_set_trip_temp,
+};
+
+static struct thermal_zone_params tzone_params = {
+ .governor_name = "user_space",
+ .no_hwmon = true,
+};
+
+static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct proc_thermal_device *proc_priv;
+ struct proc_thermal_pci *pci_info;
+ int irq_flag = 0, irq, ret;
+
+ proc_priv = devm_kzalloc(&pdev->dev, sizeof(*proc_priv), GFP_KERNEL);
+ if (!proc_priv)
+ return -ENOMEM;
+
+ pci_info = devm_kzalloc(&pdev->dev, sizeof(*pci_info), GFP_KERNEL);
+ if (!pci_info)
+ return -ENOMEM;
+
+ pci_info->pdev = pdev;
+ ret = pcim_enable_device(pdev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "error: could not enable device\n");
+ return ret;
+ }
+
+ pci_set_master(pdev);
+
+ INIT_DELAYED_WORK(&pci_info->work, proc_thermal_threshold_work_fn);
+
+ ret = proc_thermal_add(&pdev->dev, proc_priv);
+ if (ret) {
+ dev_err(&pdev->dev, "error: proc_thermal_add, will continue\n");
+ pci_info->no_legacy = 1;
+ }
+
+ proc_priv->priv_data = pci_info;
+ pci_info->proc_priv = proc_priv;
+ pci_set_drvdata(pdev, proc_priv);
+
+ ret = proc_thermal_mmio_add(pdev, proc_priv, id->driver_data);
+ if (ret)
+ goto err_ret_thermal;
+
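+ /* Seed the passive trip with whatever threshold is already programmed. */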
+ psv_trip.temperature = get_trip_temp(pci_info);
+
+ pci_info->tzone = thermal_zone_device_register_with_trips("TCPU_PCI", &psv_trip,
+ 1, 1, pci_info,
+ &tzone_ops,
+ &tzone_params, 0, 0);
+ if (IS_ERR(pci_info->tzone)) {
+ ret = PTR_ERR(pci_info->tzone);
+ goto err_ret_mmio;
+ }
+
+ /* request and enable interrupt */
+ ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to allocate vectors!\n");
+ goto err_ret_tzone;
+ }
+ if (!pdev->msi_enabled && !pdev->msix_enabled)
+ irq_flag = IRQF_SHARED;
+
+ irq = pci_irq_vector(pdev, 0);
+ ret = devm_request_threaded_irq(&pdev->dev, irq,
+ proc_thermal_irq_handler, NULL,
+ irq_flag, KBUILD_MODNAME, pci_info);
+ if (ret) {
+ dev_err(&pdev->dev, "Request IRQ %d failed\n", pdev->irq);
+ goto err_free_vectors;
+ }
+
+ ret = thermal_zone_device_enable(pci_info->tzone);
+ if (ret)
+ goto err_free_vectors;
+
+ return 0;
+
+err_free_vectors:
+ pci_free_irq_vectors(pdev);
+err_ret_tzone:
+ thermal_zone_device_unregister(pci_info->tzone);
+err_ret_mmio:
+ proc_thermal_mmio_remove(pdev, proc_priv);
+err_ret_thermal:
+ if (!pci_info->no_legacy)
+ proc_thermal_remove(proc_priv);
+ pci_disable_device(pdev);
+
+ return ret;
+}
+
+static void proc_thermal_pci_remove(struct pci_dev *pdev)
+{
+ struct proc_thermal_device *proc_priv = pci_get_drvdata(pdev);
+ struct proc_thermal_pci *pci_info = proc_priv->priv_data;
+
+ cancel_delayed_work_sync(&pci_info->work);
+
+ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0);
+ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0);
+
+ devm_free_irq(&pdev->dev, pdev->irq, pci_info);
+ pci_free_irq_vectors(pdev);
+
+ thermal_zone_device_unregister(pci_info->tzone);
+ proc_thermal_mmio_remove(pdev, pci_info->proc_priv);
+ if (!pci_info->no_legacy)
+ proc_thermal_remove(proc_priv);
+ pci_disable_device(pdev);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int proc_thermal_pci_suspend(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct proc_thermal_device *proc_priv;
+ struct proc_thermal_pci *pci_info;
+
+ proc_priv = pci_get_drvdata(pdev);
+ pci_info = proc_priv->priv_data;
+
+ if (!pci_info->no_legacy)
+ return proc_thermal_suspend(dev);
+
+ return 0;
+}
+
+static int proc_thermal_pci_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct proc_thermal_device *proc_priv;
+ struct proc_thermal_pci *pci_info;
+
+ proc_priv = pci_get_drvdata(pdev);
+ pci_info = proc_priv->priv_data;
+
+ if (pci_info->stored_thres) {
+ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0,
+ pci_info->stored_thres / 1000);
+ proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 1);
+ }
+
+ if (!pci_info->no_legacy)
+ return proc_thermal_resume(dev);
+
+ return 0;
+}
+#else
+#define proc_thermal_pci_suspend NULL
+#define proc_thermal_pci_resume NULL
+#endif
+
+static SIMPLE_DEV_PM_OPS(proc_thermal_pci_pm, proc_thermal_pci_suspend,
+ proc_thermal_pci_resume);
+
+static const struct pci_device_id proc_thermal_pci_ids[] = {
+ { PCI_DEVICE_DATA(INTEL, ADL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX) },
+ { PCI_DEVICE_DATA(INTEL, MTLP_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX | PROC_THERMAL_FEATURE_DLVR) },
+ { PCI_DEVICE_DATA(INTEL, RPL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_DVFS | PROC_THERMAL_FEATURE_MBOX) },
+ { },
+};
+
+MODULE_DEVICE_TABLE(pci, proc_thermal_pci_ids);
+
+static struct pci_driver proc_thermal_pci_driver = {
+ .name = DRV_NAME,
+ .probe = proc_thermal_pci_probe,
+ .remove = proc_thermal_pci_remove,
+ .id_table = proc_thermal_pci_ids,
+ .driver.pm = &proc_thermal_pci_pm,
+};
+
+module_pci_driver(proc_thermal_pci_driver);
+
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_DESCRIPTION("Processor Thermal Reporting Device Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
new file mode 100644
index 0000000000..16fd9df5f3
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci_legacy.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * B0D4 processor thermal device
+ * Copyright (c) 2020, Intel Corporation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/thermal.h>
+
+#include "int340x_thermal_zone.h"
+#include "processor_thermal_device.h"
+#include "../intel_soc_dts_iosf.h"
+
+#define DRV_NAME "proc_thermal"
+
+static irqreturn_t proc_thermal_pci_msi_irq(int irq, void *devid)
+{
+ struct proc_thermal_device *proc_priv;
+ struct pci_dev *pdev = devid;
+
+ proc_priv = pci_get_drvdata(pdev);
+
+ intel_soc_dts_iosf_interrupt_handler(proc_priv->soc_dts);
+
+ return IRQ_HANDLED;
+}
+
+static int proc_thermal_pci_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ struct proc_thermal_device *proc_priv;
+ int ret;
+
+ ret = pcim_enable_device(pdev);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "error: could not enable device\n");
+ return ret;
+ }
+
+ proc_priv = devm_kzalloc(&pdev->dev, sizeof(*proc_priv), GFP_KERNEL);
+ if (!proc_priv)
+ return -ENOMEM;
+
+ ret = proc_thermal_add(&pdev->dev, proc_priv);
+ if (ret)
+ return ret;
+
+ pci_set_drvdata(pdev, proc_priv);
+
+ if (pdev->device == PCI_DEVICE_ID_INTEL_BSW_THERMAL) {
+ /*
+ * Enumerate additional DTS sensors available via IOSF.
+ * This is not treated as a failure condition: the driver
+ * already exposes sensors accessible via ACPI/MSR, so probe
+ * must not fail when auxiliary DTSs are absent or fail to
+ * initialize.
+ */
+ proc_priv->soc_dts = intel_soc_dts_iosf_init(
+ INTEL_SOC_DTS_INTERRUPT_MSI, false, 0);
+
+ if (!IS_ERR(proc_priv->soc_dts) && pdev->irq) {
+ ret = pci_enable_msi(pdev);
+ if (!ret) {
+ ret = request_threaded_irq(pdev->irq, NULL,
+ proc_thermal_pci_msi_irq,
+ IRQF_ONESHOT, "proc_thermal",
+ pdev);
+ if (ret) {
+ intel_soc_dts_iosf_exit(
+ proc_priv->soc_dts);
+ pci_disable_msi(pdev);
+ proc_priv->soc_dts = NULL;
+ }
+ }
+ } else
+ dev_err(&pdev->dev, "No auxiliary DTSs enabled\n");
+ }
+
+ ret = proc_thermal_mmio_add(pdev, proc_priv, id->driver_data);
+ if (ret) {
+ proc_thermal_remove(proc_priv);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void proc_thermal_pci_remove(struct pci_dev *pdev)
+{
+ struct proc_thermal_device *proc_priv = pci_get_drvdata(pdev);
+
+ if (proc_priv->soc_dts) {
+ intel_soc_dts_iosf_exit(proc_priv->soc_dts);
+ if (pdev->irq) {
+ free_irq(pdev->irq, pdev);
+ pci_disable_msi(pdev);
+ }
+ }
+
+ proc_thermal_mmio_remove(pdev, proc_priv);
+ proc_thermal_remove(proc_priv);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int proc_thermal_pci_suspend(struct device *dev)
+{
+ return proc_thermal_suspend(dev);
+}
+
+static int proc_thermal_pci_resume(struct device *dev)
+{
+ return proc_thermal_resume(dev);
+}
+#else
+#define proc_thermal_pci_suspend NULL
+#define proc_thermal_pci_resume NULL
+#endif
+
+static SIMPLE_DEV_PM_OPS(proc_thermal_pci_pm, proc_thermal_pci_suspend,
+ proc_thermal_pci_resume);
+
+static const struct pci_device_id proc_thermal_pci_ids[] = {
+ { PCI_DEVICE_DATA(INTEL, BDW_THERMAL, 0) },
+ { PCI_DEVICE_DATA(INTEL, BSW_THERMAL, 0) },
+ { PCI_DEVICE_DATA(INTEL, BXT0_THERMAL, 0) },
+ { PCI_DEVICE_DATA(INTEL, BXT1_THERMAL, 0) },
+ { PCI_DEVICE_DATA(INTEL, BXTX_THERMAL, 0) },
+ { PCI_DEVICE_DATA(INTEL, BXTP_THERMAL, 0) },
+ { PCI_DEVICE_DATA(INTEL, CNL_THERMAL, 0) },
+ { PCI_DEVICE_DATA(INTEL, CFL_THERMAL, 0) },
+ { PCI_DEVICE_DATA(INTEL, GLK_THERMAL, 0) },
+ { PCI_DEVICE_DATA(INTEL, HSB_THERMAL, 0) },
+ { PCI_DEVICE_DATA(INTEL, ICL_THERMAL, PROC_THERMAL_FEATURE_RAPL) },
+ { PCI_DEVICE_DATA(INTEL, JSL_THERMAL, 0) },
+ { PCI_DEVICE_DATA(INTEL, SKL_THERMAL, PROC_THERMAL_FEATURE_RAPL) },
+ { PCI_DEVICE_DATA(INTEL, TGL_THERMAL, PROC_THERMAL_FEATURE_RAPL | PROC_THERMAL_FEATURE_FIVR | PROC_THERMAL_FEATURE_MBOX) },
+ { },
+};
+
+MODULE_DEVICE_TABLE(pci, proc_thermal_pci_ids);
+
+static struct pci_driver proc_thermal_pci_driver = {
+ .name = DRV_NAME,
+ .probe = proc_thermal_pci_probe,
+ .remove = proc_thermal_pci_remove,
+ .id_table = proc_thermal_pci_ids,
+ .driver.pm = &proc_thermal_pci_pm,
+};
+
+module_pci_driver(proc_thermal_pci_driver);
+
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_DESCRIPTION("Processor Thermal Reporting Device Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c
new file mode 100644
index 0000000000..0b89a4340f
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_mbox.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * processor thermal device mailbox driver for Workload type hints
+ * Copyright (c) 2020, Intel Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include "processor_thermal_device.h"
+
+#define MBOX_CMD_WORKLOAD_TYPE_READ 0x0E
+#define MBOX_CMD_WORKLOAD_TYPE_WRITE 0x0F
+
+#define MBOX_OFFSET_DATA 0x5810
+#define MBOX_OFFSET_INTERFACE 0x5818
+
+#define MBOX_BUSY_BIT 31
+#define MBOX_RETRY_COUNT 100
+
+#define MBOX_DATA_BIT_VALID 31
+#define MBOX_DATA_BIT_AC_DC 30
+
+static DEFINE_MUTEX(mbox_lock);
+
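+/*
+ * Mailbox protocol: wait for the busy bit to clear, write any payload,
+ * then write the command with the busy bit set and wait for completion.
+ */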
+static int wait_for_mbox_ready(struct proc_thermal_device *proc_priv)
+{
+ u32 retries, data;
+ int ret;
+
+ /* Poll until the run/busy bit clears */
+ retries = MBOX_RETRY_COUNT;
+ do {
+ data = readl(proc_priv->mmio_base + MBOX_OFFSET_INTERFACE);
+ if (data & BIT_ULL(MBOX_BUSY_BIT)) {
+ ret = -EBUSY;
+ continue;
+ }
+ ret = 0;
+ break;
+ } while (--retries);
+
+ return ret;
+}
+
+static int send_mbox_write_cmd(struct pci_dev *pdev, u16 id, u32 data)
+{
+ struct proc_thermal_device *proc_priv;
+ u32 reg_data;
+ int ret;
+
+ proc_priv = pci_get_drvdata(pdev);
+
+ mutex_lock(&mbox_lock);
+
+ ret = wait_for_mbox_ready(proc_priv);
+ if (ret)
+ goto unlock_mbox;
+
+ writel(data, (proc_priv->mmio_base + MBOX_OFFSET_DATA));
+ /* Write command register */
+ reg_data = BIT_ULL(MBOX_BUSY_BIT) | id;
+ writel(reg_data, (proc_priv->mmio_base + MBOX_OFFSET_INTERFACE));
+
+ ret = wait_for_mbox_ready(proc_priv);
+
+unlock_mbox:
+ mutex_unlock(&mbox_lock);
+ return ret;
+}
+
+static int send_mbox_read_cmd(struct pci_dev *pdev, u16 id, u64 *resp)
+{
+ struct proc_thermal_device *proc_priv;
+ u32 reg_data;
+ int ret;
+
+ proc_priv = pci_get_drvdata(pdev);
+
+ mutex_lock(&mbox_lock);
+
+ ret = wait_for_mbox_ready(proc_priv);
+ if (ret)
+ goto unlock_mbox;
+
+ /* Write command register */
+ reg_data = BIT_ULL(MBOX_BUSY_BIT) | id;
+ writel(reg_data, (proc_priv->mmio_base + MBOX_OFFSET_INTERFACE));
+
+ ret = wait_for_mbox_ready(proc_priv);
+ if (ret)
+ goto unlock_mbox;
+
+ if (id == MBOX_CMD_WORKLOAD_TYPE_READ)
+ *resp = readl(proc_priv->mmio_base + MBOX_OFFSET_DATA);
+ else
+ *resp = readq(proc_priv->mmio_base + MBOX_OFFSET_DATA);
+
+unlock_mbox:
+ mutex_unlock(&mbox_lock);
+ return ret;
+}
+
+int processor_thermal_send_mbox_read_cmd(struct pci_dev *pdev, u16 id, u64 *resp)
+{
+ return send_mbox_read_cmd(pdev, id, resp);
+}
+EXPORT_SYMBOL_NS_GPL(processor_thermal_send_mbox_read_cmd, INT340X_THERMAL);
+
+int processor_thermal_send_mbox_write_cmd(struct pci_dev *pdev, u16 id, u32 data)
+{
+ return send_mbox_write_cmd(pdev, id, data);
+}
+EXPORT_SYMBOL_NS_GPL(processor_thermal_send_mbox_write_cmd, INT340X_THERMAL);
+
+/* List of workload types */
+static const char * const workload_types[] = {
+ "none",
+ "idle",
+ "semi_active",
+ "bursty",
+ "sustained",
+ "battery_life",
+ NULL
+};
+
+static ssize_t workload_available_types_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int i = 0;
+ int ret = 0;
+
+ while (workload_types[i] != NULL)
+ ret += sprintf(&buf[ret], "%s ", workload_types[i++]);
+
+ ret += sprintf(&buf[ret], "\n");
+
+ return ret;
+}
+
+static DEVICE_ATTR_RO(workload_available_types);
+
+static ssize_t workload_type_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ char str_preference[15];
+ u32 data = 0;
+ ssize_t ret;
+
+ ret = sscanf(buf, "%14s", str_preference);
+ if (ret != 1)
+ return -EINVAL;
+
+ ret = match_string(workload_types, -1, str_preference);
+ if (ret < 0)
+ return ret;
+
+ ret &= 0xff;
+
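+ /*
+ * Any type other than "none" is sent with the valid bit set; the
+ * AC/DC bit is set as well, presumably as a power-source hint.
+ */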
+ if (ret)
+ data = BIT(MBOX_DATA_BIT_VALID) | BIT(MBOX_DATA_BIT_AC_DC);
+
+ data |= ret;
+
+ ret = send_mbox_write_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_WRITE, data);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static ssize_t workload_type_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u64 cmd_resp;
+ int ret;
+
+ ret = send_mbox_read_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_READ, &cmd_resp);
+ if (ret)
+ return ret;
+
+ cmd_resp &= 0xff;
+
+ if (cmd_resp >= ARRAY_SIZE(workload_types) - 1)
+ return -EINVAL;
+
+ return sprintf(buf, "%s\n", workload_types[cmd_resp]);
+}
+
+static DEVICE_ATTR_RW(workload_type);
+
+static struct attribute *workload_req_attrs[] = {
+ &dev_attr_workload_available_types.attr,
+ &dev_attr_workload_type.attr,
+ NULL
+};
+
+static const struct attribute_group workload_req_attribute_group = {
+ .attrs = workload_req_attrs,
+ .name = "workload_request"
+};
+
+static bool workload_req_created;
+
+int proc_thermal_mbox_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv)
+{
+ u64 cmd_resp;
+ int ret;
+
+ /* Check whether mailbox support is present; if not, return success */
+ ret = send_mbox_read_cmd(pdev, MBOX_CMD_WORKLOAD_TYPE_READ, &cmd_resp);
+ if (ret)
+ return 0;
+
+ ret = sysfs_create_group(&pdev->dev.kobj, &workload_req_attribute_group);
+ if (ret)
+ return ret;
+
+ workload_req_created = true;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(proc_thermal_mbox_add);
+
+void proc_thermal_mbox_remove(struct pci_dev *pdev)
+{
+ if (workload_req_created)
+ sysfs_remove_group(&pdev->dev.kobj, &workload_req_attribute_group);
+
+ workload_req_created = false;
+
+EXPORT_SYMBOL_GPL(proc_thermal_mbox_remove);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
new file mode 100644
index 0000000000..2f00fc3bf2
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * processor thermal device RAPL control
+ * Copyright (c) 2020, Intel Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "processor_thermal_device.h"
+
+static struct rapl_if_priv rapl_mmio_priv;
+
+static const struct rapl_mmio_regs rapl_mmio_default = {
+ .reg_unit = 0x5938,
+ .regs[RAPL_DOMAIN_PACKAGE] = { 0x59a0, 0x593c, 0x58f0, 0, 0x5930},
+ .regs[RAPL_DOMAIN_DRAM] = { 0x58e0, 0x58e8, 0x58ec, 0, 0},
+ .limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2),
+ .limits[RAPL_DOMAIN_DRAM] = BIT(POWER_LIMIT2),
+};
+
+static int rapl_mmio_cpu_online(unsigned int cpu)
+{
+ struct rapl_package *rp;
+
+ /* mmio rapl supports package 0 only for now */
+ if (topology_physical_package_id(cpu))
+ return 0;
+
+ rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true);
+ if (!rp) {
+ rp = rapl_add_package(cpu, &rapl_mmio_priv, true);
+ if (IS_ERR(rp))
+ return PTR_ERR(rp);
+ }
+ cpumask_set_cpu(cpu, &rp->cpumask);
+ return 0;
+}
+
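+/*
+ * On CPU offline, drop the CPU from its package mask; remove the package
+ * when the last CPU goes away, or elect a new lead CPU if the departing
+ * CPU was the leader.
+ */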
+static int rapl_mmio_cpu_down_prep(unsigned int cpu)
+{
+ struct rapl_package *rp;
+ int lead_cpu;
+
+ rp = rapl_find_package_domain(cpu, &rapl_mmio_priv, true);
+ if (!rp)
+ return 0;
+
+ cpumask_clear_cpu(cpu, &rp->cpumask);
+ lead_cpu = cpumask_first(&rp->cpumask);
+ if (lead_cpu >= nr_cpu_ids)
+ rapl_remove_package(rp);
+ else if (rp->lead_cpu == cpu)
+ rp->lead_cpu = lead_cpu;
+ return 0;
+}
+
+static int rapl_mmio_read_raw(int cpu, struct reg_action *ra)
+{
+ if (!ra->reg.mmio)
+ return -EINVAL;
+
+ ra->value = readq(ra->reg.mmio);
+ ra->value &= ra->mask;
+ return 0;
+}
+
+static int rapl_mmio_write_raw(int cpu, struct reg_action *ra)
+{
+ u64 val;
+
+ if (!ra->reg.mmio)
+ return -EINVAL;
+
+ val = readq(ra->reg.mmio);
+ val &= ~ra->mask;
+ val |= ra->value;
+ writeq(val, ra->reg.mmio);
+ return 0;
+}
+
+int proc_thermal_rapl_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv)
+{
+ const struct rapl_mmio_regs *rapl_regs = &rapl_mmio_default;
+ enum rapl_domain_reg_id reg;
+ enum rapl_domain_type domain;
+ int ret;
+
+ if (!rapl_regs)
+ return 0;
+
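+ /* Map each known per-domain register offset into the device's MMIO window. */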
+ for (domain = RAPL_DOMAIN_PACKAGE; domain < RAPL_DOMAIN_MAX; domain++) {
+ for (reg = RAPL_DOMAIN_REG_LIMIT; reg < RAPL_DOMAIN_REG_MAX; reg++)
+ if (rapl_regs->regs[domain][reg])
+ rapl_mmio_priv.regs[domain][reg].mmio =
+ proc_priv->mmio_base +
+ rapl_regs->regs[domain][reg];
+ rapl_mmio_priv.limits[domain] = rapl_regs->limits[domain];
+ }
+ rapl_mmio_priv.type = RAPL_IF_MMIO;
+ rapl_mmio_priv.reg_unit.mmio = proc_priv->mmio_base + rapl_regs->reg_unit;
+
+ rapl_mmio_priv.read_raw = rapl_mmio_read_raw;
+ rapl_mmio_priv.write_raw = rapl_mmio_write_raw;
+
+ rapl_mmio_priv.control_type = powercap_register_control_type(NULL, "intel-rapl-mmio", NULL);
+ if (IS_ERR(rapl_mmio_priv.control_type)) {
+ pr_debug("failed to register powercap control_type.\n");
+ return PTR_ERR(rapl_mmio_priv.control_type);
+ }
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powercap/rapl:online",
+ rapl_mmio_cpu_online, rapl_mmio_cpu_down_prep);
+ if (ret < 0) {
+ powercap_unregister_control_type(rapl_mmio_priv.control_type);
+ rapl_mmio_priv.control_type = NULL;
+ return ret;
+ }
+ rapl_mmio_priv.pcap_rapl_online = ret;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(proc_thermal_rapl_add);
+
+void proc_thermal_rapl_remove(void)
+{
+ if (IS_ERR_OR_NULL(rapl_mmio_priv.control_type))
+ return;
+
+ cpuhp_remove_state(rapl_mmio_priv.pcap_rapl_online);
+ powercap_unregister_control_type(rapl_mmio_priv.control_type);
+}
+EXPORT_SYMBOL_GPL(proc_thermal_rapl_remove);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c
new file mode 100644
index 0000000000..546b704340
--- /dev/null
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rfim.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * processor thermal device RFIM control
+ * Copyright (c) 2020, Intel Corporation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "processor_thermal_device.h"
+
+MODULE_IMPORT_NS(INT340X_THERMAL);
+
+struct mmio_reg {
+ int read_only;
+ u32 offset;
+ int bits;
+ u16 mask;
+ u16 shift;
+};
+
+/* These will represent sysfs attribute names */
+static const char * const fivr_strings[] = {
+ "vco_ref_code_lo",
+ "vco_ref_code_hi",
+ "spread_spectrum_pct",
+ "spread_spectrum_clk_enable",
+ "rfi_vco_ref_code",
+ "fivr_fffc_rev",
+ NULL
+};
+
+static const struct mmio_reg tgl_fivr_mmio_regs[] = {
+ { 0, 0x5A18, 3, 0x7, 11}, /* vco_ref_code_lo */
+ { 0, 0x5A18, 8, 0xFF, 16}, /* vco_ref_code_hi */
+ { 0, 0x5A08, 8, 0xFF, 0}, /* spread_spectrum_pct */
+ { 0, 0x5A08, 1, 0x1, 8}, /* spread_spectrum_clk_enable */
+ { 1, 0x5A10, 12, 0xFFF, 0}, /* rfi_vco_ref_code */
+ { 1, 0x5A14, 2, 0x3, 1}, /* fivr_fffc_rev */
+};
+
+static const char * const dlvr_strings[] = {
+ "dlvr_spread_spectrum_pct",
+ "dlvr_control_mode",
+ "dlvr_control_lock",
+ "dlvr_rfim_enable",
+ "dlvr_freq_select",
+ "dlvr_hardware_rev",
+ "dlvr_freq_mhz",
+ "dlvr_pll_busy",
+ NULL
+};
+
+static const struct mmio_reg dlvr_mmio_regs[] = {
+ { 0, 0x15A08, 5, 0x1F, 0}, /* dlvr_spread_spectrum_pct */
+ { 0, 0x15A08, 1, 0x1, 5}, /* dlvr_control_mode */
+ { 0, 0x15A08, 1, 0x1, 6}, /* dlvr_control_lock */
+ { 0, 0x15A08, 1, 0x1, 7}, /* dlvr_rfim_enable */
+ { 0, 0x15A08, 12, 0xFFF, 8}, /* dlvr_freq_select */
+ { 1, 0x15A10, 2, 0x3, 30}, /* dlvr_hardware_rev */
+ { 1, 0x15A10, 16, 0xFFFF, 0}, /* dlvr_freq_mhz */
+ { 1, 0x15A10, 1, 0x1, 16}, /* dlvr_pll_busy */
+};
+
+/* These will represent sysfs attribute names */
+static const char * const dvfs_strings[] = {
+ "rfi_restriction_run_busy",
+ "rfi_restriction_err_code",
+ "rfi_restriction_data_rate",
+ "rfi_restriction_data_rate_base",
+ "ddr_data_rate_point_0",
+ "ddr_data_rate_point_1",
+ "ddr_data_rate_point_2",
+ "ddr_data_rate_point_3",
+ "rfi_disable",
+ NULL
+};
+
+static const struct mmio_reg adl_dvfs_mmio_regs[] = {
+ { 0, 0x5A38, 1, 0x1, 31}, /* rfi_restriction_run_busy */
+ { 0, 0x5A38, 7, 0x7F, 24}, /* rfi_restriction_err_code */
+ { 0, 0x5A38, 8, 0xFF, 16}, /* rfi_restriction_data_rate */
+ { 0, 0x5A38, 16, 0xFFFF, 0}, /* rfi_restriction_data_rate_base */
+ { 0, 0x5A30, 10, 0x3FF, 0}, /* ddr_data_rate_point_0 */
+ { 0, 0x5A30, 10, 0x3FF, 10}, /* ddr_data_rate_point_1 */
+ { 0, 0x5A30, 10, 0x3FF, 20}, /* ddr_data_rate_point_2 */
+ { 0, 0x5A30, 10, 0x3FF, 30}, /* ddr_data_rate_point_3 */
+ { 0, 0x5A40, 1, 0x1, 0}, /* rfi_disable */
+};
+
+#define RFIM_SHOW(suffix, table)\
+static ssize_t suffix##_show(struct device *dev,\
+ struct device_attribute *attr,\
+ char *buf)\
+{\
+ struct proc_thermal_device *proc_priv;\
+ struct pci_dev *pdev = to_pci_dev(dev);\
+ const struct mmio_reg *mmio_regs;\
+ const char **match_strs;\
+ u32 reg_val;\
+ int ret;\
+\
+ proc_priv = pci_get_drvdata(pdev);\
+ if (table == 1) {\
+ match_strs = (const char **)dvfs_strings;\
+ mmio_regs = adl_dvfs_mmio_regs;\
+ } else if (table == 2) { \
+ match_strs = (const char **)dlvr_strings;\
+ mmio_regs = dlvr_mmio_regs;\
+ } else {\
+ match_strs = (const char **)fivr_strings;\
+ mmio_regs = tgl_fivr_mmio_regs;\
+ } \
+ ret = match_string(match_strs, -1, attr->attr.name);\
+ if (ret < 0)\
+ return ret;\
+ reg_val = readl((void __iomem *) (proc_priv->mmio_base + mmio_regs[ret].offset));\
+ ret = (reg_val >> mmio_regs[ret].shift) & mmio_regs[ret].mask;\
+ return sprintf(buf, "%u\n", ret);\
+}
+
+#define RFIM_STORE(suffix, table)\
+static ssize_t suffix##_store(struct device *dev,\
+ struct device_attribute *attr,\
+ const char *buf, size_t count)\
+{\
+ struct proc_thermal_device *proc_priv;\
+ struct pci_dev *pdev = to_pci_dev(dev);\
+ unsigned int input;\
+ const char **match_strs;\
+ const struct mmio_reg *mmio_regs;\
+ int ret, err;\
+ u32 reg_val;\
+ u32 mask;\
+\
+ proc_priv = pci_get_drvdata(pdev);\
+ if (table == 1) {\
+ match_strs = (const char **)dvfs_strings;\
+ mmio_regs = adl_dvfs_mmio_regs;\
+ } else if (table == 2) { \
+ match_strs = (const char **)dlvr_strings;\
+ mmio_regs = dlvr_mmio_regs;\
+ } else {\
+ match_strs = (const char **)fivr_strings;\
+ mmio_regs = tgl_fivr_mmio_regs;\
+ } \
+ \
+ ret = match_string(match_strs, -1, attr->attr.name);\
+ if (ret < 0)\
+ return ret;\
+ if (mmio_regs[ret].read_only)\
+ return -EPERM;\
+ err = kstrtouint(buf, 10, &input);\
+ if (err)\
+ return err;\
+ mask = GENMASK(mmio_regs[ret].shift + mmio_regs[ret].bits - 1, mmio_regs[ret].shift);\
+ reg_val = readl((void __iomem *) (proc_priv->mmio_base + mmio_regs[ret].offset));\
+ reg_val &= ~mask;\
+ reg_val |= (input << mmio_regs[ret].shift);\
+ writel(reg_val, (void __iomem *) (proc_priv->mmio_base + mmio_regs[ret].offset));\
+ return count;\
+}
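+
+/*
+ * Expansion sketch (for illustration only, following the tables above):
+ * RFIM_SHOW(vco_ref_code_lo, 0) generates vco_ref_code_lo_show(), which
+ * matches "vco_ref_code_lo" against fivr_strings[] (index 0), reads the
+ * dword at mmio_base + 0x5A18 and returns (val >> 11) & 0x7. The matching
+ * store builds GENMASK(13, 11), clears those bits and ORs in input << 11.
+ */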
+
+RFIM_SHOW(vco_ref_code_lo, 0)
+RFIM_SHOW(vco_ref_code_hi, 0)
+RFIM_SHOW(spread_spectrum_pct, 0)
+RFIM_SHOW(spread_spectrum_clk_enable, 0)
+RFIM_SHOW(rfi_vco_ref_code, 0)
+RFIM_SHOW(fivr_fffc_rev, 0)
+
+RFIM_STORE(vco_ref_code_lo, 0)
+RFIM_STORE(vco_ref_code_hi, 0)
+RFIM_STORE(spread_spectrum_pct, 0)
+RFIM_STORE(spread_spectrum_clk_enable, 0)
+RFIM_STORE(rfi_vco_ref_code, 0)
+RFIM_STORE(fivr_fffc_rev, 0)
+
+RFIM_SHOW(dlvr_spread_spectrum_pct, 2)
+RFIM_SHOW(dlvr_control_mode, 2)
+RFIM_SHOW(dlvr_control_lock, 2)
+RFIM_SHOW(dlvr_hardware_rev, 2)
+RFIM_SHOW(dlvr_freq_mhz, 2)
+RFIM_SHOW(dlvr_pll_busy, 2)
+RFIM_SHOW(dlvr_freq_select, 2)
+RFIM_SHOW(dlvr_rfim_enable, 2)
+
+RFIM_STORE(dlvr_spread_spectrum_pct, 2)
+RFIM_STORE(dlvr_rfim_enable, 2)
+RFIM_STORE(dlvr_freq_select, 2)
+RFIM_STORE(dlvr_control_mode, 2)
+RFIM_STORE(dlvr_control_lock, 2)
+
+static DEVICE_ATTR_RW(dlvr_spread_spectrum_pct);
+static DEVICE_ATTR_RW(dlvr_control_mode);
+static DEVICE_ATTR_RW(dlvr_control_lock);
+static DEVICE_ATTR_RW(dlvr_freq_select);
+static DEVICE_ATTR_RO(dlvr_hardware_rev);
+static DEVICE_ATTR_RO(dlvr_freq_mhz);
+static DEVICE_ATTR_RO(dlvr_pll_busy);
+static DEVICE_ATTR_RW(dlvr_rfim_enable);
+
+static struct attribute *dlvr_attrs[] = {
+ &dev_attr_dlvr_spread_spectrum_pct.attr,
+ &dev_attr_dlvr_control_mode.attr,
+ &dev_attr_dlvr_control_lock.attr,
+ &dev_attr_dlvr_freq_select.attr,
+ &dev_attr_dlvr_hardware_rev.attr,
+ &dev_attr_dlvr_freq_mhz.attr,
+ &dev_attr_dlvr_pll_busy.attr,
+ &dev_attr_dlvr_rfim_enable.attr,
+ NULL
+};
+
+static const struct attribute_group dlvr_attribute_group = {
+ .attrs = dlvr_attrs,
+ .name = "dlvr"
+};
+
+static DEVICE_ATTR_RW(vco_ref_code_lo);
+static DEVICE_ATTR_RW(vco_ref_code_hi);
+static DEVICE_ATTR_RW(spread_spectrum_pct);
+static DEVICE_ATTR_RW(spread_spectrum_clk_enable);
+static DEVICE_ATTR_RW(rfi_vco_ref_code);
+static DEVICE_ATTR_RW(fivr_fffc_rev);
+
+static struct attribute *fivr_attrs[] = {
+ &dev_attr_vco_ref_code_lo.attr,
+ &dev_attr_vco_ref_code_hi.attr,
+ &dev_attr_spread_spectrum_pct.attr,
+ &dev_attr_spread_spectrum_clk_enable.attr,
+ &dev_attr_rfi_vco_ref_code.attr,
+ &dev_attr_fivr_fffc_rev.attr,
+ NULL
+};
+
+static const struct attribute_group fivr_attribute_group = {
+ .attrs = fivr_attrs,
+ .name = "fivr"
+};
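+
+/*
+ * With the group named "fivr" registered on the PCI device's kobject by
+ * proc_thermal_rfim_add() below, the attributes above would appear under
+ * an illustrative path such as
+ * /sys/bus/pci/devices/<BDF>/fivr/vco_ref_code_lo.
+ */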
+
+RFIM_SHOW(rfi_restriction_run_busy, 1)
+RFIM_SHOW(rfi_restriction_err_code, 1)
+RFIM_SHOW(rfi_restriction_data_rate, 1)
+RFIM_SHOW(rfi_restriction_data_rate_base, 1)
+RFIM_SHOW(ddr_data_rate_point_0, 1)
+RFIM_SHOW(ddr_data_rate_point_1, 1)
+RFIM_SHOW(ddr_data_rate_point_2, 1)
+RFIM_SHOW(ddr_data_rate_point_3, 1)
+RFIM_SHOW(rfi_disable, 1)
+
+RFIM_STORE(rfi_restriction_run_busy, 1)
+RFIM_STORE(rfi_restriction_err_code, 1)
+RFIM_STORE(rfi_restriction_data_rate, 1)
+RFIM_STORE(rfi_restriction_data_rate_base, 1)
+RFIM_STORE(rfi_disable, 1)
+
+static DEVICE_ATTR_RW(rfi_restriction_run_busy);
+static DEVICE_ATTR_RW(rfi_restriction_err_code);
+static DEVICE_ATTR_RW(rfi_restriction_data_rate);
+static DEVICE_ATTR_RW(rfi_restriction_data_rate_base);
+static DEVICE_ATTR_RO(ddr_data_rate_point_0);
+static DEVICE_ATTR_RO(ddr_data_rate_point_1);
+static DEVICE_ATTR_RO(ddr_data_rate_point_2);
+static DEVICE_ATTR_RO(ddr_data_rate_point_3);
+static DEVICE_ATTR_RW(rfi_disable);
+
+static ssize_t rfi_restriction_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ u16 id = 0x0008;
+ u32 input;
+ int ret;
+
+ ret = kstrtou32(buf, 10, &input);
+ if (ret)
+ return ret;
+
+ ret = processor_thermal_send_mbox_write_cmd(to_pci_dev(dev), id, input);
+ if (ret)
+ return ret;
+
+ return count;
+}
+
+static ssize_t rfi_restriction_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ u16 id = 0x0007;
+ u64 resp;
+ int ret;
+
+ ret = processor_thermal_send_mbox_read_cmd(to_pci_dev(dev), id, &resp);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%llu\n", resp);
+}
+
+static ssize_t ddr_data_rate_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ u16 id = 0x0107;
+ u64 resp;
+ int ret;
+
+ ret = processor_thermal_send_mbox_read_cmd(to_pci_dev(dev), id, &resp);
+ if (ret)
+ return ret;
+
+ return sprintf(buf, "%llu\n", resp);
+}
+
+static DEVICE_ATTR_RW(rfi_restriction);
+static DEVICE_ATTR_RO(ddr_data_rate);
+
+static struct attribute *dvfs_attrs[] = {
+ &dev_attr_rfi_restriction_run_busy.attr,
+ &dev_attr_rfi_restriction_err_code.attr,
+ &dev_attr_rfi_restriction_data_rate.attr,
+ &dev_attr_rfi_restriction_data_rate_base.attr,
+ &dev_attr_ddr_data_rate_point_0.attr,
+ &dev_attr_ddr_data_rate_point_1.attr,
+ &dev_attr_ddr_data_rate_point_2.attr,
+ &dev_attr_ddr_data_rate_point_3.attr,
+ &dev_attr_rfi_disable.attr,
+ &dev_attr_ddr_data_rate.attr,
+ &dev_attr_rfi_restriction.attr,
+ NULL
+};
+
+static const struct attribute_group dvfs_attribute_group = {
+ .attrs = dvfs_attrs,
+ .name = "dvfs"
+};
+
+int proc_thermal_rfim_add(struct pci_dev *pdev, struct proc_thermal_device *proc_priv)
+{
+ int ret;
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_FIVR) {
+ ret = sysfs_create_group(&pdev->dev.kobj, &fivr_attribute_group);
+ if (ret)
+ return ret;
+ }
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DLVR) {
+ ret = sysfs_create_group(&pdev->dev.kobj, &dlvr_attribute_group);
+ if (ret)
+ return ret;
+ }
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DVFS) {
+ ret = sysfs_create_group(&pdev->dev.kobj, &dvfs_attribute_group);
+ if (ret && proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_FIVR) {
+ sysfs_remove_group(&pdev->dev.kobj, &fivr_attribute_group);
+ return ret;
+ }
+ if (ret && proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DLVR) {
+ sysfs_remove_group(&pdev->dev.kobj, &dlvr_attribute_group);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(proc_thermal_rfim_add);
+
+void proc_thermal_rfim_remove(struct pci_dev *pdev)
+{
+ struct proc_thermal_device *proc_priv = pci_get_drvdata(pdev);
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_FIVR)
+ sysfs_remove_group(&pdev->dev.kobj, &fivr_attribute_group);
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DLVR)
+ sysfs_remove_group(&pdev->dev.kobj, &dlvr_attribute_group);
+
+ if (proc_priv->mmio_feature_mask & PROC_THERMAL_FEATURE_DVFS)
+ sysfs_remove_group(&pdev->dev.kobj, &dvfs_attribute_group);
+}
+EXPORT_SYMBOL_GPL(proc_thermal_rfim_remove);
+
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/intel_bxt_pmic_thermal.c b/drivers/thermal/intel/intel_bxt_pmic_thermal.c
new file mode 100644
index 0000000000..6312c6ba08
--- /dev/null
+++ b/drivers/thermal/intel/intel_bxt_pmic_thermal.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Intel Broxton PMIC thermal driver
+ *
+ * Copyright (C) 2016 Intel Corporation. All rights reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/thermal.h>
+#include <linux/platform_device.h>
+#include <linux/sched.h>
+#include <linux/mfd/intel_soc_pmic.h>
+
+#define BXTWC_THRM0IRQ 0x4E04
+#define BXTWC_THRM1IRQ 0x4E05
+#define BXTWC_THRM2IRQ 0x4E06
+#define BXTWC_MTHRM0IRQ 0x4E12
+#define BXTWC_MTHRM1IRQ 0x4E13
+#define BXTWC_MTHRM2IRQ 0x4E14
+#define BXTWC_STHRM0IRQ 0x4F19
+#define BXTWC_STHRM1IRQ 0x4F1A
+#define BXTWC_STHRM2IRQ 0x4F1B
+
+struct trip_config_map {
+ u16 irq_reg;
+ u16 irq_en;
+ u16 evt_stat;
+ u8 irq_mask;
+ u8 irq_en_mask;
+ u8 evt_mask;
+ u8 trip_num;
+};
+
+struct thermal_irq_map {
+ char handle[20];
+ int num_trips;
+ const struct trip_config_map *trip_config;
+};
+
+struct pmic_thermal_data {
+ const struct thermal_irq_map *maps;
+ int num_maps;
+};
+
+static const struct trip_config_map bxtwc_str0_trip_config[] = {
+ {
+ .irq_reg = BXTWC_THRM0IRQ,
+ .irq_mask = 0x01,
+ .irq_en = BXTWC_MTHRM0IRQ,
+ .irq_en_mask = 0x01,
+ .evt_stat = BXTWC_STHRM0IRQ,
+ .evt_mask = 0x01,
+ .trip_num = 0
+ },
+ {
+ .irq_reg = BXTWC_THRM0IRQ,
+ .irq_mask = 0x10,
+ .irq_en = BXTWC_MTHRM0IRQ,
+ .irq_en_mask = 0x10,
+ .evt_stat = BXTWC_STHRM0IRQ,
+ .evt_mask = 0x10,
+ .trip_num = 1
+ }
+};
+
+static const struct trip_config_map bxtwc_str1_trip_config[] = {
+ {
+ .irq_reg = BXTWC_THRM0IRQ,
+ .irq_mask = 0x02,
+ .irq_en = BXTWC_MTHRM0IRQ,
+ .irq_en_mask = 0x02,
+ .evt_stat = BXTWC_STHRM0IRQ,
+ .evt_mask = 0x02,
+ .trip_num = 0
+ },
+ {
+ .irq_reg = BXTWC_THRM0IRQ,
+ .irq_mask = 0x20,
+ .irq_en = BXTWC_MTHRM0IRQ,
+ .irq_en_mask = 0x20,
+ .evt_stat = BXTWC_STHRM0IRQ,
+ .evt_mask = 0x20,
+ .trip_num = 1
+ },
+};
+
+static const struct trip_config_map bxtwc_str2_trip_config[] = {
+ {
+ .irq_reg = BXTWC_THRM0IRQ,
+ .irq_mask = 0x04,
+ .irq_en = BXTWC_MTHRM0IRQ,
+ .irq_en_mask = 0x04,
+ .evt_stat = BXTWC_STHRM0IRQ,
+ .evt_mask = 0x04,
+ .trip_num = 0
+ },
+ {
+ .irq_reg = BXTWC_THRM0IRQ,
+ .irq_mask = 0x40,
+ .irq_en = BXTWC_MTHRM0IRQ,
+ .irq_en_mask = 0x40,
+ .evt_stat = BXTWC_STHRM0IRQ,
+ .evt_mask = 0x40,
+ .trip_num = 1
+ },
+};
+
+static const struct trip_config_map bxtwc_str3_trip_config[] = {
+ {
+ .irq_reg = BXTWC_THRM2IRQ,
+ .irq_mask = 0x10,
+ .irq_en = BXTWC_MTHRM2IRQ,
+ .irq_en_mask = 0x10,
+ .evt_stat = BXTWC_STHRM2IRQ,
+ .evt_mask = 0x10,
+ .trip_num = 0
+ },
+};
+
+static const struct thermal_irq_map bxtwc_thermal_irq_map[] = {
+ {
+ .handle = "STR0",
+ .trip_config = bxtwc_str0_trip_config,
+ .num_trips = ARRAY_SIZE(bxtwc_str0_trip_config),
+ },
+ {
+ .handle = "STR1",
+ .trip_config = bxtwc_str1_trip_config,
+ .num_trips = ARRAY_SIZE(bxtwc_str1_trip_config),
+ },
+ {
+ .handle = "STR2",
+ .trip_config = bxtwc_str2_trip_config,
+ .num_trips = ARRAY_SIZE(bxtwc_str2_trip_config),
+ },
+ {
+ .handle = "STR3",
+ .trip_config = bxtwc_str3_trip_config,
+ .num_trips = ARRAY_SIZE(bxtwc_str3_trip_config),
+ },
+};
+
+static const struct pmic_thermal_data bxtwc_thermal_data = {
+ .maps = bxtwc_thermal_irq_map,
+ .num_maps = ARRAY_SIZE(bxtwc_thermal_irq_map),
+};
+
+static irqreturn_t pmic_thermal_irq_handler(int irq, void *data)
+{
+ struct platform_device *pdev = data;
+ struct thermal_zone_device *tzd;
+ struct pmic_thermal_data *td;
+ struct intel_soc_pmic *pmic;
+ struct regmap *regmap;
+ u8 reg_val, mask, irq_stat;
+ u16 reg, evt_stat_reg;
+ int i, j, ret;
+
+ pmic = dev_get_drvdata(pdev->dev.parent);
+ regmap = pmic->regmap;
+ td = (struct pmic_thermal_data *)
+ platform_get_device_id(pdev)->driver_data;
+
+ /* Resolve thermal irqs */
+ for (i = 0; i < td->num_maps; i++) {
+ for (j = 0; j < td->maps[i].num_trips; j++) {
+ reg = td->maps[i].trip_config[j].irq_reg;
+ mask = td->maps[i].trip_config[j].irq_mask;
+ /*
+ * Read the irq register to resolve whether the
+ * interrupt was triggered for this sensor
+ */
+ if (regmap_read(regmap, reg, &ret))
+ return IRQ_HANDLED;
+
+ reg_val = (u8)ret;
+ irq_stat = ((u8)ret & mask);
+
+ if (!irq_stat)
+ continue;
+
+ /*
+ * Read the status register to find out what
+ * event occurred, i.e., a high or a low event
+ */
+ evt_stat_reg = td->maps[i].trip_config[j].evt_stat;
+ if (regmap_read(regmap, evt_stat_reg, &ret))
+ return IRQ_HANDLED;
+
+ tzd = thermal_zone_get_zone_by_name(td->maps[i].handle);
+ if (!IS_ERR(tzd))
+ thermal_zone_device_update(tzd,
+ THERMAL_EVENT_UNSPECIFIED);
+
+ /* Clear the appropriate irq */
+ regmap_write(regmap, reg, reg_val & mask);
+ }
+ }
+
+ return IRQ_HANDLED;
+}
+
+static int pmic_thermal_probe(struct platform_device *pdev)
+{
+ struct regmap_irq_chip_data *regmap_irq_chip;
+ struct pmic_thermal_data *thermal_data;
+ int ret, irq, virq, i, j, pmic_irq_count;
+ struct intel_soc_pmic *pmic;
+ struct regmap *regmap;
+ struct device *dev;
+ u16 reg;
+ u8 mask;
+
+ dev = &pdev->dev;
+ pmic = dev_get_drvdata(pdev->dev.parent);
+ if (!pmic) {
+ dev_err(dev, "Failed to get struct intel_soc_pmic pointer\n");
+ return -ENODEV;
+ }
+
+ thermal_data = (struct pmic_thermal_data *)
+ platform_get_device_id(pdev)->driver_data;
+ if (!thermal_data) {
+ dev_err(dev, "No thermal data initialized!!\n");
+ return -ENODEV;
+ }
+
+ regmap = pmic->regmap;
+ regmap_irq_chip = pmic->irq_chip_data;
+
+ pmic_irq_count = 0;
+ while ((irq = platform_get_irq(pdev, pmic_irq_count)) != -ENXIO) {
+ virq = regmap_irq_get_virq(regmap_irq_chip, irq);
+ if (virq < 0) {
+ dev_err(dev, "failed to get virq by irq %d\n", irq);
+ return virq;
+ }
+
+ ret = devm_request_threaded_irq(&pdev->dev, virq,
+ NULL, pmic_thermal_irq_handler,
+ IRQF_ONESHOT, "pmic_thermal", pdev);
+
+ if (ret) {
+ dev_err(dev, "request irq(%d) failed: %d\n", virq, ret);
+ return ret;
+ }
+ pmic_irq_count++;
+ }
+
+ /* Enable thermal interrupts */
+ for (i = 0; i < thermal_data->num_maps; i++) {
+ for (j = 0; j < thermal_data->maps[i].num_trips; j++) {
+ reg = thermal_data->maps[i].trip_config[j].irq_en;
+ mask = thermal_data->maps[i].trip_config[j].irq_en_mask;
+ ret = regmap_update_bits(regmap, reg, mask, 0x00);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static const struct platform_device_id pmic_thermal_id_table[] = {
+ {
+ .name = "bxt_wcove_thermal",
+ .driver_data = (kernel_ulong_t)&bxtwc_thermal_data,
+ },
+ {},
+};
+
+static struct platform_driver pmic_thermal_driver = {
+ .probe = pmic_thermal_probe,
+ .driver = {
+ .name = "pmic_thermal",
+ },
+ .id_table = pmic_thermal_id_table,
+};
+
+MODULE_DEVICE_TABLE(platform, pmic_thermal_id_table);
+module_platform_driver(pmic_thermal_driver);
+
+MODULE_AUTHOR("Yegnesh S Iyer <yegnesh.s.iyer@intel.com>");
+MODULE_DESCRIPTION("Intel Broxton PMIC Thermal Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/intel_hfi.c b/drivers/thermal/intel/intel_hfi.c
new file mode 100644
index 0000000000..1c5a429b2e
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.c
@@ -0,0 +1,640 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hardware Feedback Interface Driver
+ *
+ * Copyright (c) 2021, Intel Corporation.
+ *
+ * Authors: Aubrey Li <aubrey.li@linux.intel.com>
+ * Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+ *
+ *
+ * The Hardware Feedback Interface provides performance and energy efficiency
+ * capability information for each CPU in the system. Depending on the processor
+ * model, hardware may periodically update these capabilities as a result of
+ * changes in the operating conditions (e.g., power limits or thermal
+ * constraints). On other processor models, there is a single HFI update
+ * at boot.
+ *
+ * This file provides functionality to process HFI updates and relay these
+ * updates to userspace.
+ */
+
+#define pr_fmt(fmt) "intel-hfi: " fmt
+
+#include <linux/bitops.h>
+#include <linux/cpufeature.h>
+#include <linux/cpumask.h>
+#include <linux/delay.h>
+#include <linux/gfp.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/math.h>
+#include <linux/mutex.h>
+#include <linux/percpu-defs.h>
+#include <linux/printk.h>
+#include <linux/processor.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/suspend.h>
+#include <linux/string.h>
+#include <linux/syscore_ops.h>
+#include <linux/topology.h>
+#include <linux/workqueue.h>
+
+#include <asm/msr.h>
+
+#include "intel_hfi.h"
+#include "thermal_interrupt.h"
+
+#include "../thermal_netlink.h"
+
+/* Hardware Feedback Interface MSR configuration bits */
+#define HW_FEEDBACK_PTR_VALID_BIT BIT(0)
+#define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0)
+
+/* CPUID detection and enumeration definitions for HFI */
+
+#define CPUID_HFI_LEAF 6
+
+union hfi_capabilities {
+ struct {
+ u8 performance:1;
+ u8 energy_efficiency:1;
+ u8 __reserved:6;
+ } split;
+ u8 bits;
+};
+
+union cpuid6_edx {
+ struct {
+ union hfi_capabilities capabilities;
+ u32 table_pages:4;
+ u32 __reserved:4;
+ s32 index:16;
+ } split;
+ u32 full;
+};
+
+/**
+ * struct hfi_cpu_data - HFI capabilities per CPU
+ * @perf_cap: Performance capability
+ * @ee_cap: Energy efficiency capability
+ *
+ * Capabilities of a logical processor in the HFI table. These capabilities are
+ * unitless.
+ */
+struct hfi_cpu_data {
+ u8 perf_cap;
+ u8 ee_cap;
+} __packed;
+
+/**
+ * struct hfi_hdr - Header of the HFI table
+ * @perf_updated: Hardware updated performance capabilities
+ * @ee_updated: Hardware updated energy efficiency capabilities
+ *
+ * Properties of the data in an HFI table.
+ */
+struct hfi_hdr {
+ u8 perf_updated;
+ u8 ee_updated;
+} __packed;
+
+/**
+ * struct hfi_instance - Representation of an HFI instance (i.e., a table)
+ * @local_table: Base of the local copy of the HFI table
+ * @timestamp: Timestamp of the last update of the local table.
+ * Located at the base of the local table.
+ * @hdr: Base address of the header of the local table
+ * @data: Base address of the data of the local table
+ * @cpus: CPUs represented in this HFI table instance
+ * @hw_table: Pointer to the HFI table of this instance
+ * @update_work: Delayed work to process HFI updates
+ * @table_lock: Lock to protect accesses to the table of this instance
+ * @event_lock: Lock to process HFI interrupts
+ *
+ * A set of parameters to parse and navigate a specific HFI table.
+ */
+struct hfi_instance {
+ union {
+ void *local_table;
+ u64 *timestamp;
+ };
+ void *hdr;
+ void *data;
+ cpumask_var_t cpus;
+ void *hw_table;
+ struct delayed_work update_work;
+ raw_spinlock_t table_lock;
+ raw_spinlock_t event_lock;
+};
+
+/**
+ * struct hfi_features - Supported HFI features
+ * @nr_table_pages: Size of the HFI table in 4KB pages
+ * @cpu_stride: Stride size to locate the capability data of a logical
+ * processor within the table (i.e., row stride)
+ * @hdr_size: Size of the table header
+ *
+ * Parameters and supported features that are common to all HFI instances
+ */
+struct hfi_features {
+ size_t nr_table_pages;
+ unsigned int cpu_stride;
+ unsigned int hdr_size;
+};
+
+/**
+ * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
+ * @index: Row of this CPU in its HFI table
+ * @hfi_instance: Attributes of the HFI table to which this CPU belongs
+ *
+ * Parameters to link a logical processor to an HFI table and a row within it.
+ */
+struct hfi_cpu_info {
+ s16 index;
+ struct hfi_instance *hfi_instance;
+};
+
+static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
+
+static int max_hfi_instances;
+static struct hfi_instance *hfi_instances;
+
+static struct hfi_features hfi_features;
+static DEFINE_MUTEX(hfi_instance_lock);
+
+static struct workqueue_struct *hfi_updates_wq;
+#define HFI_UPDATE_INTERVAL HZ
+#define HFI_MAX_THERM_NOTIFY_COUNT 16
+
+static void get_hfi_caps(struct hfi_instance *hfi_instance,
+ struct thermal_genl_cpu_caps *cpu_caps)
+{
+ int cpu, i = 0;
+
+ raw_spin_lock_irq(&hfi_instance->table_lock);
+ for_each_cpu(cpu, hfi_instance->cpus) {
+ struct hfi_cpu_data *caps;
+ s16 index;
+
+ index = per_cpu(hfi_cpu_info, cpu).index;
+ caps = hfi_instance->data + index * hfi_features.cpu_stride;
+ cpu_caps[i].cpu = cpu;
+
+ /*
+ * Scale performance and energy efficiency to
+ * the [0, 1023] interval that thermal netlink uses.
+ */
+ cpu_caps[i].performance = caps->perf_cap << 2;
+ cpu_caps[i].efficiency = caps->ee_cap << 2;
+
+ ++i;
+ }
+ raw_spin_unlock_irq(&hfi_instance->table_lock);
+}
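+
+/*
+ * Worked example of the scaling above: a raw 8-bit capability of 255
+ * becomes 255 << 2 = 1020, which stays within the [0, 1023] interval
+ * that thermal netlink uses.
+ */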
+
+/*
+ * Call update_capabilities() when there are changes in the HFI table.
+ */
+static void update_capabilities(struct hfi_instance *hfi_instance)
+{
+ struct thermal_genl_cpu_caps *cpu_caps;
+ int i = 0, cpu_count;
+
+ /* CPUs may come online/offline while processing an HFI update. */
+ mutex_lock(&hfi_instance_lock);
+
+ cpu_count = cpumask_weight(hfi_instance->cpus);
+
+ /* No CPUs to report in this hfi_instance. */
+ if (!cpu_count)
+ goto out;
+
+ cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
+ if (!cpu_caps)
+ goto out;
+
+ get_hfi_caps(hfi_instance, cpu_caps);
+
+ if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
+ goto last_cmd;
+
+ /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
+ for (i = 0;
+ (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
+ i += HFI_MAX_THERM_NOTIFY_COUNT)
+ thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
+ &cpu_caps[i]);
+
+ cpu_count = cpu_count - i;
+
+last_cmd:
+ /* Process the remaining capabilities if any. */
+ if (cpu_count)
+ thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);
+
+ kfree(cpu_caps);
+out:
+ mutex_unlock(&hfi_instance_lock);
+}
+
+static void hfi_update_work_fn(struct work_struct *work)
+{
+ struct hfi_instance *hfi_instance;
+
+ hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
+ update_work);
+
+ update_capabilities(hfi_instance);
+}
+
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
+{
+ struct hfi_instance *hfi_instance;
+ int cpu = smp_processor_id();
+ struct hfi_cpu_info *info;
+ u64 new_timestamp, msr, hfi;
+
+ if (!pkg_therm_status_msr_val)
+ return;
+
+ info = &per_cpu(hfi_cpu_info, cpu);
+ if (!info)
+ return;
+
+ /*
+ * A CPU is linked to its HFI instance before the thermal vector in the
+ * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
+ * when receiving an HFI event.
+ */
+ hfi_instance = info->hfi_instance;
+ if (unlikely(!hfi_instance)) {
+ pr_debug("Received event on CPU %d but instance was null", cpu);
+ return;
+ }
+
+ /*
+ * On most systems, all CPUs in the package receive a package-level
+ * thermal interrupt when there is an HFI update. It is sufficient to
+ * let a single CPU acknowledge the update and queue work to
+ * process it. The remaining CPUs can resume their work.
+ */
+ if (!raw_spin_trylock(&hfi_instance->event_lock))
+ return;
+
+ rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr);
+ hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED;
+ if (!hfi) {
+ raw_spin_unlock(&hfi_instance->event_lock);
+ return;
+ }
+
+ /*
+ * Ack duplicate update. Since there is an active HFI
+ * status from HW, it must be a new event, not a case
+ * where a lagging CPU entered the locked region.
+ */
+ new_timestamp = *(u64 *)hfi_instance->hw_table;
+ if (*hfi_instance->timestamp == new_timestamp) {
+ thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
+ raw_spin_unlock(&hfi_instance->event_lock);
+ return;
+ }
+
+ raw_spin_lock(&hfi_instance->table_lock);
+
+ /*
+ * Copy the updated table into our local copy. This includes the new
+ * timestamp.
+ */
+ memcpy(hfi_instance->local_table, hfi_instance->hw_table,
+ hfi_features.nr_table_pages << PAGE_SHIFT);
+
+ /*
+ * Let hardware know that we are done reading the HFI table and it is
+ * free to update it again.
+ */
+ thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
+
+ raw_spin_unlock(&hfi_instance->table_lock);
+ raw_spin_unlock(&hfi_instance->event_lock);
+
+ queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
+ HFI_UPDATE_INTERVAL);
+}
+
+static void init_hfi_cpu_index(struct hfi_cpu_info *info)
+{
+ union cpuid6_edx edx;
+
+ /* Do not re-read @cpu's index if it has already been initialized. */
+ if (info->index > -1)
+ return;
+
+ edx.full = cpuid_edx(CPUID_HFI_LEAF);
+ info->index = edx.split.index;
+}
+
+/*
+ * The format of the HFI table depends on the number of capabilities that the
+ * hardware supports. Keep a data structure to navigate the table.
+ */
+static void init_hfi_instance(struct hfi_instance *hfi_instance)
+{
+ /* The HFI header is below the timestamp. */
+ hfi_instance->hdr = hfi_instance->local_table +
+ sizeof(*hfi_instance->timestamp);
+
+ /* The HFI data starts below the header. */
+ hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
+}
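+
+/*
+ * Sketch of the resulting local table layout (assuming the
+ * two-capability case enumerated by hfi_parse_features()):
+ *
+ *   | timestamp (8B) | header (hdr_size) | CPU row | CPU row | ...
+ *
+ * Each row is hfi_features.cpu_stride bytes; the row of a given CPU
+ * lives at data + info->index * hfi_features.cpu_stride.
+ */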
+
+/* Caller must hold hfi_instance_lock. */
+static void hfi_enable(void)
+{
+ u64 msr_val;
+
+ rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+ wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+}
+
+static void hfi_set_hw_table(struct hfi_instance *hfi_instance)
+{
+ phys_addr_t hw_table_pa;
+ u64 msr_val;
+
+ hw_table_pa = virt_to_phys(hfi_instance->hw_table);
+ msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
+ wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
+}
+
+/* Caller must hold hfi_instance_lock. */
+static void hfi_disable(void)
+{
+ u64 msr_val;
+ int i;
+
+ rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+ msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
+ wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
+
+ /*
+ * Wait for hardware to acknowledge the disabling of HFI. Some
+ * processors may not do it. Wait for ~2ms. This is a reasonable
+ * time for hardware to complete any pending actions on the HFI
+ * memory.
+ */
+ for (i = 0; i < 2000; i++) {
+ rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
+ if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED)
+ break;
+
+ udelay(1);
+ cpu_relax();
+ }
+}
+
+/**
+ * intel_hfi_online() - Enable HFI on @cpu
+ * @cpu: CPU in which the HFI will be enabled
+ *
+ * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
+ * level. The first CPU in the die/package to come online does the full HFI
+ * initialization. Subsequent CPUs will just link themselves to the HFI
+ * instance of their die/package.
+ *
+ * This function is called before enabling the thermal vector in the local APIC
+ * in order to ensure that @cpu has an associated HFI instance when it receives
+ * an HFI event.
+ */
+void intel_hfi_online(unsigned int cpu)
+{
+ struct hfi_instance *hfi_instance;
+ struct hfi_cpu_info *info;
+ u16 die_id;
+
+ /* Nothing to do if hfi_instances are missing. */
+ if (!hfi_instances)
+ return;
+
+ /*
+ * Link @cpu to the HFI instance of its package/die. It does not
+ * matter whether the instance has been initialized.
+ */
+ info = &per_cpu(hfi_cpu_info, cpu);
+ die_id = topology_logical_die_id(cpu);
+ hfi_instance = info->hfi_instance;
+ if (!hfi_instance) {
+ if (die_id >= max_hfi_instances)
+ return;
+
+ hfi_instance = &hfi_instances[die_id];
+ info->hfi_instance = hfi_instance;
+ }
+
+ init_hfi_cpu_index(info);
+
+ /*
+ * Now check if the HFI instance of the package/die of @cpu has been
+ * initialized (by checking its header). In such case, all we have to
+ * do is to add @cpu to this instance's cpumask.
+ */
+ mutex_lock(&hfi_instance_lock);
+ if (hfi_instance->hdr) {
+ cpumask_set_cpu(cpu, hfi_instance->cpus);
+ goto unlock;
+ }
+
+ /*
+ * Hardware is programmed with the physical address of the first page
+ * frame of the table. Hence, the allocated memory must be page-aligned.
+ *
+ * Some processors do not forget the initial address of the HFI table
+ * even after having been reprogrammed. Keep using the same pages. Do
+ * not free them.
+ */
+ hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!hfi_instance->hw_table)
+ goto unlock;
+
+ /*
+ * Allocate memory to keep a local copy of the table that
+ * hardware generates.
+ */
+ hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
+ GFP_KERNEL);
+ if (!hfi_instance->local_table)
+ goto free_hw_table;
+
+ init_hfi_instance(hfi_instance);
+
+ INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
+ raw_spin_lock_init(&hfi_instance->table_lock);
+ raw_spin_lock_init(&hfi_instance->event_lock);
+
+ cpumask_set_cpu(cpu, hfi_instance->cpus);
+
+ hfi_set_hw_table(hfi_instance);
+ hfi_enable();
+
+unlock:
+ mutex_unlock(&hfi_instance_lock);
+ return;
+
+free_hw_table:
+ free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
+ goto unlock;
+}
+
+/**
+ * intel_hfi_offline() - Disable HFI on @cpu
+ * @cpu: CPU in which the HFI will be disabled
+ *
+ * Remove @cpu from those covered by its HFI instance.
+ *
+ * On some processors, hardware remembers previous programming settings even
+ * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
+ * die/package of @cpu are offline. See note in intel_hfi_online().
+ */
+void intel_hfi_offline(unsigned int cpu)
+{
+ struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
+ struct hfi_instance *hfi_instance;
+
+ /*
+ * Check whether @cpu has an associated, initialized HFI instance (i.e.,
+ * one with a non-NULL header). Note that HFI instances are only
+ * initialized if X86_FEATURE_HFI is present.
+ */
+ hfi_instance = info->hfi_instance;
+ if (!hfi_instance)
+ return;
+
+ if (!hfi_instance->hdr)
+ return;
+
+ mutex_lock(&hfi_instance_lock);
+ cpumask_clear_cpu(cpu, hfi_instance->cpus);
+
+ if (!cpumask_weight(hfi_instance->cpus))
+ hfi_disable();
+
+ mutex_unlock(&hfi_instance_lock);
+}
+
+static __init int hfi_parse_features(void)
+{
+ unsigned int nr_capabilities;
+ union cpuid6_edx edx;
+
+ if (!boot_cpu_has(X86_FEATURE_HFI))
+ return -ENODEV;
+
+ /*
+ * If we are here we know that CPUID_HFI_LEAF exists. Parse the
+ * supported capabilities and the size of the HFI table.
+ */
+ edx.full = cpuid_edx(CPUID_HFI_LEAF);
+
+ if (!edx.split.capabilities.split.performance) {
+ pr_debug("Performance reporting not supported! Not using HFI\n");
+ return -ENODEV;
+ }
+
+ /*
+ * The number of supported capabilities determines the number of
+ * columns in the HFI table. Exclude the reserved bits.
+ */
+ edx.split.capabilities.split.__reserved = 0;
+ nr_capabilities = hweight8(edx.split.capabilities.bits);
+
+ /* The number of 4KB pages required by the table */
+ hfi_features.nr_table_pages = edx.split.table_pages + 1;
+
+ /*
+ * The header contains change indications for each supported feature.
+ * The size of the table header is rounded up to be a multiple of 8
+ * bytes.
+ */
+ hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+ /*
+ * Data of each logical processor is also rounded up to be a multiple
+ * of 8 bytes.
+ */
+ hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
+
+ return 0;
+}
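+
+/*
+ * Worked example (an illustration, not an extra constraint): with the
+ * two capabilities above (performance and energy efficiency),
+ * nr_capabilities = 2, so hdr_size = DIV_ROUND_UP(2, 8) * 8 = 8 bytes
+ * and cpu_stride = 8 bytes; a table_pages field of 0 yields a one-page
+ * (4KB) table.
+ */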
+
+static void hfi_do_enable(void)
+{
+ /* This code runs only on the boot CPU. */
+ struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0);
+ struct hfi_instance *hfi_instance = info->hfi_instance;
+
+ /* No locking needed. There is no concurrency with CPU online. */
+ hfi_set_hw_table(hfi_instance);
+ hfi_enable();
+}
+
+static int hfi_do_disable(void)
+{
+ /* No locking needed. There is no concurrency with CPU offline. */
+ hfi_disable();
+
+ return 0;
+}
+
+static struct syscore_ops hfi_pm_ops = {
+ .resume = hfi_do_enable,
+ .suspend = hfi_do_disable,
+};
+
+void __init intel_hfi_init(void)
+{
+ struct hfi_instance *hfi_instance;
+ int i, j;
+
+ if (hfi_parse_features())
+ return;
+
+ /* There is one HFI instance per die/package. */
+ max_hfi_instances = topology_max_packages() *
+ topology_max_die_per_package();
+
+ /*
+ * This allocation may fail. CPU hotplug callbacks must check
+ * for a null pointer.
+ */
+ hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
+ GFP_KERNEL);
+ if (!hfi_instances)
+ return;
+
+ for (i = 0; i < max_hfi_instances; i++) {
+ hfi_instance = &hfi_instances[i];
+ if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
+ goto err_nomem;
+ }
+
+ hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
+ if (!hfi_updates_wq)
+ goto err_nomem;
+
+ register_syscore_ops(&hfi_pm_ops);
+
+ return;
+
+err_nomem:
+ for (j = 0; j < i; ++j) {
+ hfi_instance = &hfi_instances[j];
+ free_cpumask_var(hfi_instance->cpus);
+ }
+
+ kfree(hfi_instances);
+ hfi_instances = NULL;
+}
diff --git a/drivers/thermal/intel/intel_hfi.h b/drivers/thermal/intel/intel_hfi.h
new file mode 100644
index 0000000000..325aa78b74
--- /dev/null
+++ b/drivers/thermal/intel/intel_hfi.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _INTEL_HFI_H
+#define _INTEL_HFI_H
+
+#if defined(CONFIG_INTEL_HFI_THERMAL)
+void __init intel_hfi_init(void);
+void intel_hfi_online(unsigned int cpu);
+void intel_hfi_offline(unsigned int cpu);
+void intel_hfi_process_event(__u64 pkg_therm_status_msr_val);
+#else
+static inline void intel_hfi_init(void) { }
+static inline void intel_hfi_online(unsigned int cpu) { }
+static inline void intel_hfi_offline(unsigned int cpu) { }
+static inline void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) { }
+#endif /* CONFIG_INTEL_HFI_THERMAL */
+
+#endif /* _INTEL_HFI_H */
diff --git a/drivers/thermal/intel/intel_pch_thermal.c b/drivers/thermal/intel/intel_pch_thermal.c
new file mode 100644
index 0000000000..b3905e34c5
--- /dev/null
+++ b/drivers/thermal/intel/intel_pch_thermal.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* intel_pch_thermal.c - Intel PCH Thermal driver
+ *
+ * Copyright (c) 2015, Intel Corporation.
+ *
+ * Authors:
+ * Tushar Dave <tushar.n.dave@intel.com>
+ */
+
+#include <linux/acpi.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/pm.h>
+#include <linux/suspend.h>
+#include <linux/thermal.h>
+#include <linux/types.h>
+#include <linux/units.h>
+
+/* Intel PCH thermal Device IDs */
+#define PCH_THERMAL_DID_HSW_1 0x9C24 /* Haswell PCH */
+#define PCH_THERMAL_DID_HSW_2 0x8C24 /* Haswell PCH */
+#define PCH_THERMAL_DID_WPT 0x9CA4 /* Wildcat Point */
+#define PCH_THERMAL_DID_SKL 0x9D31 /* Skylake PCH */
+#define PCH_THERMAL_DID_SKL_H 0xA131 /* Skylake PCH 100 series */
+#define PCH_THERMAL_DID_CNL 0x9Df9 /* CNL PCH */
+#define PCH_THERMAL_DID_CNL_H 0xA379 /* CNL-H PCH */
+#define PCH_THERMAL_DID_CNL_LP 0x02F9 /* CNL-LP PCH */
+#define PCH_THERMAL_DID_CML_H 0X06F9 /* CML-H PCH */
+#define PCH_THERMAL_DID_LWB 0xA1B1 /* Lewisburg PCH */
+#define PCH_THERMAL_DID_WBG 0x8D24 /* Wellsburg PCH */
+
+/* Wildcat Point-LP PCH Thermal registers */
+#define WPT_TEMP 0x0000 /* Temperature */
+#define WPT_TSC 0x04 /* Thermal Sensor Control */
+#define WPT_TSS 0x06 /* Thermal Sensor Status */
+#define WPT_TSEL 0x08 /* Thermal Sensor Enable and Lock */
+#define WPT_TSREL 0x0A /* Thermal Sensor Report Enable and Lock */
+#define WPT_TSMIC 0x0C /* Thermal Sensor SMI Control */
+#define WPT_CTT 0x0010 /* Catastrophic Trip Point */
+#define WPT_TSPM 0x001C /* Thermal Sensor Power Management */
+#define WPT_TAHV 0x0014 /* Thermal Alert High Value */
+#define WPT_TALV 0x0018 /* Thermal Alert Low Value */
+#define WPT_TL 0x00000040 /* Throttle Value */
+#define WPT_PHL 0x0060 /* PCH Hot Level */
+#define WPT_PHLC 0x62 /* PHL Control */
+#define WPT_TAS 0x80 /* Thermal Alert Status */
+#define WPT_TSPIEN 0x82 /* PCI Interrupt Event Enables */
+#define WPT_TSGPEN 0x84 /* General Purpose Event Enables */
+
+/* Wildcat Point-LP PCH Thermal Register bit definitions */
+#define WPT_TEMP_TSR 0x01ff /* Temp TS Reading */
+#define WPT_TSC_CPDE 0x01 /* Catastrophic Power-Down Enable */
+#define WPT_TSS_TSDSS 0x10 /* Thermal Sensor Dynamic Shutdown Status */
+#define WPT_TSS_GPES 0x08 /* GPE status */
+#define WPT_TSEL_ETS 0x01 /* Enable TS */
+#define WPT_TSEL_PLDB 0x80 /* TSEL Policy Lock-Down Bit */
+#define WPT_TL_TOL 0x000001FF /* T0 Level */
+#define WPT_TL_T1L 0x1ff00000 /* T1 Level */
+#define WPT_TL_TTEN 0x20000000 /* TT Enable */
+
+/* Resolution of 1/2 degree C and an offset of -50C */
+#define PCH_TEMP_OFFSET (-50)
+#define GET_WPT_TEMP(x) ((x) * MILLIDEGREE_PER_DEGREE / 2 + WPT_TEMP_OFFSET)
+#define WPT_TEMP_OFFSET (PCH_TEMP_OFFSET * MILLIDEGREE_PER_DEGREE)
+#define GET_PCH_TEMP(x) (((x) / 2) + PCH_TEMP_OFFSET)
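+
+/*
+ * Worked example: a raw sensor reading of 148 yields
+ * GET_WPT_TEMP(148) = 148 * 1000 / 2 - 50000 = 24000 millidegrees C,
+ * and GET_PCH_TEMP(148) = 148 / 2 - 50 = 24 degrees C.
+ */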
+
+#define PCH_MAX_TRIPS 3 /* critical, hot, passive */
+
+/* Amount of time for each cooling delay, 100ms by default for now */
+static unsigned int delay_timeout = 100;
+module_param(delay_timeout, int, 0644);
+MODULE_PARM_DESC(delay_timeout, "delay in milliseconds for each cooling iteration.");
+
+/* Number of iterations for cooling delay, 600 counts by default for now */
+static unsigned int delay_cnt = 600;
+module_param(delay_cnt, int, 0644);
+MODULE_PARM_DESC(delay_cnt, "total number of iterations for time delay.");
+
+static char driver_name[] = "Intel PCH thermal driver";
+
+struct pch_thermal_device {
+ void __iomem *hw_base;
+ struct pci_dev *pdev;
+ struct thermal_zone_device *tzd;
+ struct thermal_trip trips[PCH_MAX_TRIPS];
+ bool bios_enabled;
+};
+
+#ifdef CONFIG_ACPI
+/*
+ * On some platforms, there is a companion ACPI device, which adds a
+ * passive trip temperature using the _PSV method. There is no specific
+ * passive temperature setting in the MMIO interface of this PCI device.
+ */
+static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip)
+{
+ struct acpi_device *adev;
+ int temp;
+
+ adev = ACPI_COMPANION(&ptd->pdev->dev);
+ if (!adev)
+ return 0;
+
+ if (thermal_acpi_passive_trip_temp(adev, &temp) || temp <= 0)
+ return 0;
+
+ ptd->trips[trip].type = THERMAL_TRIP_PASSIVE;
+ ptd->trips[trip].temperature = temp;
+ return 1;
+}
+#else
+static int pch_wpt_add_acpi_psv_trip(struct pch_thermal_device *ptd, int trip)
+{
+ return 0;
+}
+#endif
+
+static int pch_thermal_get_temp(struct thermal_zone_device *tzd, int *temp)
+{
+ struct pch_thermal_device *ptd = thermal_zone_device_priv(tzd);
+
+ *temp = GET_WPT_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP));
+ return 0;
+}
+
+static void pch_critical(struct thermal_zone_device *tzd)
+{
+ dev_dbg(thermal_zone_device(tzd), "%s: critical temperature reached\n",
+ thermal_zone_device_type(tzd));
+}
+
+static struct thermal_zone_device_ops tzd_ops = {
+ .get_temp = pch_thermal_get_temp,
+ .critical = pch_critical,
+};
+
+enum pch_board_ids {
+ PCH_BOARD_HSW = 0,
+ PCH_BOARD_WPT,
+ PCH_BOARD_SKL,
+ PCH_BOARD_CNL,
+ PCH_BOARD_CML,
+ PCH_BOARD_LWB,
+ PCH_BOARD_WBG,
+};
+
+static const char *board_names[] = {
+ [PCH_BOARD_HSW] = "pch_haswell",
+ [PCH_BOARD_WPT] = "pch_wildcat_point",
+ [PCH_BOARD_SKL] = "pch_skylake",
+ [PCH_BOARD_CNL] = "pch_cannonlake",
+ [PCH_BOARD_CML] = "pch_cometlake",
+ [PCH_BOARD_LWB] = "pch_lewisburg",
+ [PCH_BOARD_WBG] = "pch_wellsburg",
+};
+
+static int intel_pch_thermal_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ enum pch_board_ids board_id = id->driver_data;
+ struct pch_thermal_device *ptd;
+ int nr_trips = 0;
+ u16 trip_temp;
+ u8 tsel;
+ int err;
+
+ ptd = devm_kzalloc(&pdev->dev, sizeof(*ptd), GFP_KERNEL);
+ if (!ptd)
+ return -ENOMEM;
+
+ pci_set_drvdata(pdev, ptd);
+ ptd->pdev = pdev;
+
+ err = pci_enable_device(pdev);
+ if (err) {
+ dev_err(&pdev->dev, "failed to enable pci device\n");
+ return err;
+ }
+
+ err = pci_request_regions(pdev, driver_name);
+ if (err) {
+ dev_err(&pdev->dev, "failed to request pci region\n");
+ goto error_disable;
+ }
+
+ ptd->hw_base = pci_ioremap_bar(pdev, 0);
+ if (!ptd->hw_base) {
+ err = -ENOMEM;
+ dev_err(&pdev->dev, "failed to map mem base\n");
+ goto error_release;
+ }
+
+ /* Check if BIOS has already enabled thermal sensor */
+ if (WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL)) {
+ ptd->bios_enabled = true;
+ goto read_trips;
+ }
+
+ tsel = readb(ptd->hw_base + WPT_TSEL);
+ /*
+ * When TSEL's Policy Lock-Down bit is 1, TSEL becomes read-only.
+ * If so, the thermal sensor cannot be enabled. Bail out.
+ */
+ if (tsel & WPT_TSEL_PLDB) {
+ dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n");
+ err = -ENODEV;
+ goto error_cleanup;
+ }
+
+ writeb(tsel|WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL);
+ if (!(WPT_TSEL_ETS & readb(ptd->hw_base + WPT_TSEL))) {
+ dev_err(&ptd->pdev->dev, "Sensor can't be enabled\n");
+ err = -ENODEV;
+ goto error_cleanup;
+ }
+
+read_trips:
+ trip_temp = readw(ptd->hw_base + WPT_CTT);
+ trip_temp &= 0x1FF;
+ if (trip_temp) {
+ ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp);
+ ptd->trips[nr_trips++].type = THERMAL_TRIP_CRITICAL;
+ }
+
+ trip_temp = readw(ptd->hw_base + WPT_PHL);
+ trip_temp &= 0x1FF;
+ if (trip_temp) {
+ ptd->trips[nr_trips].temperature = GET_WPT_TEMP(trip_temp);
+ ptd->trips[nr_trips++].type = THERMAL_TRIP_HOT;
+ }
+
+ nr_trips += pch_wpt_add_acpi_psv_trip(ptd, nr_trips);
+
+ ptd->tzd = thermal_zone_device_register_with_trips(board_names[board_id],
+ ptd->trips, nr_trips,
+ 0, ptd, &tzd_ops,
+ NULL, 0, 0);
+ if (IS_ERR(ptd->tzd)) {
+ dev_err(&pdev->dev, "Failed to register thermal zone %s\n",
+ board_names[board_id]);
+ err = PTR_ERR(ptd->tzd);
+ goto error_cleanup;
+ }
+ err = thermal_zone_device_enable(ptd->tzd);
+ if (err)
+ goto err_unregister;
+
+ return 0;
+
+err_unregister:
+ thermal_zone_device_unregister(ptd->tzd);
+error_cleanup:
+ iounmap(ptd->hw_base);
+error_release:
+ pci_release_regions(pdev);
+error_disable:
+ pci_disable_device(pdev);
+ dev_err(&pdev->dev, "pci device failed to probe\n");
+ return err;
+}
+
+static void intel_pch_thermal_remove(struct pci_dev *pdev)
+{
+ struct pch_thermal_device *ptd = pci_get_drvdata(pdev);
+
+ thermal_zone_device_unregister(ptd->tzd);
+ iounmap(ptd->hw_base);
+ pci_set_drvdata(pdev, NULL);
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+}
+
+static int intel_pch_thermal_suspend_noirq(struct device *device)
+{
+ struct pch_thermal_device *ptd = dev_get_drvdata(device);
+ u16 pch_thr_temp, pch_cur_temp;
+ int pch_delay_cnt = 0;
+ u8 tsel;
+
+ /* Shutdown the thermal sensor if it is not enabled by BIOS */
+ if (!ptd->bios_enabled) {
+ tsel = readb(ptd->hw_base + WPT_TSEL);
+ writeb(tsel & 0xFE, ptd->hw_base + WPT_TSEL);
+ return 0;
+ }
+
+ /* Do not check the temperature if this is not an s2idle suspend */
+ if (pm_suspend_via_firmware())
+ return 0;
+
+ /* Get the PCH temperature threshold value */
+ pch_thr_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TSPM));
+
+ /* Get the PCH current temperature value */
+ pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP));
+
+ /*
+ * If the current PCH temperature is higher than the configured PCH
+ * threshold value, run a delay loop with sleeps to let the temperature
+ * drop below the threshold, which helps the system enter the
+ * lower-power S0ix suspend state. If the PCH temperature stays above
+ * the threshold even after the delay loop, emit a warning message,
+ * which helps to identify why S0ix entry was rejected.
+ */
+ while (pch_delay_cnt < delay_cnt) {
+ if (pch_cur_temp < pch_thr_temp)
+ break;
+
+ if (pm_wakeup_pending()) {
+ dev_warn(&ptd->pdev->dev, "Wakeup event detected, abort cooling\n");
+ return 0;
+ }
+
+ pch_delay_cnt++;
+ dev_dbg(&ptd->pdev->dev,
+ "CPU-PCH current temp [%dC] higher than the threshold temp [%dC], sleep %d times for %d ms duration\n",
+ pch_cur_temp, pch_thr_temp, pch_delay_cnt, delay_timeout);
+ msleep(delay_timeout);
+ /* Read the PCH current temperature for next cycle. */
+ pch_cur_temp = GET_PCH_TEMP(WPT_TEMP_TSR & readw(ptd->hw_base + WPT_TEMP));
+ }
+
+ if (pch_cur_temp >= pch_thr_temp)
+ dev_warn(&ptd->pdev->dev,
+ "CPU-PCH is hot [%dC] after %d ms delay. S0ix might fail\n",
+ pch_cur_temp, pch_delay_cnt * delay_timeout);
+ else {
+ if (pch_delay_cnt)
+ dev_info(&ptd->pdev->dev,
+ "CPU-PCH is cool [%dC] after %d ms delay\n",
+ pch_cur_temp, pch_delay_cnt * delay_timeout);
+ else
+ dev_info(&ptd->pdev->dev,
+ "CPU-PCH is cool [%dC]\n",
+ pch_cur_temp);
+ }
+
+ return 0;
+}
+
+static int intel_pch_thermal_resume(struct device *device)
+{
+ struct pch_thermal_device *ptd = dev_get_drvdata(device);
+ u8 tsel;
+
+ if (ptd->bios_enabled)
+ return 0;
+
+ tsel = readb(ptd->hw_base + WPT_TSEL);
+
+ writeb(tsel | WPT_TSEL_ETS, ptd->hw_base + WPT_TSEL);
+
+ return 0;
+}
+
+static const struct pci_device_id intel_pch_thermal_id[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_1),
+ .driver_data = PCH_BOARD_HSW, },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_HSW_2),
+ .driver_data = PCH_BOARD_HSW, },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WPT),
+ .driver_data = PCH_BOARD_WPT, },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL),
+ .driver_data = PCH_BOARD_SKL, },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_SKL_H),
+ .driver_data = PCH_BOARD_SKL, },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL),
+ .driver_data = PCH_BOARD_CNL, },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL_H),
+ .driver_data = PCH_BOARD_CNL, },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CNL_LP),
+ .driver_data = PCH_BOARD_CNL, },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_CML_H),
+ .driver_data = PCH_BOARD_CML, },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_LWB),
+ .driver_data = PCH_BOARD_LWB, },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCH_THERMAL_DID_WBG),
+ .driver_data = PCH_BOARD_WBG, },
+ { 0, },
+};
+MODULE_DEVICE_TABLE(pci, intel_pch_thermal_id);
+
+static const struct dev_pm_ops intel_pch_pm_ops = {
+ .suspend_noirq = intel_pch_thermal_suspend_noirq,
+ .resume = intel_pch_thermal_resume,
+};
+
+static struct pci_driver intel_pch_thermal_driver = {
+ .name = "intel_pch_thermal",
+ .id_table = intel_pch_thermal_id,
+ .probe = intel_pch_thermal_probe,
+ .remove = intel_pch_thermal_remove,
+ .driver.pm = &intel_pch_pm_ops,
+};
+
+module_pci_driver(intel_pch_thermal_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Intel PCH Thermal driver");
diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c
new file mode 100644
index 0000000000..5ac5cb60ba
--- /dev/null
+++ b/drivers/thermal/intel/intel_powerclamp.c
@@ -0,0 +1,849 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_powerclamp.c - package c-state idle injection
+ *
+ * Copyright (c) 2012-2023, Intel Corporation.
+ *
+ * Authors:
+ * Arjan van de Ven <arjan@linux.intel.com>
+ * Jacob Pan <jacob.jun.pan@linux.intel.com>
+ *
+ * TODO:
+ * 1. better handle wakeups from external interrupts; currently a fixed
+ * compensation is added to the clamping duration when an excessive
+ * number of wakeups is observed during idle time. The reason is
+ * that, for external interrupts that need no ack, clamping down a
+ * CPU in non-IRQ context does not reduce the IRQ rate. For the
+ * majority of cases clamping down a CPU does help reduce IRQs as
+ * well; we should be able to differentiate the two cases and give
+ * a quantitative solution for the IRQs that we can control, perhaps
+ * based on get_cpu_iowait_time_us()
+ *
+ * 2. synchronization with other hw blocks
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/cpu.h>
+#include <linux/thermal.h>
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/idle_inject.h>
+
+#include <asm/msr.h>
+#include <asm/mwait.h>
+#include <asm/cpu_device_id.h>
+
+#define MAX_TARGET_RATIO (100U)
+/* For each undisturbed clamping period (no extra wakeups during idle time),
+ * we increment the confidence counter for the given target ratio.
+ * CONFIDENCE_OK defines the level where runtime calibration results are
+ * valid.
+ */
+#define CONFIDENCE_OK (3)
+/* Default idle injection duration; the driver adjusts the sleep time to
+ * meet the target idle ratio. Similar to frequency modulation.
+ */
+#define DEFAULT_DURATION_JIFFIES (6)
+
+static unsigned int target_mwait;
+static struct dentry *debug_dir;
+static bool poll_pkg_cstate_enable;
+
+/* Idle ratio observed using package C-state counters */
+static unsigned int current_ratio;
+
+/* When true, idle injection is skipped */
+static bool should_skip;
+
+struct powerclamp_data {
+ unsigned int cpu;
+ unsigned int count;
+ unsigned int guard;
+ unsigned int window_size_now;
+ unsigned int target_ratio;
+ bool clamping;
+};
+
+static struct powerclamp_data powerclamp_data;
+
+static struct thermal_cooling_device *cooling_dev;
+
+static DEFINE_MUTEX(powerclamp_lock);
+
+/* This duration is in microseconds */
+static unsigned int duration;
+static unsigned int pkg_cstate_ratio_cur;
+static unsigned int window_size;
+
+static int duration_set(const char *arg, const struct kernel_param *kp)
+{
+ int ret = 0;
+ unsigned long new_duration;
+
+ ret = kstrtoul(arg, 10, &new_duration);
+ if (ret)
+ goto exit;
+ if (new_duration > 25 || new_duration < 6) {
+ pr_err("Out of recommended range %lu, between 6-25ms\n",
+ new_duration);
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ mutex_lock(&powerclamp_lock);
+ duration = clamp(new_duration, 6ul, 25ul) * 1000;
+ mutex_unlock(&powerclamp_lock);
+exit:
+
+ return ret;
+}
+
+static int duration_get(char *buf, const struct kernel_param *kp)
+{
+ int ret;
+
+ mutex_lock(&powerclamp_lock);
+ ret = sysfs_emit(buf, "%d\n", duration / 1000);
+ mutex_unlock(&powerclamp_lock);
+
+ return ret;
+}
+
+static const struct kernel_param_ops duration_ops = {
+ .set = duration_set,
+ .get = duration_get,
+};
+
+module_param_cb(duration, &duration_ops, NULL, 0644);
+MODULE_PARM_DESC(duration, "forced idle time for each attempt in msec.");
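+
+/*
+ * Illustrative usage (assuming the module is loaded as intel_powerclamp,
+ * so the parameter appears in the standard module-param sysfs location):
+ *   # echo 10 > /sys/module/intel_powerclamp/parameters/duration
+ */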
+
+#define DEFAULT_MAX_IDLE 50
+#define MAX_ALL_CPU_IDLE 75
+
+static u8 max_idle = DEFAULT_MAX_IDLE;
+
+static cpumask_var_t idle_injection_cpu_mask;
+
+static int allocate_copy_idle_injection_mask(const struct cpumask *copy_mask)
+{
+ if (cpumask_available(idle_injection_cpu_mask))
+ goto copy_mask;
+
+ /* This mask is allocated only once and freed at module exit */
+ if (!alloc_cpumask_var(&idle_injection_cpu_mask, GFP_KERNEL))
+ return -ENOMEM;
+
+copy_mask:
+ cpumask_copy(idle_injection_cpu_mask, copy_mask);
+
+ return 0;
+}
+
+/* Return true if the cpumask and idle percent combination is invalid */
+static bool check_invalid(cpumask_var_t mask, u8 idle)
+{
+ if (cpumask_equal(cpu_present_mask, mask) && idle > MAX_ALL_CPU_IDLE)
+ return true;
+
+ return false;
+}
+
+static int cpumask_set(const char *arg, const struct kernel_param *kp)
+{
+ cpumask_var_t new_mask;
+ int ret;
+
+ mutex_lock(&powerclamp_lock);
+
+ /* Can't set mask when cooling device is in use */
+ if (powerclamp_data.clamping) {
+ ret = -EAGAIN;
+ goto skip_cpumask_set;
+ }
+
+ ret = alloc_cpumask_var(&new_mask, GFP_KERNEL);
+ if (!ret)
+ goto skip_cpumask_set;
+
+ ret = bitmap_parse(arg, strlen(arg), cpumask_bits(new_mask),
+ nr_cpumask_bits);
+ if (ret)
+ goto free_cpumask_set;
+
+ if (cpumask_empty(new_mask) || check_invalid(new_mask, max_idle)) {
+ ret = -EINVAL;
+ goto free_cpumask_set;
+ }
+
+ /*
+ * When module parameters are passed on the kernel command line
+ * during insmod, the module parameter callback is called before
+ * powerclamp_init(), so we cannot assume that a cpumask has been
+ * allocated and copied before this point. In that case this
+ * cpumask is also used as the default mask.
+ */
+ ret = allocate_copy_idle_injection_mask(new_mask);
+
+free_cpumask_set:
+ free_cpumask_var(new_mask);
+skip_cpumask_set:
+ mutex_unlock(&powerclamp_lock);
+
+ return ret;
+}
+
+static int cpumask_get(char *buf, const struct kernel_param *kp)
+{
+ if (!cpumask_available(idle_injection_cpu_mask))
+ return -ENODEV;
+
+ return bitmap_print_to_pagebuf(false, buf, cpumask_bits(idle_injection_cpu_mask),
+ nr_cpumask_bits);
+}
+
+static const struct kernel_param_ops cpumask_ops = {
+ .set = cpumask_set,
+ .get = cpumask_get,
+};
+
+module_param_cb(cpumask, &cpumask_ops, NULL, 0644);
+MODULE_PARM_DESC(cpumask, "Mask of CPUs to use for idle injection.");
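+
+/*
+ * Illustrative usage (same assumption about the module-param sysfs path):
+ *   # echo f > /sys/module/intel_powerclamp/parameters/cpumask
+ * restricts idle injection to CPUs 0-3.
+ */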
+
+static int max_idle_set(const char *arg, const struct kernel_param *kp)
+{
+ u8 new_max_idle;
+ int ret = 0;
+
+ mutex_lock(&powerclamp_lock);
+
+ /* Can't set mask when cooling device is in use */
+ if (powerclamp_data.clamping) {
+ ret = -EAGAIN;
+ goto skip_limit_set;
+ }
+
+ ret = kstrtou8(arg, 10, &new_max_idle);
+ if (ret)
+ goto skip_limit_set;
+
+ if (new_max_idle > MAX_TARGET_RATIO) {
+ ret = -EINVAL;
+ goto skip_limit_set;
+ }
+
+ if (!cpumask_available(idle_injection_cpu_mask)) {
+ ret = allocate_copy_idle_injection_mask(cpu_present_mask);
+ if (ret)
+ goto skip_limit_set;
+ }
+
+ if (check_invalid(idle_injection_cpu_mask, new_max_idle)) {
+ ret = -EINVAL;
+ goto skip_limit_set;
+ }
+
+ max_idle = new_max_idle;
+
+skip_limit_set:
+ mutex_unlock(&powerclamp_lock);
+
+ return ret;
+}
+
+static const struct kernel_param_ops max_idle_ops = {
+ .set = max_idle_set,
+ .get = param_get_byte,
+};
+
+module_param_cb(max_idle, &max_idle_ops, &max_idle, 0644);
+MODULE_PARM_DESC(max_idle, "maximum ratio of injected idle time to total CPU time, in percent (range: 1-100)");
+
+struct powerclamp_calibration_data {
+	unsigned long confidence;  /* used for calibration: a counter
+				    * incremented each time a clamping
+				    * period completes without extra
+				    * wakeups; once it reaches a given
+				    * level, the compensation is deemed
+				    * usable.
+				    */
+	unsigned long steady_comp; /* steady state compensation used when
+				    * no extra wakeups occurred.
+				    */
+	unsigned long dynamic_comp; /* compensates for excessive wakeups
+				     * from idle, mostly due to external
+				     * interrupts.
+				     */
+};
+
+static struct powerclamp_calibration_data cal_data[MAX_TARGET_RATIO];
+
+static int window_size_set(const char *arg, const struct kernel_param *kp)
+{
+ int ret = 0;
+ unsigned long new_window_size;
+
+ ret = kstrtoul(arg, 10, &new_window_size);
+ if (ret)
+ goto exit_win;
+	if (new_window_size > 10 || new_window_size < 2) {
+		pr_err("Window size %lu out of recommended range 2-10\n",
+		       new_window_size);
+		ret = -EINVAL;
+		goto exit_win;
+	}
+
+ window_size = clamp(new_window_size, 2ul, 10ul);
+ smp_mb();
+
+exit_win:
+ return ret;
+}
+
+static const struct kernel_param_ops window_size_ops = {
+ .set = window_size_set,
+ .get = param_get_int,
+};
+
+module_param_cb(window_size, &window_size_ops, &window_size, 0644);
+MODULE_PARM_DESC(window_size, "sliding window in number of clamping cycles\n"
+	"\tpowerclamp controls the idle ratio within this window. A larger\n"
+	"\twindow size results in slower response time but smoother\n"
+	"\tclamping results. Defaults to 2.");
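+
+/*
+ * Worked example (illustrative numbers): with window_size = 5,
+ * idle_inject_update() below re-evaluates the measured idle ratio and
+ * the compensation once every 5 clamping cycles, since it only adjusts
+ * when powerclamp_data.count is a multiple of the window size.
+ */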
+
+static void find_target_mwait(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ unsigned int highest_cstate = 0;
+ unsigned int highest_subcstate = 0;
+ int i;
+
+ if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+ return;
+
+ cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
+ !(ecx & CPUID5_ECX_INTERRUPT_BREAK))
+ return;
+
+ edx >>= MWAIT_SUBSTATE_SIZE;
+ for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+ if (edx & MWAIT_SUBSTATE_MASK) {
+ highest_cstate = i;
+ highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+ }
+ }
+ target_mwait = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+ (highest_subcstate - 1);
+
+}
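+
+/*
+ * Worked example for the hint encoding above (illustrative numbers):
+ * with MWAIT_SUBSTATE_SIZE = 4, if CPUID leaf 5 reports the deepest
+ * C-state at index 6 with 2 sub-states, the loop leaves
+ * highest_cstate = 6 and highest_subcstate = 2, so
+ * target_mwait = (6 << 4) | (2 - 1) = 0x61.
+ */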
+
+struct pkg_cstate_info {
+ bool skip;
+ int msr_index;
+ int cstate_id;
+};
+
+#define PKG_CSTATE_INIT(id) { \
+ .msr_index = MSR_PKG_C##id##_RESIDENCY, \
+ .cstate_id = id \
+ }
+
+static struct pkg_cstate_info pkg_cstates[] = {
+ PKG_CSTATE_INIT(2),
+ PKG_CSTATE_INIT(3),
+ PKG_CSTATE_INIT(6),
+ PKG_CSTATE_INIT(7),
+ PKG_CSTATE_INIT(8),
+ PKG_CSTATE_INIT(9),
+ PKG_CSTATE_INIT(10),
+	{},
+};
+
+static bool has_pkg_state_counter(void)
+{
+ u64 val;
+ struct pkg_cstate_info *info = pkg_cstates;
+
+ /* check if any one of the counter msrs exists */
+ while (info->msr_index) {
+ if (!rdmsrl_safe(info->msr_index, &val))
+ return true;
+ info++;
+ }
+
+ return false;
+}
+
+static u64 pkg_state_counter(void)
+{
+ u64 val;
+ u64 count = 0;
+ struct pkg_cstate_info *info = pkg_cstates;
+
+ while (info->msr_index) {
+ if (!info->skip) {
+ if (!rdmsrl_safe(info->msr_index, &val))
+ count += val;
+ else
+ info->skip = true;
+ }
+ info++;
+ }
+
+ return count;
+}
+
+static unsigned int get_compensation(int ratio)
+{
+ unsigned int comp = 0;
+
+ if (!poll_pkg_cstate_enable)
+ return 0;
+
+ /* we only use compensation if all adjacent ones are good */
+ if (ratio == 1 &&
+ cal_data[ratio].confidence >= CONFIDENCE_OK &&
+ cal_data[ratio + 1].confidence >= CONFIDENCE_OK &&
+ cal_data[ratio + 2].confidence >= CONFIDENCE_OK) {
+ comp = (cal_data[ratio].steady_comp +
+ cal_data[ratio + 1].steady_comp +
+ cal_data[ratio + 2].steady_comp) / 3;
+ } else if (ratio == MAX_TARGET_RATIO - 1 &&
+ cal_data[ratio].confidence >= CONFIDENCE_OK &&
+ cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
+ cal_data[ratio - 2].confidence >= CONFIDENCE_OK) {
+ comp = (cal_data[ratio].steady_comp +
+ cal_data[ratio - 1].steady_comp +
+ cal_data[ratio - 2].steady_comp) / 3;
+ } else if (cal_data[ratio].confidence >= CONFIDENCE_OK &&
+ cal_data[ratio - 1].confidence >= CONFIDENCE_OK &&
+ cal_data[ratio + 1].confidence >= CONFIDENCE_OK) {
+ comp = (cal_data[ratio].steady_comp +
+ cal_data[ratio - 1].steady_comp +
+ cal_data[ratio + 1].steady_comp) / 3;
+ }
+
+ /* do not exceed limit */
+ if (comp + ratio >= MAX_TARGET_RATIO)
+ comp = MAX_TARGET_RATIO - ratio - 1;
+
+ return comp;
+}
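+
+/*
+ * Worked example (illustrative numbers): for a mid-range target such as
+ * ratio = 20, with CONFIDENCE_OK reached at ratios 19, 20 and 21 and
+ * steady_comp values of 2, 3 and 4 respectively, get_compensation()
+ * returns (3 + 2 + 4) / 3 = 3, so the injected ratio becomes 23.
+ */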
+
+static void adjust_compensation(int target_ratio, unsigned int win)
+{
+ int delta;
+ struct powerclamp_calibration_data *d = &cal_data[target_ratio];
+
+ /*
+ * adjust compensations if confidence level has not been reached.
+ */
+ if (d->confidence >= CONFIDENCE_OK)
+ return;
+
+ delta = powerclamp_data.target_ratio - current_ratio;
+ /* filter out bad data */
+ if (delta >= 0 && delta <= (1+target_ratio/10)) {
+ if (d->steady_comp)
+ d->steady_comp =
+ roundup(delta+d->steady_comp, 2)/2;
+ else
+ d->steady_comp = delta;
+ d->confidence++;
+ }
+}
+
+static bool powerclamp_adjust_controls(unsigned int target_ratio,
+ unsigned int guard, unsigned int win)
+{
+ static u64 msr_last, tsc_last;
+ u64 msr_now, tsc_now;
+ u64 val64;
+
+ /* check result for the last window */
+ msr_now = pkg_state_counter();
+ tsc_now = rdtsc();
+
+ /* calculate pkg cstate vs tsc ratio */
+ if (!msr_last || !tsc_last)
+ current_ratio = 1;
+ else if (tsc_now-tsc_last) {
+ val64 = 100*(msr_now-msr_last);
+ do_div(val64, (tsc_now-tsc_last));
+ current_ratio = val64;
+ }
+
+ /* update record */
+ msr_last = msr_now;
+ tsc_last = tsc_now;
+
+ adjust_compensation(target_ratio, win);
+
+ /* if we are above target+guard, skip */
+ return powerclamp_data.target_ratio + guard <= current_ratio;
+}
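+
+/*
+ * Worked example for the ratio computed above (illustrative numbers):
+ * if the package C-state residency counters advanced by 3e9 while the
+ * TSC advanced by 1e10 over the same window, current_ratio becomes
+ * 100 * 3e9 / 1e10 = 30, i.e. 30% package C-state residency.
+ */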
+
+/*
+ * This function calculates runtime from the current target ratio.
+ * This function gets called under powerclamp_lock.
+ */
+static unsigned int get_run_time(void)
+{
+ unsigned int compensated_ratio;
+ unsigned int runtime;
+
+	/*
+	 * Make sure a user-selected ratio does not take effect until
+	 * the next round. Adjust target_ratio if the user has changed
+	 * the target, so that we can converge quickly.
+	 */
+ powerclamp_data.guard = 1 + powerclamp_data.target_ratio / 20;
+ powerclamp_data.window_size_now = window_size;
+
+ /*
+ * systems may have different ability to enter package level
+ * c-states, thus we need to compensate the injected idle ratio
+ * to achieve the actual target reported by the HW.
+ */
+ compensated_ratio = powerclamp_data.target_ratio +
+ get_compensation(powerclamp_data.target_ratio);
+ if (compensated_ratio <= 0)
+ compensated_ratio = 1;
+
+ runtime = duration * 100 / compensated_ratio - duration;
+
+ return runtime;
+}
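+
+/*
+ * Worked example (illustrative numbers): with duration = 12000 us and a
+ * compensated ratio of 30%, runtime = 12000 * 100 / 30 - 12000 =
+ * 28000 us, so 12 ms of idle in every 40 ms cycle yields the requested
+ * 30% injected idle time.
+ */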
+
+/*
+ * 1 Hz polling while clamping is active, useful for userspace
+ * to monitor the actual idle ratio.
+ */
+static void poll_pkg_cstate(struct work_struct *dummy);
+static DECLARE_DELAYED_WORK(poll_pkg_cstate_work, poll_pkg_cstate);
+static void poll_pkg_cstate(struct work_struct *dummy)
+{
+ static u64 msr_last;
+ static u64 tsc_last;
+
+ u64 msr_now;
+ u64 tsc_now;
+ u64 val64;
+
+ msr_now = pkg_state_counter();
+ tsc_now = rdtsc();
+
+ /* calculate pkg cstate vs tsc ratio */
+ if (!msr_last || !tsc_last)
+ pkg_cstate_ratio_cur = 1;
+ else {
+ if (tsc_now - tsc_last) {
+ val64 = 100 * (msr_now - msr_last);
+ do_div(val64, (tsc_now - tsc_last));
+ pkg_cstate_ratio_cur = val64;
+ }
+ }
+
+ /* update record */
+ msr_last = msr_now;
+ tsc_last = tsc_now;
+
+ mutex_lock(&powerclamp_lock);
+ if (powerclamp_data.clamping)
+ schedule_delayed_work(&poll_pkg_cstate_work, HZ);
+ mutex_unlock(&powerclamp_lock);
+}
+
+static struct idle_inject_device *ii_dev;
+
+/*
+ * This function is called from idle injection core on timer expiry
+ * for the run duration. This allows powerclamp to readjust or skip
+ * injecting idle for this cycle.
+ */
+static bool idle_inject_update(void)
+{
+ bool update = false;
+
+ /* We can't sleep in this callback */
+ if (!mutex_trylock(&powerclamp_lock))
+ return true;
+
+ if (!(powerclamp_data.count % powerclamp_data.window_size_now)) {
+
+ should_skip = powerclamp_adjust_controls(powerclamp_data.target_ratio,
+ powerclamp_data.guard,
+ powerclamp_data.window_size_now);
+ update = true;
+ }
+
+ if (update) {
+ unsigned int runtime = get_run_time();
+
+ idle_inject_set_duration(ii_dev, runtime, duration);
+ }
+
+ powerclamp_data.count++;
+
+ mutex_unlock(&powerclamp_lock);
+
+ if (should_skip)
+ return false;
+
+ return true;
+}
+
+/* This function starts idle injection by calling idle_inject_start() */
+static void trigger_idle_injection(void)
+{
+ unsigned int runtime = get_run_time();
+
+ idle_inject_set_duration(ii_dev, runtime, duration);
+ idle_inject_start(ii_dev);
+ powerclamp_data.clamping = true;
+}
+
+/*
+ * This function is called from start_power_clamp() to register
+ * CPUs with the powercap idle injection core and to set the
+ * default idle duration and latency.
+ */
+static int powerclamp_idle_injection_register(void)
+{
+ poll_pkg_cstate_enable = false;
+ if (cpumask_equal(cpu_present_mask, idle_injection_cpu_mask)) {
+ ii_dev = idle_inject_register_full(idle_injection_cpu_mask, idle_inject_update);
+ if (topology_max_packages() == 1 && topology_max_die_per_package() == 1)
+ poll_pkg_cstate_enable = true;
+ } else {
+ ii_dev = idle_inject_register(idle_injection_cpu_mask);
+ }
+
+ if (!ii_dev) {
+ pr_err("powerclamp: idle_inject_register failed\n");
+ return -EAGAIN;
+ }
+
+ idle_inject_set_duration(ii_dev, TICK_USEC, duration);
+ idle_inject_set_latency(ii_dev, UINT_MAX);
+
+ return 0;
+}
+
+/*
+ * This function is called from end_power_clamp() to stop idle injection
+ * and unregister CPUs from the powercap idle injection core.
+ */
+static void remove_idle_injection(void)
+{
+ if (!powerclamp_data.clamping)
+ return;
+
+ powerclamp_data.clamping = false;
+ idle_inject_stop(ii_dev);
+}
+
+/*
+ * This function is called when the user changes the cooling device
+ * state from zero to a nonzero value.
+ */
+static int start_power_clamp(void)
+{
+ int ret;
+
+ ret = powerclamp_idle_injection_register();
+ if (!ret) {
+ trigger_idle_injection();
+ if (poll_pkg_cstate_enable)
+ schedule_delayed_work(&poll_pkg_cstate_work, 0);
+ }
+
+ return ret;
+}
+
+/*
+ * This function is called when the user changes the cooling device
+ * state from a nonzero value to zero.
+ */
+static void end_power_clamp(void)
+{
+ if (powerclamp_data.clamping) {
+ remove_idle_injection();
+ idle_inject_unregister(ii_dev);
+ }
+}
+
+static int powerclamp_get_max_state(struct thermal_cooling_device *cdev,
+ unsigned long *state)
+{
+ *state = MAX_TARGET_RATIO;
+
+ return 0;
+}
+
+static int powerclamp_get_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long *state)
+{
+ mutex_lock(&powerclamp_lock);
+ *state = powerclamp_data.target_ratio;
+ mutex_unlock(&powerclamp_lock);
+
+ return 0;
+}
+
+static int powerclamp_set_cur_state(struct thermal_cooling_device *cdev,
+ unsigned long new_target_ratio)
+{
+ int ret = 0;
+
+ mutex_lock(&powerclamp_lock);
+
+ new_target_ratio = clamp(new_target_ratio, 0UL,
+ (unsigned long) (max_idle - 1));
+
+ if (powerclamp_data.target_ratio == new_target_ratio)
+ goto exit_set;
+
+ if (!powerclamp_data.target_ratio && new_target_ratio > 0) {
+ pr_info("Start idle injection to reduce power\n");
+ powerclamp_data.target_ratio = new_target_ratio;
+ ret = start_power_clamp();
+ if (ret)
+ powerclamp_data.target_ratio = 0;
+ goto exit_set;
+ } else if (powerclamp_data.target_ratio > 0 && new_target_ratio == 0) {
+ pr_info("Stop forced idle injection\n");
+ end_power_clamp();
+ powerclamp_data.target_ratio = 0;
+ } else /* adjust currently running */ {
+ unsigned int runtime;
+
+ powerclamp_data.target_ratio = new_target_ratio;
+ runtime = get_run_time();
+ idle_inject_set_duration(ii_dev, runtime, duration);
+ }
+
+exit_set:
+ mutex_unlock(&powerclamp_lock);
+
+ return ret;
+}
+
+/* Bind to the generic thermal layer as a cooling device */
+static const struct thermal_cooling_device_ops powerclamp_cooling_ops = {
+ .get_max_state = powerclamp_get_max_state,
+ .get_cur_state = powerclamp_get_cur_state,
+ .set_cur_state = powerclamp_set_cur_state,
+};
+
+static const struct x86_cpu_id __initconst intel_powerclamp_ids[] = {
+ X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_MWAIT, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);
+
+static int __init powerclamp_probe(void)
+{
+ if (!x86_match_cpu(intel_powerclamp_ids)) {
+ pr_err("CPU does not support MWAIT\n");
+ return -ENODEV;
+ }
+
+	/* The goal of idle time injection is to achieve package C-state residency. */
+ if (!has_pkg_state_counter()) {
+ pr_info("No package C-state available\n");
+ return -ENODEV;
+ }
+
+ /* find the deepest mwait value */
+ find_target_mwait();
+
+ return 0;
+}
+
+static int powerclamp_debug_show(struct seq_file *m, void *unused)
+{
+ int i = 0;
+
+ seq_printf(m, "pct confidence steady dynamic (compensation)\n");
+ for (i = 0; i < MAX_TARGET_RATIO; i++) {
+ seq_printf(m, "%d\t%lu\t%lu\t%lu\n",
+ i,
+ cal_data[i].confidence,
+ cal_data[i].steady_comp,
+ cal_data[i].dynamic_comp);
+ }
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(powerclamp_debug);
+
+static inline void powerclamp_create_debug_files(void)
+{
+ debug_dir = debugfs_create_dir("intel_powerclamp", NULL);
+
+ debugfs_create_file("powerclamp_calib", S_IRUGO, debug_dir, cal_data,
+ &powerclamp_debug_fops);
+}
+
+static int __init powerclamp_init(void)
+{
+ int retval;
+
+ /* probe cpu features and ids here */
+ retval = powerclamp_probe();
+ if (retval)
+ return retval;
+
+ mutex_lock(&powerclamp_lock);
+ if (!cpumask_available(idle_injection_cpu_mask))
+ retval = allocate_copy_idle_injection_mask(cpu_present_mask);
+ mutex_unlock(&powerclamp_lock);
+
+ if (retval)
+ return retval;
+
+ /* set default limit, maybe adjusted during runtime based on feedback */
+ window_size = 2;
+
+ cooling_dev = thermal_cooling_device_register("intel_powerclamp", NULL,
+ &powerclamp_cooling_ops);
+ if (IS_ERR(cooling_dev))
+ return -ENODEV;
+
+ if (!duration)
+ duration = jiffies_to_usecs(DEFAULT_DURATION_JIFFIES);
+
+ powerclamp_create_debug_files();
+
+ return 0;
+}
+module_init(powerclamp_init);
+
+static void __exit powerclamp_exit(void)
+{
+ mutex_lock(&powerclamp_lock);
+ end_power_clamp();
+ mutex_unlock(&powerclamp_lock);
+
+ thermal_cooling_device_unregister(cooling_dev);
+
+ cancel_delayed_work_sync(&poll_pkg_cstate_work);
+ debugfs_remove_recursive(debug_dir);
+
+ if (cpumask_available(idle_injection_cpu_mask))
+ free_cpumask_var(idle_injection_cpu_mask);
+}
+module_exit(powerclamp_exit);
+
+MODULE_IMPORT_NS(IDLE_INJECT);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arjan van de Ven <arjan@linux.intel.com>");
+MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@linux.intel.com>");
+MODULE_DESCRIPTION("Package Level C-state Idle Injection for Intel CPUs");
diff --git a/drivers/thermal/intel/intel_quark_dts_thermal.c b/drivers/thermal/intel/intel_quark_dts_thermal.c
new file mode 100644
index 0000000000..646ca8bd40
--- /dev/null
+++ b/drivers/thermal/intel/intel_quark_dts_thermal.c
@@ -0,0 +1,423 @@
+/*
+ * intel_quark_dts_thermal.c
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Contact Information:
+ * Ong Boon Leong <boon.leong.ong@intel.com>
+ * Intel Malaysia, Penang
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Quark DTS thermal driver is implemented by referencing
+ * intel_soc_dts_thermal.c.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/thermal.h>
+#include <asm/cpu_device_id.h>
+#include <asm/iosf_mbi.h>
+
+/* DTS reset is programmed via QRK_MBI_UNIT_SOC */
+#define QRK_DTS_REG_OFFSET_RESET 0x34
+#define QRK_DTS_RESET_BIT BIT(0)
+
+/* DTS enable is programmed via QRK_MBI_UNIT_RMU */
+#define QRK_DTS_REG_OFFSET_ENABLE 0xB0
+#define QRK_DTS_ENABLE_BIT BIT(15)
+
+/* Temperature Register is read via QRK_MBI_UNIT_RMU */
+#define QRK_DTS_REG_OFFSET_TEMP 0xB1
+#define QRK_DTS_MASK_TEMP 0xFF
+#define QRK_DTS_OFFSET_TEMP 0
+#define QRK_DTS_OFFSET_REL_TEMP 16
+#define QRK_DTS_TEMP_BASE 50
+
+/* Programmable Trip Point Register is configured via QRK_MBI_UNIT_RMU */
+#define QRK_DTS_REG_OFFSET_PTPS 0xB2
+#define QRK_DTS_MASK_TP_THRES 0xFF
+#define QRK_DTS_SHIFT_TP 8
+#define QRK_DTS_ID_TP_CRITICAL 0
+#define QRK_DTS_ID_TP_HOT 1
+#define QRK_DTS_SAFE_TP_THRES 105
+
+/* Thermal Sensor Register Lock */
+#define QRK_DTS_REG_OFFSET_LOCK 0x71
+#define QRK_DTS_LOCK_BIT BIT(5)
+
+/* Quark DTS has 2 trip points: hot & catastrophic */
+#define QRK_MAX_DTS_TRIPS 2
+/* If DTS not locked, all trip points are configurable */
+#define QRK_DTS_WR_MASK_SET 0x3
+/* If DTS locked, all trip points are not configurable */
+#define QRK_DTS_WR_MASK_CLR 0
+
+#define DEFAULT_POLL_DELAY 2000
+
+struct soc_sensor_entry {
+ bool locked;
+ u32 store_ptps;
+ u32 store_dts_enable;
+ struct thermal_zone_device *tzone;
+ struct thermal_trip trips[QRK_MAX_DTS_TRIPS];
+};
+
+static struct soc_sensor_entry *soc_dts;
+
+static int polling_delay = DEFAULT_POLL_DELAY;
+module_param(polling_delay, int, 0644);
+MODULE_PARM_DESC(polling_delay,
+ "Polling interval for checking trip points (in milliseconds)");
+
+static DEFINE_MUTEX(dts_update_mutex);
+
+static int soc_dts_enable(struct thermal_zone_device *tzd)
+{
+ u32 out;
+ struct soc_sensor_entry *aux_entry = thermal_zone_device_priv(tzd);
+ int ret;
+
+ ret = iosf_mbi_read(QRK_MBI_UNIT_RMU, MBI_REG_READ,
+ QRK_DTS_REG_OFFSET_ENABLE, &out);
+ if (ret)
+ return ret;
+
+ if (out & QRK_DTS_ENABLE_BIT)
+ return 0;
+
+ if (!aux_entry->locked) {
+ out |= QRK_DTS_ENABLE_BIT;
+ ret = iosf_mbi_write(QRK_MBI_UNIT_RMU, MBI_REG_WRITE,
+ QRK_DTS_REG_OFFSET_ENABLE, out);
+ if (ret)
+ return ret;
+ } else {
+ pr_info("DTS is locked. Cannot enable DTS\n");
+ ret = -EPERM;
+ }
+
+ return ret;
+}
+
+static int soc_dts_disable(struct thermal_zone_device *tzd)
+{
+ u32 out;
+ struct soc_sensor_entry *aux_entry = thermal_zone_device_priv(tzd);
+ int ret;
+
+ ret = iosf_mbi_read(QRK_MBI_UNIT_RMU, MBI_REG_READ,
+ QRK_DTS_REG_OFFSET_ENABLE, &out);
+ if (ret)
+ return ret;
+
+ if (!(out & QRK_DTS_ENABLE_BIT))
+ return 0;
+
+ if (!aux_entry->locked) {
+ out &= ~QRK_DTS_ENABLE_BIT;
+ ret = iosf_mbi_write(QRK_MBI_UNIT_RMU, MBI_REG_WRITE,
+ QRK_DTS_REG_OFFSET_ENABLE, out);
+
+ if (ret)
+ return ret;
+ } else {
+ pr_info("DTS is locked. Cannot disable DTS\n");
+ ret = -EPERM;
+ }
+
+ return ret;
+}
+
+static int get_trip_temp(int trip)
+{
+ int status, temp;
+ u32 out;
+
+ mutex_lock(&dts_update_mutex);
+ status = iosf_mbi_read(QRK_MBI_UNIT_RMU, MBI_REG_READ,
+ QRK_DTS_REG_OFFSET_PTPS, &out);
+ mutex_unlock(&dts_update_mutex);
+
+ if (status)
+ return THERMAL_TEMP_INVALID;
+
+ /*
+ * Thermal Sensor Programmable Trip Point Register has 8-bit
+ * fields for critical (catastrophic) and hot set trip point
+ * thresholds. The threshold value is always offset by its
+ * temperature base (50 degree Celsius).
+ */
+ temp = (out >> (trip * QRK_DTS_SHIFT_TP)) & QRK_DTS_MASK_TP_THRES;
+ temp -= QRK_DTS_TEMP_BASE;
+
+ return temp;
+}
+
+static int update_trip_temp(struct soc_sensor_entry *aux_entry,
+ int trip, int temp)
+{
+ u32 out;
+ u32 temp_out;
+ u32 store_ptps;
+ int ret;
+
+ mutex_lock(&dts_update_mutex);
+ if (aux_entry->locked) {
+ ret = -EPERM;
+ goto failed;
+ }
+
+ ret = iosf_mbi_read(QRK_MBI_UNIT_RMU, MBI_REG_READ,
+ QRK_DTS_REG_OFFSET_PTPS, &store_ptps);
+ if (ret)
+ goto failed;
+
+	/*
+	 * Protection against an unsafe trip point threshold value.
+	 * As the Quark X1000 datasheet does not provide any recommendation
+	 * regarding the safe trip point threshold value to use, we choose
+	 * the safe value according to the threshold value set by UEFI BIOS.
+	 */
+ if (temp > QRK_DTS_SAFE_TP_THRES)
+ temp = QRK_DTS_SAFE_TP_THRES;
+
+ /*
+ * Thermal Sensor Programmable Trip Point Register has 8-bit
+ * fields for critical (catastrophic) and hot set trip point
+ * thresholds. The threshold value is always offset by its
+ * temperature base (50 degree Celsius).
+ */
+ temp_out = temp + QRK_DTS_TEMP_BASE;
+ out = (store_ptps & ~(QRK_DTS_MASK_TP_THRES <<
+ (trip * QRK_DTS_SHIFT_TP)));
+ out |= (temp_out & QRK_DTS_MASK_TP_THRES) <<
+ (trip * QRK_DTS_SHIFT_TP);
+
+ ret = iosf_mbi_write(QRK_MBI_UNIT_RMU, MBI_REG_WRITE,
+ QRK_DTS_REG_OFFSET_PTPS, out);
+
+failed:
+ mutex_unlock(&dts_update_mutex);
+ return ret;
+}
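+
+/*
+ * Worked example of the PTPS encoding above (illustrative numbers):
+ * programming the hot trip (index 1) to 100 degrees C stores
+ * 100 + 50 = 150 (0x96) in bits 15:8 of the PTPS register, while the
+ * critical trip (index 0) occupies bits 7:0.
+ */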
+
+static inline int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
+ int temp)
+{
+ return update_trip_temp(thermal_zone_device_priv(tzd), trip, temp);
+}
+
+static int sys_get_curr_temp(struct thermal_zone_device *tzd,
+ int *temp)
+{
+ u32 out;
+ int ret;
+
+ mutex_lock(&dts_update_mutex);
+ ret = iosf_mbi_read(QRK_MBI_UNIT_RMU, MBI_REG_READ,
+ QRK_DTS_REG_OFFSET_TEMP, &out);
+ mutex_unlock(&dts_update_mutex);
+
+ if (ret)
+ return ret;
+
+ /*
+ * Thermal Sensor Temperature Register has 8-bit field
+ * for temperature value (offset by temperature base
+ * 50 degree Celsius).
+ */
+ out = (out >> QRK_DTS_OFFSET_TEMP) & QRK_DTS_MASK_TEMP;
+ *temp = out - QRK_DTS_TEMP_BASE;
+
+ return 0;
+}
+
+static int sys_change_mode(struct thermal_zone_device *tzd,
+ enum thermal_device_mode mode)
+{
+ int ret;
+
+ mutex_lock(&dts_update_mutex);
+ if (mode == THERMAL_DEVICE_ENABLED)
+ ret = soc_dts_enable(tzd);
+ else
+ ret = soc_dts_disable(tzd);
+ mutex_unlock(&dts_update_mutex);
+
+ return ret;
+}
+
+static struct thermal_zone_device_ops tzone_ops = {
+ .get_temp = sys_get_curr_temp,
+ .set_trip_temp = sys_set_trip_temp,
+ .change_mode = sys_change_mode,
+};
+
+static void free_soc_dts(struct soc_sensor_entry *aux_entry)
+{
+ if (aux_entry) {
+ if (!aux_entry->locked) {
+ mutex_lock(&dts_update_mutex);
+ iosf_mbi_write(QRK_MBI_UNIT_RMU, MBI_REG_WRITE,
+ QRK_DTS_REG_OFFSET_ENABLE,
+ aux_entry->store_dts_enable);
+
+ iosf_mbi_write(QRK_MBI_UNIT_RMU, MBI_REG_WRITE,
+ QRK_DTS_REG_OFFSET_PTPS,
+ aux_entry->store_ptps);
+ mutex_unlock(&dts_update_mutex);
+ }
+ thermal_zone_device_unregister(aux_entry->tzone);
+ kfree(aux_entry);
+ }
+}
+
+static struct soc_sensor_entry *alloc_soc_dts(void)
+{
+ struct soc_sensor_entry *aux_entry;
+ int err;
+ u32 out;
+ int wr_mask;
+
+ aux_entry = kzalloc(sizeof(*aux_entry), GFP_KERNEL);
+	if (!aux_entry)
+		return ERR_PTR(-ENOMEM);
+
+ /* Check if DTS register is locked */
+ err = iosf_mbi_read(QRK_MBI_UNIT_RMU, MBI_REG_READ,
+ QRK_DTS_REG_OFFSET_LOCK, &out);
+ if (err)
+ goto err_ret;
+
+ if (out & QRK_DTS_LOCK_BIT) {
+ aux_entry->locked = true;
+ wr_mask = QRK_DTS_WR_MASK_CLR;
+ } else {
+ aux_entry->locked = false;
+ wr_mask = QRK_DTS_WR_MASK_SET;
+ }
+
+ /* Store DTS default state if DTS registers are not locked */
+ if (!aux_entry->locked) {
+ /* Store DTS default enable for restore on exit */
+ err = iosf_mbi_read(QRK_MBI_UNIT_RMU, MBI_REG_READ,
+ QRK_DTS_REG_OFFSET_ENABLE,
+ &aux_entry->store_dts_enable);
+ if (err)
+ goto err_ret;
+
+ /* Store DTS default PTPS register for restore on exit */
+ err = iosf_mbi_read(QRK_MBI_UNIT_RMU, MBI_REG_READ,
+ QRK_DTS_REG_OFFSET_PTPS,
+ &aux_entry->store_ptps);
+ if (err)
+ goto err_ret;
+ }
+
+ aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].temperature = get_trip_temp(QRK_DTS_ID_TP_CRITICAL);
+ aux_entry->trips[QRK_DTS_ID_TP_CRITICAL].type = THERMAL_TRIP_CRITICAL;
+
+ aux_entry->trips[QRK_DTS_ID_TP_HOT].temperature = get_trip_temp(QRK_DTS_ID_TP_HOT);
+ aux_entry->trips[QRK_DTS_ID_TP_HOT].type = THERMAL_TRIP_HOT;
+
+ aux_entry->tzone = thermal_zone_device_register_with_trips("quark_dts",
+ aux_entry->trips,
+ QRK_MAX_DTS_TRIPS,
+ wr_mask,
+ aux_entry, &tzone_ops,
+ NULL, 0, polling_delay);
+ if (IS_ERR(aux_entry->tzone)) {
+ err = PTR_ERR(aux_entry->tzone);
+ goto err_ret;
+ }
+
+ err = thermal_zone_device_enable(aux_entry->tzone);
+ if (err)
+ goto err_aux_status;
+
+ return aux_entry;
+
+err_aux_status:
+ thermal_zone_device_unregister(aux_entry->tzone);
+err_ret:
+ kfree(aux_entry);
+ return ERR_PTR(err);
+}
+
+static const struct x86_cpu_id qrk_thermal_ids[] __initconst = {
+ X86_MATCH_VENDOR_FAM_MODEL(INTEL, 5, INTEL_FAM5_QUARK_X1000, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, qrk_thermal_ids);
+
+static int __init intel_quark_thermal_init(void)
+{
+ if (!x86_match_cpu(qrk_thermal_ids) || !iosf_mbi_available())
+ return -ENODEV;
+
+ soc_dts = alloc_soc_dts();
+ if (IS_ERR(soc_dts))
+ return PTR_ERR(soc_dts);
+
+ return 0;
+}
+
+static void __exit intel_quark_thermal_exit(void)
+{
+ free_soc_dts(soc_dts);
+}
+
+module_init(intel_quark_thermal_init)
+module_exit(intel_quark_thermal_exit)
+
+MODULE_DESCRIPTION("Intel Quark DTS Thermal Driver");
+MODULE_AUTHOR("Ong Boon Leong <boon.leong.ong@intel.com>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.c b/drivers/thermal/intel/intel_soc_dts_iosf.c
new file mode 100644
index 0000000000..d00def3c47
--- /dev/null
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_soc_dts_iosf.c
+ * Copyright (c) 2015, Intel Corporation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/bitops.h>
+#include <linux/intel_tcc.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <asm/iosf_mbi.h>
+#include "intel_soc_dts_iosf.h"
+
+#define SOC_DTS_OFFSET_ENABLE 0xB0
+#define SOC_DTS_OFFSET_TEMP 0xB1
+
+#define SOC_DTS_OFFSET_PTPS 0xB2
+#define SOC_DTS_OFFSET_PTTS 0xB3
+#define SOC_DTS_OFFSET_PTTSS 0xB4
+#define SOC_DTS_OFFSET_PTMC 0x80
+#define SOC_DTS_TE_AUX0 0xB5
+#define SOC_DTS_TE_AUX1 0xB6
+
+#define SOC_DTS_AUX0_ENABLE_BIT BIT(0)
+#define SOC_DTS_AUX1_ENABLE_BIT BIT(1)
+#define SOC_DTS_CPU_MODULE0_ENABLE_BIT BIT(16)
+#define SOC_DTS_CPU_MODULE1_ENABLE_BIT BIT(17)
+#define SOC_DTS_TE_SCI_ENABLE BIT(9)
+#define SOC_DTS_TE_SMI_ENABLE BIT(10)
+#define SOC_DTS_TE_MSI_ENABLE BIT(11)
+#define SOC_DTS_TE_APICA_ENABLE BIT(14)
+#define SOC_DTS_PTMC_APIC_DEASSERT_BIT BIT(4)
+
+/* DTS encoding for TJ MAX temperature */
+#define SOC_DTS_TJMAX_ENCODING 0x7F
+
+/* Mask for two trips in status bits */
+#define SOC_DTS_TRIP_MASK 0x03
+
+static int update_trip_temp(struct intel_soc_dts_sensors *sensors,
+ int thres_index, int temp)
+{
+ int status;
+ u32 temp_out;
+ u32 out;
+ unsigned long update_ptps;
+ u32 store_ptps;
+ u32 store_ptmc;
+ u32 store_te_out;
+ u32 te_out;
+ u32 int_enable_bit = SOC_DTS_TE_APICA_ENABLE;
+
+ if (sensors->intr_type == INTEL_SOC_DTS_INTERRUPT_MSI)
+ int_enable_bit |= SOC_DTS_TE_MSI_ENABLE;
+
+ temp_out = (sensors->tj_max - temp) / 1000;
+
+ status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ SOC_DTS_OFFSET_PTPS, &store_ptps);
+ if (status)
+ return status;
+
+ update_ptps = store_ptps;
+ bitmap_set_value8(&update_ptps, temp_out & 0xFF, thres_index * 8);
+ out = update_ptps;
+
+ status = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+ SOC_DTS_OFFSET_PTPS, out);
+ if (status)
+ return status;
+
+ pr_debug("update_trip_temp PTPS = %x\n", out);
+ status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ SOC_DTS_OFFSET_PTMC, &out);
+ if (status)
+ goto err_restore_ptps;
+
+ store_ptmc = out;
+
+ status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ SOC_DTS_TE_AUX0 + thres_index,
+ &te_out);
+ if (status)
+ goto err_restore_ptmc;
+
+ store_te_out = te_out;
+ /* Enable for CPU module 0 and module 1 */
+ out |= (SOC_DTS_CPU_MODULE0_ENABLE_BIT |
+ SOC_DTS_CPU_MODULE1_ENABLE_BIT);
+ if (temp) {
+ if (thres_index)
+ out |= SOC_DTS_AUX1_ENABLE_BIT;
+ else
+ out |= SOC_DTS_AUX0_ENABLE_BIT;
+ te_out |= int_enable_bit;
+ } else {
+ if (thres_index)
+ out &= ~SOC_DTS_AUX1_ENABLE_BIT;
+ else
+ out &= ~SOC_DTS_AUX0_ENABLE_BIT;
+ te_out &= ~int_enable_bit;
+ }
+ status = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+ SOC_DTS_OFFSET_PTMC, out);
+ if (status)
+ goto err_restore_te_out;
+
+ status = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+ SOC_DTS_TE_AUX0 + thres_index,
+ te_out);
+ if (status)
+ goto err_restore_te_out;
+
+ return 0;
+err_restore_te_out:
+	iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+		       SOC_DTS_TE_AUX0 + thres_index, store_te_out);
+err_restore_ptmc:
+ iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+ SOC_DTS_OFFSET_PTMC, store_ptmc);
+err_restore_ptps:
+ iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+ SOC_DTS_OFFSET_PTPS, store_ptps);
+ /* Nothing we can do if restore fails */
+
+ return status;
+}
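+
+/*
+ * Worked example of the PTPS encoding above (illustrative numbers):
+ * with tj_max = 90000 (90 degrees C in millidegrees), a trip requested
+ * at 85000 is stored as (90000 - 85000) / 1000 = 5 in the 8-bit PTPS
+ * field selected by thres_index via bitmap_set_value8().
+ */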
+
+static int configure_trip(struct intel_soc_dts_sensor_entry *dts,
+ int thres_index, enum thermal_trip_type trip_type,
+ int temp)
+{
+ int ret;
+
+ ret = update_trip_temp(dts->sensors, thres_index, temp);
+ if (ret)
+ return ret;
+
+ dts->trips[thres_index].temperature = temp;
+ dts->trips[thres_index].type = trip_type;
+
+ return 0;
+}
+
+static int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip,
+ int temp)
+{
+ struct intel_soc_dts_sensor_entry *dts = thermal_zone_device_priv(tzd);
+ struct intel_soc_dts_sensors *sensors = dts->sensors;
+ int status;
+
+ if (temp > sensors->tj_max)
+ return -EINVAL;
+
+ mutex_lock(&sensors->dts_update_lock);
+ status = update_trip_temp(sensors, trip, temp);
+ mutex_unlock(&sensors->dts_update_lock);
+
+ return status;
+}
+
+static int sys_get_curr_temp(struct thermal_zone_device *tzd,
+ int *temp)
+{
+ int status;
+ u32 out;
+ struct intel_soc_dts_sensor_entry *dts = thermal_zone_device_priv(tzd);
+ struct intel_soc_dts_sensors *sensors;
+ unsigned long raw;
+
+ sensors = dts->sensors;
+ status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ SOC_DTS_OFFSET_TEMP, &out);
+ if (status)
+ return status;
+
+ raw = out;
+ out = bitmap_get_value8(&raw, dts->id * 8) - SOC_DTS_TJMAX_ENCODING;
+ *temp = sensors->tj_max - out * 1000;
+
+ return 0;
+}
+
+static struct thermal_zone_device_ops tzone_ops = {
+ .get_temp = sys_get_curr_temp,
+ .set_trip_temp = sys_set_trip_temp,
+};
+
+static int soc_dts_enable(int id)
+{
+ u32 out;
+ int ret;
+
+ ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ SOC_DTS_OFFSET_ENABLE, &out);
+ if (ret)
+ return ret;
+
+ if (!(out & BIT(id))) {
+ out |= BIT(id);
+ ret = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+ SOC_DTS_OFFSET_ENABLE, out);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static void remove_dts_thermal_zone(struct intel_soc_dts_sensor_entry *dts)
+{
+ iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+ SOC_DTS_OFFSET_ENABLE, dts->store_status);
+ thermal_zone_device_unregister(dts->tzone);
+}
+
+static int add_dts_thermal_zone(int id, struct intel_soc_dts_sensor_entry *dts,
+ bool critical_trip)
+{
+ int writable_trip_cnt = SOC_MAX_DTS_TRIPS;
+ char name[10];
+ unsigned long trip;
+ int trip_mask;
+ unsigned long ptps;
+ u32 store_ptps;
+ unsigned long i;
+ int ret;
+
+	/* Store status to restore on exit */
+ ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ SOC_DTS_OFFSET_ENABLE, &dts->store_status);
+ if (ret)
+ goto err_ret;
+
+ dts->id = id;
+
+ if (critical_trip)
+ writable_trip_cnt--;
+
+ trip_mask = GENMASK(writable_trip_cnt - 1, 0);
+
+	/* Check that the writable trips we provide are not already used by the BIOS */
+ ret = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ SOC_DTS_OFFSET_PTPS, &store_ptps);
+	if (ret) {
+		trip_mask = 0;
+	} else {
+		ptps = store_ptps;
+		for_each_set_clump8(i, trip, &ptps, writable_trip_cnt * 8)
+			trip_mask &= ~BIT(i / 8);
+	}
+ dts->trip_mask = trip_mask;
+ snprintf(name, sizeof(name), "soc_dts%d", id);
+ dts->tzone = thermal_zone_device_register_with_trips(name, dts->trips,
+ SOC_MAX_DTS_TRIPS,
+ trip_mask,
+ dts, &tzone_ops,
+ NULL, 0, 0);
+ if (IS_ERR(dts->tzone)) {
+ ret = PTR_ERR(dts->tzone);
+ goto err_ret;
+ }
+ ret = thermal_zone_device_enable(dts->tzone);
+ if (ret)
+ goto err_enable;
+
+ ret = soc_dts_enable(id);
+ if (ret)
+ goto err_enable;
+
+ return 0;
+err_enable:
+ thermal_zone_device_unregister(dts->tzone);
+err_ret:
+ return ret;
+}
+
+void intel_soc_dts_iosf_interrupt_handler(struct intel_soc_dts_sensors *sensors)
+{
+ u32 sticky_out;
+ int status;
+ u32 ptmc_out;
+ unsigned long flags;
+
+ spin_lock_irqsave(&sensors->intr_notify_lock, flags);
+
+ status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ SOC_DTS_OFFSET_PTMC, &ptmc_out);
+ ptmc_out |= SOC_DTS_PTMC_APIC_DEASSERT_BIT;
+ status = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+ SOC_DTS_OFFSET_PTMC, ptmc_out);
+
+ status = iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_REG_READ,
+ SOC_DTS_OFFSET_PTTSS, &sticky_out);
+ pr_debug("status %d PTTSS %x\n", status, sticky_out);
+ if (sticky_out & SOC_DTS_TRIP_MASK) {
+ int i;
+ /* reset sticky bit */
+ status = iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_REG_WRITE,
+ SOC_DTS_OFFSET_PTTSS, sticky_out);
+ spin_unlock_irqrestore(&sensors->intr_notify_lock, flags);
+
+ for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
+ pr_debug("TZD update for zone %d\n", i);
+ thermal_zone_device_update(sensors->soc_dts[i].tzone,
+ THERMAL_EVENT_UNSPECIFIED);
+ }
+	} else {
+		spin_unlock_irqrestore(&sensors->intr_notify_lock, flags);
+	}
+}
+EXPORT_SYMBOL_GPL(intel_soc_dts_iosf_interrupt_handler);
+
+static void dts_trips_reset(struct intel_soc_dts_sensors *sensors, int dts_index)
+{
+ configure_trip(&sensors->soc_dts[dts_index], 0, 0, 0);
+ configure_trip(&sensors->soc_dts[dts_index], 1, 0, 0);
+}
+
+struct intel_soc_dts_sensors *
+intel_soc_dts_iosf_init(enum intel_soc_dts_interrupt_type intr_type,
+ bool critical_trip, int crit_offset)
+{
+ struct intel_soc_dts_sensors *sensors;
+ int tj_max;
+ int ret;
+ int i;
+
+ if (!iosf_mbi_available())
+ return ERR_PTR(-ENODEV);
+
+ tj_max = intel_tcc_get_tjmax(-1);
+ if (tj_max < 0)
+ return ERR_PTR(tj_max);
+
+ sensors = kzalloc(sizeof(*sensors), GFP_KERNEL);
+ if (!sensors)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock_init(&sensors->intr_notify_lock);
+ mutex_init(&sensors->dts_update_lock);
+ sensors->intr_type = intr_type;
+ sensors->tj_max = tj_max * 1000;
+
+ for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
+ enum thermal_trip_type trip_type;
+ int temp;
+
+ sensors->soc_dts[i].sensors = sensors;
+
+ ret = configure_trip(&sensors->soc_dts[i], 0,
+ THERMAL_TRIP_PASSIVE, 0);
+ if (ret)
+ goto err_reset_trips;
+
+ if (critical_trip) {
+ trip_type = THERMAL_TRIP_CRITICAL;
+ temp = sensors->tj_max - crit_offset;
+ } else {
+ trip_type = THERMAL_TRIP_PASSIVE;
+ temp = 0;
+ }
+ ret = configure_trip(&sensors->soc_dts[i], 1, trip_type, temp);
+ if (ret)
+ goto err_reset_trips;
+ }
+
+ for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
+ ret = add_dts_thermal_zone(i, &sensors->soc_dts[i], critical_trip);
+ if (ret)
+ goto err_remove_zone;
+ }
+
+ return sensors;
+
+err_remove_zone:
+ for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i)
+ remove_dts_thermal_zone(&sensors->soc_dts[i]);
+
+err_reset_trips:
+ for (i = 0; i < SOC_MAX_DTS_SENSORS; i++)
+ dts_trips_reset(sensors, i);
+
+ kfree(sensors);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(intel_soc_dts_iosf_init);
+
+void intel_soc_dts_iosf_exit(struct intel_soc_dts_sensors *sensors)
+{
+ int i;
+
+ for (i = 0; i < SOC_MAX_DTS_SENSORS; ++i) {
+ remove_dts_thermal_zone(&sensors->soc_dts[i]);
+ dts_trips_reset(sensors, i);
+ }
+ kfree(sensors);
+}
+EXPORT_SYMBOL_GPL(intel_soc_dts_iosf_exit);
+
+MODULE_IMPORT_NS(INTEL_TCC);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/intel_soc_dts_iosf.h b/drivers/thermal/intel/intel_soc_dts_iosf.h
new file mode 100644
index 0000000000..162841df0e
--- /dev/null
+++ b/drivers/thermal/intel/intel_soc_dts_iosf.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * intel_soc_dts_iosf.h
+ * Copyright (c) 2015, Intel Corporation.
+ */
+
+#ifndef _INTEL_SOC_DTS_IOSF_CORE_H
+#define _INTEL_SOC_DTS_IOSF_CORE_H
+
+#include <linux/thermal.h>
+
+/* DTS0 and DTS1 */
+#define SOC_MAX_DTS_SENSORS 2
+
+/* Only 2 out of 4 are allowed for OSPM */
+#define SOC_MAX_DTS_TRIPS 2
+
+enum intel_soc_dts_interrupt_type {
+ INTEL_SOC_DTS_INTERRUPT_NONE,
+ INTEL_SOC_DTS_INTERRUPT_APIC,
+ INTEL_SOC_DTS_INTERRUPT_MSI,
+ INTEL_SOC_DTS_INTERRUPT_SCI,
+ INTEL_SOC_DTS_INTERRUPT_SMI,
+};
+
+struct intel_soc_dts_sensors;
+
+struct intel_soc_dts_sensor_entry {
+ int id;
+ u32 store_status;
+ u32 trip_mask;
+ struct thermal_trip trips[SOC_MAX_DTS_TRIPS];
+ struct thermal_zone_device *tzone;
+ struct intel_soc_dts_sensors *sensors;
+};
+
+struct intel_soc_dts_sensors {
+ u32 tj_max;
+ spinlock_t intr_notify_lock;
+ struct mutex dts_update_lock;
+ enum intel_soc_dts_interrupt_type intr_type;
+ struct intel_soc_dts_sensor_entry soc_dts[SOC_MAX_DTS_SENSORS];
+};
+
+struct intel_soc_dts_sensors *
+intel_soc_dts_iosf_init(enum intel_soc_dts_interrupt_type intr_type,
+ bool critical_trip, int crit_offset);
+void intel_soc_dts_iosf_exit(struct intel_soc_dts_sensors *sensors);
+void intel_soc_dts_iosf_interrupt_handler(struct intel_soc_dts_sensors *sensors);
+#endif
diff --git a/drivers/thermal/intel/intel_soc_dts_thermal.c b/drivers/thermal/intel/intel_soc_dts_thermal.c
new file mode 100644
index 0000000000..9c825c6e1f
--- /dev/null
+++ b/drivers/thermal/intel/intel_soc_dts_thermal.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_soc_dts_thermal.c
+ * Copyright (c) 2014, Intel Corporation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <asm/cpu_device_id.h>
+#include <asm/intel-family.h>
+#include "intel_soc_dts_iosf.h"
+
+#define CRITICAL_OFFSET_FROM_TJ_MAX 5000
+
+static int crit_offset = CRITICAL_OFFSET_FROM_TJ_MAX;
+module_param(crit_offset, int, 0644);
+MODULE_PARM_DESC(crit_offset,
+ "Critical Temperature offset from tj max in millidegree Celsius.");
+
+/* IRQ 86 is a fixed APIC interrupt for BYT DTS Aux threshold notifications */
+#define BYT_SOC_DTS_APIC_IRQ 86
+
+static int soc_dts_thres_gsi;
+static int soc_dts_thres_irq;
+static struct intel_soc_dts_sensors *soc_dts;
+
+static irqreturn_t soc_irq_thread_fn(int irq, void *dev_data)
+{
+ pr_debug("proc_thermal_interrupt\n");
+ intel_soc_dts_iosf_interrupt_handler(soc_dts);
+
+ return IRQ_HANDLED;
+}
+
+static const struct x86_cpu_id soc_thermal_ids[] = {
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT, BYT_SOC_DTS_APIC_IRQ),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, soc_thermal_ids);
+
+static int __init intel_soc_thermal_init(void)
+{
+ int err = 0;
+ const struct x86_cpu_id *match_cpu;
+
+ match_cpu = x86_match_cpu(soc_thermal_ids);
+ if (!match_cpu)
+ return -ENODEV;
+
+	/* Create a zone with 2 trips, marked as read only */
+ soc_dts = intel_soc_dts_iosf_init(INTEL_SOC_DTS_INTERRUPT_APIC, true,
+ crit_offset);
+	if (IS_ERR(soc_dts))
+		return PTR_ERR(soc_dts);
+
+ soc_dts_thres_gsi = (int)match_cpu->driver_data;
+ if (soc_dts_thres_gsi) {
+		/*
+		 * Note the flags here MUST match the firmware defaults, rather
+		 * than the request_irq flags, otherwise we get an EBUSY error.
+		 */
+ soc_dts_thres_irq = acpi_register_gsi(NULL, soc_dts_thres_gsi,
+ ACPI_LEVEL_SENSITIVE,
+ ACPI_ACTIVE_LOW);
+ if (soc_dts_thres_irq < 0) {
+			pr_warn("Could not get IRQ for GSI %d, err %d\n",
+				soc_dts_thres_gsi, soc_dts_thres_irq);
+ soc_dts_thres_irq = 0;
+ }
+ }
+
+ if (soc_dts_thres_irq) {
+ err = request_threaded_irq(soc_dts_thres_irq, NULL,
+ soc_irq_thread_fn,
+ IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+ "soc_dts", soc_dts);
+ if (err) {
+ /*
+ * Do not just error out because the user space thermal
+ * daemon such as DPTF may use polling instead of being
+ * interrupt driven.
+ */
+ pr_warn("request_threaded_irq ret %d\n", err);
+ }
+ }
+
+ return 0;
+}
+
+static void __exit intel_soc_thermal_exit(void)
+{
+ if (soc_dts_thres_irq) {
+ free_irq(soc_dts_thres_irq, soc_dts);
+ acpi_unregister_gsi(soc_dts_thres_gsi);
+ }
+ intel_soc_dts_iosf_exit(soc_dts);
+}
+
+module_init(intel_soc_thermal_init)
+module_exit(intel_soc_thermal_exit)
+
+MODULE_DESCRIPTION("Intel SoC DTS Thermal Driver");
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/intel_tcc.c b/drivers/thermal/intel/intel_tcc.c
new file mode 100644
index 0000000000..2e5c741c41
--- /dev/null
+++ b/drivers/thermal/intel/intel_tcc.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * intel_tcc.c - Library for Intel TCC (thermal control circuitry) MSR access
+ * Copyright (c) 2022, Intel Corporation.
+ */
+
+#include <linux/errno.h>
+#include <linux/intel_tcc.h>
+#include <asm/msr.h>
+
+/**
+ * intel_tcc_get_tjmax() - returns the default TCC activation temperature
+ * @cpu: cpu that the MSR should be run on, negative value means any cpu.
+ *
+ * Get the TjMax value, which is the default thermal throttling or TCC
+ * activation temperature in degrees C.
+ *
+ * Return: Tjmax value in degrees C on success, negative error code otherwise.
+ */
+int intel_tcc_get_tjmax(int cpu)
+{
+ u32 low, high;
+ int val, err;
+
+ if (cpu < 0)
+ err = rdmsr_safe(MSR_IA32_TEMPERATURE_TARGET, &low, &high);
+ else
+ err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &low, &high);
+ if (err)
+ return err;
+
+ val = (low >> 16) & 0xff;
+
+ return val ? val : -ENODATA;
+}
+EXPORT_SYMBOL_NS_GPL(intel_tcc_get_tjmax, INTEL_TCC);
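+
+/*
+ * Worked example (illustrative): TjMax lives in bits 23:16 of the low
+ * word of MSR_IA32_TEMPERATURE_TARGET, so a raw value of
+ * low = 0x00640000 decodes as (low >> 16) & 0xff = 0x64 = 100,
+ * i.e. a TjMax of 100 degrees C.
+ */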
+
+/**
+ * intel_tcc_get_offset() - returns the TCC Offset value to Tjmax
+ * @cpu: cpu that the MSR should be run on, negative value means any cpu.
+ *
+ * Get the TCC offset value to Tjmax. The effective thermal throttling or TCC
+ * activation temperature equals "Tjmax" - "TCC Offset", in degrees C.
+ *
+ * Return: Tcc offset value in degrees C on success, negative error code otherwise.
+ */
+int intel_tcc_get_offset(int cpu)
+{
+ u32 low, high;
+ int err;
+
+ if (cpu < 0)
+ err = rdmsr_safe(MSR_IA32_TEMPERATURE_TARGET, &low, &high);
+ else
+ err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &low, &high);
+ if (err)
+ return err;
+
+ return (low >> 24) & 0x3f;
+}
+EXPORT_SYMBOL_NS_GPL(intel_tcc_get_offset, INTEL_TCC);
+
+/**
+ * intel_tcc_set_offset() - set the TCC offset value to Tjmax
+ * @cpu: cpu that the MSR should be run on, negative value means any cpu.
+ * @offset: TCC offset value in degrees C
+ *
+ * Set the TCC offset value to Tjmax. The effective thermal throttling or TCC
+ * activation temperature equals "Tjmax" - "TCC Offset", in degrees C.
+ *
+ * Return: On success returns 0, negative error code otherwise.
+ */
+int intel_tcc_set_offset(int cpu, int offset)
+{
+ u32 low, high;
+ int err;
+
+ if (offset < 0 || offset > 0x3f)
+ return -EINVAL;
+
+ if (cpu < 0)
+ err = rdmsr_safe(MSR_IA32_TEMPERATURE_TARGET, &low, &high);
+ else
+ err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &low, &high);
+ if (err)
+ return err;
+
+ /* MSR Locked */
+ if (low & BIT(31))
+ return -EPERM;
+
+ low &= ~(0x3f << 24);
+ low |= offset << 24;
+
+	if (cpu < 0)
+		return wrmsr_safe(MSR_IA32_TEMPERATURE_TARGET, low, high);
+
+	return wrmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, low, high);
+}
+EXPORT_SYMBOL_NS_GPL(intel_tcc_set_offset, INTEL_TCC);
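+
+/*
+ * Minimal usage sketch (illustrative, not part of this file): the
+ * offset occupies bits 29:24, so with a TjMax of 100 degrees C,
+ *
+ *	intel_tcc_set_offset(-1, 5);
+ *
+ * lowers the effective throttling temperature to 95 degrees C,
+ * provided the lock bit (bit 31) is clear.
+ */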
+
+/**
+ * intel_tcc_get_temp() - returns the current temperature
+ * @cpu: cpu that the MSR should be run on, negative value means any cpu.
+ * @pkg: true: Package Thermal Sensor. false: Core Thermal Sensor.
+ *
+ * Get the current temperature returned by the CPU core/package level
+ * thermal sensor, in degrees C.
+ *
+ * Return: Temperature in degrees C on success, negative error code otherwise.
+ */
+int intel_tcc_get_temp(int cpu, bool pkg)
+{
+ u32 low, high;
+ u32 msr = pkg ? MSR_IA32_PACKAGE_THERM_STATUS : MSR_IA32_THERM_STATUS;
+ int tjmax, temp, err;
+
+ tjmax = intel_tcc_get_tjmax(cpu);
+ if (tjmax < 0)
+ return tjmax;
+
+ if (cpu < 0)
+ err = rdmsr_safe(msr, &low, &high);
+ else
+ err = rdmsr_safe_on_cpu(cpu, msr, &low, &high);
+ if (err)
+ return err;
+
+ /* Temperature is beyond the valid thermal sensor range */
+ if (!(low & BIT(31)))
+ return -ENODATA;
+
+ temp = tjmax - ((low >> 16) & 0x7f);
+
+ /* Do not allow negative CPU temperature */
+ return temp >= 0 ? temp : -ENODATA;
+}
+EXPORT_SYMBOL_NS_GPL(intel_tcc_get_temp, INTEL_TCC);
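+
+/*
+ * Minimal usage sketch (illustrative, not part of this file): the
+ * digital readout in bits 22:16 of the status MSR is an offset below
+ * TjMax, so with TjMax at 100 degrees C a readout of 20 decodes to
+ * 80 degrees C:
+ *
+ *	int temp = intel_tcc_get_temp(-1, true);
+ *	if (temp >= 0)
+ *		pr_info("package temperature: %d C\n", temp);
+ */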
diff --git a/drivers/thermal/intel/intel_tcc_cooling.c b/drivers/thermal/intel/intel_tcc_cooling.c
new file mode 100644
index 0000000000..6c392147e6
--- /dev/null
+++ b/drivers/thermal/intel/intel_tcc_cooling.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * cooling device driver that activates the processor throttling by
+ * programming the TCC Offset register.
+ * Copyright (c) 2021, Intel Corporation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/intel_tcc.h>
+#include <linux/module.h>
+#include <linux/thermal.h>
+#include <asm/cpu_device_id.h>
+
+#define TCC_PROGRAMMABLE BIT(30)
+#define TCC_LOCKED BIT(31)
+
+static struct thermal_cooling_device *tcc_cdev;
+
+static int tcc_get_max_state(struct thermal_cooling_device *cdev,
+			     unsigned long *state)
+{
+ *state = 0x3f;
+ return 0;
+}
+
+static int tcc_get_cur_state(struct thermal_cooling_device *cdev,
+			     unsigned long *state)
+{
+ int offset = intel_tcc_get_offset(-1);
+
+ if (offset < 0)
+ return offset;
+
+ *state = offset;
+ return 0;
+}
+
+static int tcc_set_cur_state(struct thermal_cooling_device *cdev,
+			     unsigned long state)
+{
+ return intel_tcc_set_offset(-1, (int)state);
+}
+
+static const struct thermal_cooling_device_ops tcc_cooling_ops = {
+ .get_max_state = tcc_get_max_state,
+ .get_cur_state = tcc_get_cur_state,
+ .set_cur_state = tcc_set_cur_state,
+};
+
+static const struct x86_cpu_id tcc_ids[] __initconst = {
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, NULL),
+ {}
+};
+
+MODULE_DEVICE_TABLE(x86cpu, tcc_ids);
+
+static int __init tcc_cooling_init(void)
+{
+	const struct x86_cpu_id *id;
+	u64 val;
+	int err;
+
+ id = x86_match_cpu(tcc_ids);
+ if (!id)
+ return -ENODEV;
+
+ err = rdmsrl_safe(MSR_PLATFORM_INFO, &val);
+ if (err)
+ return err;
+
+ if (!(val & TCC_PROGRAMMABLE))
+ return -ENODEV;
+
+ err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val);
+ if (err)
+ return err;
+
+ if (val & TCC_LOCKED) {
+ pr_info("TCC Offset locked\n");
+ return -ENODEV;
+ }
+
+ pr_info("Programmable TCC Offset detected\n");
+
+	tcc_cdev = thermal_cooling_device_register("TCC Offset", NULL,
+						   &tcc_cooling_ops);
+	if (IS_ERR(tcc_cdev))
+		return PTR_ERR(tcc_cdev);
+
+ return 0;
+}
+
+module_init(tcc_cooling_init)
+
+static void __exit tcc_cooling_exit(void)
+{
+ thermal_cooling_device_unregister(tcc_cdev);
+}
+
+module_exit(tcc_cooling_exit)
+
+MODULE_IMPORT_NS(INTEL_TCC);
+MODULE_DESCRIPTION("TCC offset cooling device Driver");
+MODULE_AUTHOR("Zhang Rui <rui.zhang@intel.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
new file mode 100644
index 0000000000..e69868e868
--- /dev/null
+++ b/drivers/thermal/intel/therm_throt.c
@@ -0,0 +1,815 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Thermal throttle event support code (such as syslog messaging and rate
+ * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
+ *
+ * This allows consistent reporting of CPU thermal throttle events.
+ *
+ * Maintains a counter in /sys that keeps track of the number of thermal
+ * events, such that the user knows how bad the thermal problem might be
+ * (since the logging to syslog is rate limited).
+ *
+ * Author: Dmitriy Zavin (dmitriyz@google.com)
+ *
+ * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
+ * Inspired by Ross Biro's and Al Borchers' counter code.
+ */
+#include <linux/interrupt.h>
+#include <linux/notifier.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+#include <linux/cpu.h>
+
+#include <asm/processor.h>
+#include <asm/thermal.h>
+#include <asm/traps.h>
+#include <asm/apic.h>
+#include <asm/irq.h>
+#include <asm/msr.h>
+
+#include "intel_hfi.h"
+#include "thermal_interrupt.h"
+
+/* How long to wait between reporting thermal events */
+#define CHECK_INTERVAL (300 * HZ)
+
+#define THERMAL_THROTTLING_EVENT 0
+#define POWER_LIMIT_EVENT 1
+
+/**
+ * struct _thermal_state - Represent the current thermal event state
+ * @next_check: Stores the next timestamp, when it is allowed
+ * to log the next warning message.
+ * @last_interrupt_time: Stores the timestamp for the last threshold
+ * high event.
+ * @therm_work: Delayed workqueue structure
+ * @count: Stores the current running count for thermal
+ * or power threshold interrupts.
+ * @last_count: Stores the previous running count for thermal
+ * or power threshold interrupts.
+ * @max_time_ms: This shows the maximum amount of time CPU was
+ * in throttled state for a single thermal
+ * threshold high to low state.
+ * @total_time_ms: This is a cumulative time during which CPU was
+ * in the throttled state.
+ * @rate_control_active: Set when a throttling message is logged.
+ * This is used for the purpose of rate-control.
+ * @new_event: Stores the last high/low status of the
+ * THERM_STATUS_PROCHOT or
+ * THERM_STATUS_POWER_LIMIT.
+ * @level: Stores whether this _thermal_state instance is
+ * for a CORE level or for PACKAGE level.
+ * @sample_index: Index for storing the next sample in the buffer
+ * temp_samples[].
+ * @sample_count: Total number of samples collected in the buffer
+ * temp_samples[].
+ * @average: The last moving average of temperature samples
+ * @baseline_temp: Temperature at which thermal threshold high
+ * interrupt was generated.
+ * @temp_samples: Storage for temperature samples to calculate
+ * moving average.
+ *
+ * This structure is used to represent data related to thermal state for a CPU.
+ * There is a separate storage for core and package level for each CPU.
+ */
+struct _thermal_state {
+ u64 next_check;
+ u64 last_interrupt_time;
+ struct delayed_work therm_work;
+ unsigned long count;
+ unsigned long last_count;
+ unsigned long max_time_ms;
+ unsigned long total_time_ms;
+ bool rate_control_active;
+ bool new_event;
+ u8 level;
+ u8 sample_index;
+ u8 sample_count;
+ u8 average;
+ u8 baseline_temp;
+ u8 temp_samples[3];
+};
+
+struct thermal_state {
+ struct _thermal_state core_throttle;
+ struct _thermal_state core_power_limit;
+ struct _thermal_state package_throttle;
+ struct _thermal_state package_power_limit;
+ struct _thermal_state core_thresh0;
+ struct _thermal_state core_thresh1;
+ struct _thermal_state pkg_thresh0;
+ struct _thermal_state pkg_thresh1;
+};
+
+/* Callback to handle core threshold interrupts */
+int (*platform_thermal_notify)(__u64 msr_val);
+EXPORT_SYMBOL(platform_thermal_notify);
+
+/* Callback to handle core package threshold_interrupts */
+int (*platform_thermal_package_notify)(__u64 msr_val);
+EXPORT_SYMBOL_GPL(platform_thermal_package_notify);
+
+/*
+ * Callback support for rate control; returns true if the
+ * callback implements rate control.
+ */
+bool (*platform_thermal_package_rate_control)(void);
+EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);
+
+
+static DEFINE_PER_CPU(struct thermal_state, thermal_state);
+
+static atomic_t therm_throt_en = ATOMIC_INIT(0);
+
+static u32 lvtthmr_init __read_mostly;
+
+#ifdef CONFIG_SYSFS
+#define define_therm_throt_device_one_ro(_name) \
+ static DEVICE_ATTR(_name, 0444, \
+ therm_throt_device_show_##_name, \
+ NULL) \
+
+#define define_therm_throt_device_show_func(event, name) \
+ \
+static ssize_t therm_throt_device_show_##event##_##name( \
+ struct device *dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ unsigned int cpu = dev->id; \
+ ssize_t ret; \
+ \
+ preempt_disable(); /* CPU hotplug */ \
+ if (cpu_online(cpu)) { \
+ ret = sprintf(buf, "%lu\n", \
+ per_cpu(thermal_state, cpu).event.name); \
+ } else \
+ ret = 0; \
+ preempt_enable(); \
+ \
+ return ret; \
+}
+
+define_therm_throt_device_show_func(core_throttle, count);
+define_therm_throt_device_one_ro(core_throttle_count);
+
+define_therm_throt_device_show_func(core_power_limit, count);
+define_therm_throt_device_one_ro(core_power_limit_count);
+
+define_therm_throt_device_show_func(package_throttle, count);
+define_therm_throt_device_one_ro(package_throttle_count);
+
+define_therm_throt_device_show_func(package_power_limit, count);
+define_therm_throt_device_one_ro(package_power_limit_count);
+
+define_therm_throt_device_show_func(core_throttle, max_time_ms);
+define_therm_throt_device_one_ro(core_throttle_max_time_ms);
+
+define_therm_throt_device_show_func(package_throttle, max_time_ms);
+define_therm_throt_device_one_ro(package_throttle_max_time_ms);
+
+define_therm_throt_device_show_func(core_throttle, total_time_ms);
+define_therm_throt_device_one_ro(core_throttle_total_time_ms);
+
+define_therm_throt_device_show_func(package_throttle, total_time_ms);
+define_therm_throt_device_one_ro(package_throttle_total_time_ms);
+
+static struct attribute *thermal_throttle_attrs[] = {
+ &dev_attr_core_throttle_count.attr,
+ &dev_attr_core_throttle_max_time_ms.attr,
+ &dev_attr_core_throttle_total_time_ms.attr,
+ NULL
+};
+
+static const struct attribute_group thermal_attr_group = {
+ .attrs = thermal_throttle_attrs,
+ .name = "thermal_throttle"
+};
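+
+/*
+ * Illustrative usage sketch: the group above is registered on each CPU
+ * device, so the counters appear in sysfs as, e.g.:
+ *
+ *	cat /sys/devices/system/cpu/cpu0/thermal_throttle/core_throttle_count
+ */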
+#endif /* CONFIG_SYSFS */
+
+#define THERM_THROT_POLL_INTERVAL HZ
+#define THERM_STATUS_PROCHOT_LOG BIT(1)
+
+static u64 therm_intr_core_clear_mask;
+static u64 therm_intr_pkg_clear_mask;
+
+static void thermal_intr_init_core_clear_mask(void)
+{
+ if (therm_intr_core_clear_mask)
+ return;
+
+ /*
+ * Reference: Intel SDM Volume 4
+ * "Table 2-2. IA-32 Architectural MSRs", MSR 0x19C
+ * IA32_THERM_STATUS.
+ */
+
+ /*
+ * Bits 1, 3 and 5: CPUID.01H:EDX[22] = 1. This driver will not
+ * enable interrupts when this is 0, as it checks for X86_FEATURE_ACPI.
+ */
+ therm_intr_core_clear_mask = (BIT(1) | BIT(3) | BIT(5));
+
+ /*
+ * Bits 7 and 9: Thermal Threshold #1 and #2 log,
+ * if CPUID.01H:ECX[8] = 1.
+ */
+ if (boot_cpu_has(X86_FEATURE_TM2))
+ therm_intr_core_clear_mask |= (BIT(7) | BIT(9));
+
+ /* Bit 11: Power Limitation log (R/WC0) If CPUID.06H:EAX[4] = 1 */
+ if (boot_cpu_has(X86_FEATURE_PLN))
+ therm_intr_core_clear_mask |= BIT(11);
+
+ /*
+ * Bit 13: Current Limit log (R/WC0) If CPUID.06H:EAX[7] = 1
+ * Bit 15: Cross Domain Limit log (R/WC0) If CPUID.06H:EAX[7] = 1
+ */
+ if (boot_cpu_has(X86_FEATURE_HWP))
+ therm_intr_core_clear_mask |= (BIT(13) | BIT(15));
+}
+
+static void thermal_intr_init_pkg_clear_mask(void)
+{
+ if (therm_intr_pkg_clear_mask)
+ return;
+
+ /*
+ * Reference: Intel SDM Volume 4
+ * "Table 2-2. IA-32 Architectural MSRs", MSR 0x1B1
+ * IA32_PACKAGE_THERM_STATUS.
+ */
+
+ /* All bits except BIT 26 depend on CPUID.06H: EAX[6] = 1 */
+ if (boot_cpu_has(X86_FEATURE_PTS))
+ therm_intr_pkg_clear_mask = (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11));
+
+ /*
+ * Intel SDM Volume 2A: Thermal and Power Management Leaf
+ * Bit 26: CPUID.06H: EAX[19] = 1
+ */
+ if (boot_cpu_has(X86_FEATURE_HFI))
+ therm_intr_pkg_clear_mask |= BIT(26);
+}
+
+/*
+ * Clear the log bits that are set in bit_mask in the core or
+ * package thermal status register, depending on level.
+ */
+void thermal_clear_package_intr_status(int level, u64 bit_mask)
+{
+ u64 msr_val;
+ int msr;
+
+ if (level == CORE_LEVEL) {
+ msr = MSR_IA32_THERM_STATUS;
+ msr_val = therm_intr_core_clear_mask;
+ } else {
+ msr = MSR_IA32_PACKAGE_THERM_STATUS;
+ msr_val = therm_intr_pkg_clear_mask;
+ }
+
+ msr_val &= ~bit_mask;
+ wrmsrl(msr, msr_val);
+}
+EXPORT_SYMBOL_GPL(thermal_clear_package_intr_status);
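+
+/*
+ * Example usage (as in the polling work below): clear only the sticky
+ * PROCHOT log bit at core level:
+ *
+ *	thermal_clear_package_intr_status(CORE_LEVEL, THERM_STATUS_PROCHOT_LOG);
+ *
+ * Bits set in bit_mask are written as 0, which clears those R/WC0 log
+ * bits; the other supported log bits are written as 1 and thus left
+ * untouched.
+ */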
+
+static void get_therm_status(int level, bool *proc_hot, u8 *temp)
+{
+ int msr;
+ u64 msr_val;
+
+ if (level == CORE_LEVEL)
+ msr = MSR_IA32_THERM_STATUS;
+ else
+ msr = MSR_IA32_PACKAGE_THERM_STATUS;
+
+ rdmsrl(msr, msr_val);
+ if (msr_val & THERM_STATUS_PROCHOT_LOG)
+ *proc_hot = true;
+ else
+ *proc_hot = false;
+
+ *temp = (msr_val >> 16) & 0x7F;
+}
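+
+/*
+ * Worked example with a hypothetical msr_val of 0x883a0806: bit 1
+ * (THERM_STATUS_PROCHOT_LOG) is set, so *proc_hot = true, and
+ * (msr_val >> 16) & 0x7f = 0x3a = 58, i.e. a digital readout of
+ * 58 degrees below the maximum.
+ */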
+
+static void __maybe_unused throttle_active_work(struct work_struct *work)
+{
+ struct _thermal_state *state = container_of(to_delayed_work(work),
+ struct _thermal_state, therm_work);
+ unsigned int i, avg, this_cpu = smp_processor_id();
+ u64 now = get_jiffies_64();
+ bool hot;
+ u8 temp;
+
+ get_therm_status(state->level, &hot, &temp);
+ /* The temperature value is an offset from the max, so a smaller value means hotter. */
+ if (!hot && temp > state->baseline_temp) {
+ if (state->rate_control_active)
+ pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n",
+ this_cpu,
+ state->level == CORE_LEVEL ? "Core" : "Package",
+ state->count);
+
+ state->rate_control_active = false;
+ return;
+ }
+
+ if (time_before64(now, state->next_check) &&
+ state->rate_control_active)
+ goto re_arm;
+
+ state->next_check = now + CHECK_INTERVAL;
+
+ if (state->count != state->last_count) {
+ /* There was one new thermal interrupt */
+ state->last_count = state->count;
+ state->average = 0;
+ state->sample_count = 0;
+ state->sample_index = 0;
+ }
+
+ state->temp_samples[state->sample_index] = temp;
+ state->sample_count++;
+ state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples);
+ if (state->sample_count < ARRAY_SIZE(state->temp_samples))
+ goto re_arm;
+
+ avg = 0;
+ for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i)
+ avg += state->temp_samples[i];
+
+ avg /= ARRAY_SIZE(state->temp_samples);
+
+ if (state->average > avg) {
+ pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n",
+ this_cpu,
+ state->level == CORE_LEVEL ? "Core" : "Package",
+ state->count);
+ state->rate_control_active = true;
+ }
+
+ state->average = avg;
+
+re_arm:
+ thermal_clear_package_intr_status(state->level, THERM_STATUS_PROCHOT_LOG);
+ schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
+}
+
+/**
+ * therm_throt_process - Process a thermal throttling event from the interrupt
+ * @new_event: Whether the condition is currently active (boolean); the
+ * thermal interrupt normally fires both when the thermal
+ * event begins and once it has ended.
+ * @event: THERMAL_THROTTLING_EVENT or POWER_LIMIT_EVENT.
+ * @level: CORE_LEVEL or PACKAGE_LEVEL.
+ *
+ * This function is called by the thermal interrupt after the
+ * IRQ has been acknowledged.
+ *
+ * It takes care of rate limiting and printing messages to the syslog.
+ */
+static void therm_throt_process(bool new_event, int event, int level)
+{
+ struct _thermal_state *state;
+ unsigned int this_cpu = smp_processor_id();
+ bool old_event;
+ u64 now;
+ struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
+
+ now = get_jiffies_64();
+ if (level == CORE_LEVEL) {
+ if (event == THERMAL_THROTTLING_EVENT)
+ state = &pstate->core_throttle;
+ else if (event == POWER_LIMIT_EVENT)
+ state = &pstate->core_power_limit;
+ else
+ return;
+ } else if (level == PACKAGE_LEVEL) {
+ if (event == THERMAL_THROTTLING_EVENT)
+ state = &pstate->package_throttle;
+ else if (event == POWER_LIMIT_EVENT)
+ state = &pstate->package_power_limit;
+ else
+ return;
+ } else
+ return;
+
+ old_event = state->new_event;
+ state->new_event = new_event;
+
+ if (new_event)
+ state->count++;
+
+ if (event != THERMAL_THROTTLING_EVENT)
+ return;
+
+ if (new_event && !state->last_interrupt_time) {
+ bool hot;
+ u8 temp;
+
+ get_therm_status(state->level, &hot, &temp);
+ /*
+ * Ignore a short temperature spike, as the system is not close
+ * to PROCHOT: an offset of more than 10C is large enough to
+ * ignore, since it is already well below the high threshold
+ * temperature.
+ */
+ if (temp > 10)
+ return;
+
+ state->baseline_temp = temp;
+ state->last_interrupt_time = now;
+ schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
+ } else if (old_event && state->last_interrupt_time) {
+ unsigned long throttle_time;
+
+ throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time);
+ if (throttle_time > state->max_time_ms)
+ state->max_time_ms = throttle_time;
+ state->total_time_ms += throttle_time;
+ state->last_interrupt_time = 0;
+ }
+}
+
+static int thresh_event_valid(int level, int event)
+{
+ struct _thermal_state *state;
+ unsigned int this_cpu = smp_processor_id();
+ struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
+ u64 now = get_jiffies_64();
+
+ if (level == PACKAGE_LEVEL)
+ state = (event == 0) ? &pstate->pkg_thresh0 :
+ &pstate->pkg_thresh1;
+ else
+ state = (event == 0) ? &pstate->core_thresh0 :
+ &pstate->core_thresh1;
+
+ if (time_before64(now, state->next_check))
+ return 0;
+
+ state->next_check = now + CHECK_INTERVAL;
+
+ return 1;
+}
+
+static bool int_pln_enable;
+static int __init int_pln_enable_setup(char *s)
+{
+ int_pln_enable = true;
+
+ return 1;
+}
+__setup("int_pln_enable", int_pln_enable_setup);
+
+#ifdef CONFIG_SYSFS
+/* Add/Remove thermal_throttle interface for CPU device: */
+static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
+{
+ int err;
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
+ if (err)
+ return err;
+
+ if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
+ err = sysfs_add_file_to_group(&dev->kobj,
+ &dev_attr_core_power_limit_count.attr,
+ thermal_attr_group.name);
+ if (err)
+ goto del_group;
+ }
+
+ if (cpu_has(c, X86_FEATURE_PTS)) {
+ err = sysfs_add_file_to_group(&dev->kobj,
+ &dev_attr_package_throttle_count.attr,
+ thermal_attr_group.name);
+ if (err)
+ goto del_group;
+
+ err = sysfs_add_file_to_group(&dev->kobj,
+ &dev_attr_package_throttle_max_time_ms.attr,
+ thermal_attr_group.name);
+ if (err)
+ goto del_group;
+
+ err = sysfs_add_file_to_group(&dev->kobj,
+ &dev_attr_package_throttle_total_time_ms.attr,
+ thermal_attr_group.name);
+ if (err)
+ goto del_group;
+
+ if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
+ err = sysfs_add_file_to_group(&dev->kobj,
+ &dev_attr_package_power_limit_count.attr,
+ thermal_attr_group.name);
+ if (err)
+ goto del_group;
+ }
+ }
+
+ return 0;
+
+del_group:
+ sysfs_remove_group(&dev->kobj, &thermal_attr_group);
+
+ return err;
+}
+
+static void thermal_throttle_remove_dev(struct device *dev)
+{
+ sysfs_remove_group(&dev->kobj, &thermal_attr_group);
+}
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static int thermal_throttle_online(unsigned int cpu)
+{
+ struct thermal_state *state = &per_cpu(thermal_state, cpu);
+ struct device *dev = get_cpu_device(cpu);
+ u32 l;
+
+ state->package_throttle.level = PACKAGE_LEVEL;
+ state->core_throttle.level = CORE_LEVEL;
+
+ INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
+ INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
+
+ /*
+ * The first CPU coming online will enable the HFI. Usually this causes
+ * the hardware to issue an HFI thermal interrupt. Such an interrupt
+ * will reach the CPU once we enable the thermal vector in the local APIC.
+ */
+ intel_hfi_online(cpu);
+
+ /* Unmask the thermal vector after the above workqueues are initialized. */
+ l = apic_read(APIC_LVTTHMR);
+ apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+
+ return thermal_throttle_add_dev(dev, cpu);
+}
+
+static int thermal_throttle_offline(unsigned int cpu)
+{
+ struct thermal_state *state = &per_cpu(thermal_state, cpu);
+ struct device *dev = get_cpu_device(cpu);
+ u32 l;
+
+ /* Mask the thermal vector before draining any pending work. */
+ l = apic_read(APIC_LVTTHMR);
+ apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
+
+ intel_hfi_offline(cpu);
+
+ cancel_delayed_work_sync(&state->package_throttle.therm_work);
+ cancel_delayed_work_sync(&state->core_throttle.therm_work);
+
+ state->package_throttle.rate_control_active = false;
+ state->core_throttle.rate_control_active = false;
+
+ thermal_throttle_remove_dev(dev);
+ return 0;
+}
+
+static __init int thermal_throttle_init_device(void)
+{
+ int ret;
+
+ if (!atomic_read(&therm_throt_en))
+ return 0;
+
+ intel_hfi_init();
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
+ thermal_throttle_online,
+ thermal_throttle_offline);
+ return ret < 0 ? ret : 0;
+}
+device_initcall(thermal_throttle_init_device);
+
+#endif /* CONFIG_SYSFS */
+
+static void notify_package_thresholds(__u64 msr_val)
+{
+ bool notify_thres_0 = false;
+ bool notify_thres_1 = false;
+
+ if (!platform_thermal_package_notify)
+ return;
+
+ /* lower threshold check */
+ if (msr_val & THERM_LOG_THRESHOLD0)
+ notify_thres_0 = true;
+ /* higher threshold check */
+ if (msr_val & THERM_LOG_THRESHOLD1)
+ notify_thres_1 = true;
+
+ if (!notify_thres_0 && !notify_thres_1)
+ return;
+
+ if (platform_thermal_package_rate_control &&
+ platform_thermal_package_rate_control()) {
+ /* Rate control is implemented in callback */
+ platform_thermal_package_notify(msr_val);
+ return;
+ }
+
+ /* lower threshold reached */
+ if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
+ platform_thermal_package_notify(msr_val);
+ /* higher threshold reached */
+ if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
+ platform_thermal_package_notify(msr_val);
+}
+
+static void notify_thresholds(__u64 msr_val)
+{
+ /* check whether the interrupt handler is defined;
+ * otherwise simply return
+ */
+ if (!platform_thermal_notify)
+ return;
+
+ /* lower threshold reached */
+ if ((msr_val & THERM_LOG_THRESHOLD0) &&
+ thresh_event_valid(CORE_LEVEL, 0))
+ platform_thermal_notify(msr_val);
+ /* higher threshold reached */
+ if ((msr_val & THERM_LOG_THRESHOLD1) &&
+ thresh_event_valid(CORE_LEVEL, 1))
+ platform_thermal_notify(msr_val);
+}
+
+void __weak notify_hwp_interrupt(void)
+{
+ wrmsrl_safe(MSR_HWP_STATUS, 0);
+}
+
+/* Thermal transition interrupt handler */
+void intel_thermal_interrupt(void)
+{
+ __u64 msr_val;
+
+ if (static_cpu_has(X86_FEATURE_HWP))
+ notify_hwp_interrupt();
+
+ rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+
+ /* Check for violation of core thermal thresholds */
+ notify_thresholds(msr_val);
+
+ therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
+ THERMAL_THROTTLING_EVENT,
+ CORE_LEVEL);
+
+ if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
+ therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
+ POWER_LIMIT_EVENT,
+ CORE_LEVEL);
+
+ if (this_cpu_has(X86_FEATURE_PTS)) {
+ rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
+ /* check violations of package thermal thresholds */
+ notify_package_thresholds(msr_val);
+ therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
+ THERMAL_THROTTLING_EVENT,
+ PACKAGE_LEVEL);
+ if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
+ therm_throt_process(msr_val &
+ PACKAGE_THERM_STATUS_POWER_LIMIT,
+ POWER_LIMIT_EVENT,
+ PACKAGE_LEVEL);
+
+ if (this_cpu_has(X86_FEATURE_HFI))
+ intel_hfi_process_event(msr_val &
+ PACKAGE_THERM_STATUS_HFI_UPDATED);
+ }
+}
+
+/* Thermal monitoring depends on APIC, ACPI and clock modulation */
+static int intel_thermal_supported(struct cpuinfo_x86 *c)
+{
+ if (!boot_cpu_has(X86_FEATURE_APIC))
+ return 0;
+ if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
+ return 0;
+ return 1;
+}
+
+bool x86_thermal_enabled(void)
+{
+ return atomic_read(&therm_throt_en);
+}
+
+void __init therm_lvt_init(void)
+{
+ /*
+ * This function is only called on the boot CPU. Save the initial
+ * thermal LVT value on the BSP and use it later to restore the APs'
+ * thermal LVT entries that the BIOS programmed.
+ */
+ if (intel_thermal_supported(&boot_cpu_data))
+ lvtthmr_init = apic_read(APIC_LVTTHMR);
+}
+
+void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+ unsigned int cpu = smp_processor_id();
+ int tm2 = 0;
+ u32 l, h;
+
+ if (!intel_thermal_supported(c))
+ return;
+
+ /*
+ * First check if it's enabled already, in which case there might
+ * be some SMM goo which handles it, so we can't even put a handler
+ * since it might be delivered via SMI already:
+ */
+ rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+
+ h = lvtthmr_init;
+ /*
+ * The initial value of thermal LVT entries on all APs always reads
+ * 0x10000 because APs are woken up by the BSP issuing an INIT-SIPI-SIPI
+ * sequence to them, and LVT registers are reset to 0s except for
+ * the mask bits, which are set to 1s when APs receive the INIT IPI.
+ * If the BIOS takes over the thermal interrupt and sets its interrupt
+ * delivery mode to SMI (not fixed), restore on the AP the value that
+ * the BIOS programmed, based on the BSP's info we saved, since the
+ * BIOS always sets the same value for all threads/cores.
+ */
+ if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
+ apic_write(APIC_LVTTHMR, lvtthmr_init);
+
+ if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
+ if (system_state == SYSTEM_BOOTING)
+ pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
+ return;
+ }
+
+ /* Early Pentium M models use a different method for enabling TM2. */
+ if (cpu_has(c, X86_FEATURE_TM2)) {
+ if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
+ rdmsr(MSR_THERM2_CTL, l, h);
+ if (l & MSR_THERM2_CTL_TM_SELECT)
+ tm2 = 1;
+ } else if (l & MSR_IA32_MISC_ENABLE_TM2)
+ tm2 = 1;
+ }
+
+ /* We'll mask the thermal vector in the lapic till we're ready: */
+ h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
+ apic_write(APIC_LVTTHMR, h);
+
+ thermal_intr_init_core_clear_mask();
+ thermal_intr_init_pkg_clear_mask();
+
+ rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
+ if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
+ wrmsr(MSR_IA32_THERM_INTERRUPT,
+ (l | (THERM_INT_LOW_ENABLE
+ | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
+ else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
+ wrmsr(MSR_IA32_THERM_INTERRUPT,
+ l | (THERM_INT_LOW_ENABLE
+ | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
+ else
+ wrmsr(MSR_IA32_THERM_INTERRUPT,
+ l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
+
+ if (cpu_has(c, X86_FEATURE_PTS)) {
+ rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+ if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ (l | (PACKAGE_THERM_INT_LOW_ENABLE
+ | PACKAGE_THERM_INT_HIGH_ENABLE))
+ & ~PACKAGE_THERM_INT_PLN_ENABLE, h);
+ else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ l | (PACKAGE_THERM_INT_LOW_ENABLE
+ | PACKAGE_THERM_INT_HIGH_ENABLE
+ | PACKAGE_THERM_INT_PLN_ENABLE), h);
+ else
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ l | (PACKAGE_THERM_INT_LOW_ENABLE
+ | PACKAGE_THERM_INT_HIGH_ENABLE), h);
+
+ if (cpu_has(c, X86_FEATURE_HFI)) {
+ rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ l | PACKAGE_THERM_INT_HFI_ENABLE, h);
+ }
+ }
+
+ rdmsr(MSR_IA32_MISC_ENABLE, l, h);
+ wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
+
+ pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
+ tm2 ? "TM2" : "TM1");
+
+ /* enable thermal throttle processing */
+ atomic_set(&therm_throt_en, 1);
+}
diff --git a/drivers/thermal/intel/thermal_interrupt.h b/drivers/thermal/intel/thermal_interrupt.h
new file mode 100644
index 0000000000..01dfd4cdb5
--- /dev/null
+++ b/drivers/thermal/intel/thermal_interrupt.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _INTEL_THERMAL_INTERRUPT_H
+#define _INTEL_THERMAL_INTERRUPT_H
+
+#define CORE_LEVEL 0
+#define PACKAGE_LEVEL 1
+
+/* Interrupt Handler for package thermal thresholds */
+extern int (*platform_thermal_package_notify)(__u64 msr_val);
+
+/* Interrupt Handler for core thermal thresholds */
+extern int (*platform_thermal_notify)(__u64 msr_val);
+
+/*
+ * Callback support for rate control: returns true if the
+ * callback implements its own rate control.
+ */
+extern bool (*platform_thermal_package_rate_control)(void);
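+
+/*
+ * Registration sketch: a consumer (e.g. x86_pkg_temp_thermal) assigns
+ * these hooks at module init and clears them again on exit:
+ *
+ *	platform_thermal_package_notify = pkg_thermal_notify;
+ *	platform_thermal_package_rate_control = pkg_thermal_rate_control;
+ */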
+
+/* Handle HWP interrupt */
+extern void notify_hwp_interrupt(void);
+
+/* Common function to clear Package thermal status register */
+extern void thermal_clear_package_intr_status(int level, u64 bit_mask);
+
+#endif /* _INTEL_THERMAL_INTERRUPT_H */
diff --git a/drivers/thermal/intel/x86_pkg_temp_thermal.c b/drivers/thermal/intel/x86_pkg_temp_thermal.c
new file mode 100644
index 0000000000..11a7f8108b
--- /dev/null
+++ b/drivers/thermal/intel/x86_pkg_temp_thermal.c
@@ -0,0 +1,538 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * x86_pkg_temp_thermal driver
+ * Copyright (c) 2013, Intel Corporation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/intel_tcc.h>
+#include <linux/err.h>
+#include <linux/param.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/cpu.h>
+#include <linux/smp.h>
+#include <linux/slab.h>
+#include <linux/pm.h>
+#include <linux/thermal.h>
+#include <linux/debugfs.h>
+
+#include <asm/cpu_device_id.h>
+
+#include "thermal_interrupt.h"
+
+/*
+ * Rate control delay: the idea is to introduce a debounce effect.
+ * This should be long enough to reduce the number of events when a
+ * threshold is set to a temperature that is constantly violated, but
+ * short enough that action can still be taken. The action can be to
+ * remove the threshold or to change it to the next interesting
+ * setting. Based on experiments, about every 5 seconds under load
+ * gives a significant temperature change.
+ */
+#define PKG_TEMP_THERMAL_NOTIFY_DELAY 5000
+static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY;
+module_param(notify_delay_ms, int, 0644);
+MODULE_PARM_DESC(notify_delay_ms,
+ "User space notification delay in milli seconds.");
+
+/*
+ * Number of trip points in the thermal zone. Currently it can't be
+ * more than 2: the MSR only allows setting and getting notifications
+ * for 2 thresholds. This define enforces that limit in case CPUID
+ * returns a wrong value for the number of thresholds.
+ */
+#define MAX_NUMBER_OF_TRIPS 2
+
+struct zone_device {
+ int cpu;
+ bool work_scheduled;
+ u32 msr_pkg_therm_low;
+ u32 msr_pkg_therm_high;
+ struct delayed_work work;
+ struct thermal_zone_device *tzone;
+ struct thermal_trip *trips;
+ struct cpumask cpumask;
+};
+
+static struct thermal_zone_params pkg_temp_tz_params = {
+ .no_hwmon = true,
+};
+
+/* Keep track of how many zone pointers we allocated in init() */
+static int max_id __read_mostly;
+/* Array of zone pointers */
+static struct zone_device **zones;
+/* Serializes interrupt notification, work and hotplug */
+static DEFINE_RAW_SPINLOCK(pkg_temp_lock);
+/* Protects zone operation in the work function against hotplug removal */
+static DEFINE_MUTEX(thermal_zone_mutex);
+
+/* The dynamically assigned cpu hotplug state for module_exit() */
+static enum cpuhp_state pkg_thermal_hp_state __read_mostly;
+
+/* Debug counters to show using debugfs */
+static struct dentry *debugfs;
+static unsigned int pkg_interrupt_cnt;
+static unsigned int pkg_work_cnt;
+
+static void pkg_temp_debugfs_init(void)
+{
+ debugfs = debugfs_create_dir("pkg_temp_thermal", NULL);
+
+ debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs,
+ &pkg_interrupt_cnt);
+ debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs,
+ &pkg_work_cnt);
+}
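+
+/*
+ * The counters can then be read from user space, assuming debugfs is
+ * mounted at /sys/kernel/debug:
+ *
+ *	cat /sys/kernel/debug/pkg_temp_thermal/pkg_thres_interrupt
+ *	cat /sys/kernel/debug/pkg_temp_thermal/pkg_thres_work
+ */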
+
+/*
+ * Protection:
+ *
+ * - cpu hotplug: Read serialized by cpu hotplug lock
+ * Write must hold pkg_temp_lock
+ *
+ * - Other callsites: Must hold pkg_temp_lock
+ */
+static struct zone_device *pkg_temp_thermal_get_dev(unsigned int cpu)
+{
+ int id = topology_logical_die_id(cpu);
+
+ if (id >= 0 && id < max_id)
+ return zones[id];
+ return NULL;
+}
+
+static int sys_get_curr_temp(struct thermal_zone_device *tzd, int *temp)
+{
+ struct zone_device *zonedev = thermal_zone_device_priv(tzd);
+ int val;
+
+ val = intel_tcc_get_temp(zonedev->cpu, true);
+ if (val < 0)
+ return val;
+
+ *temp = val * 1000;
+ pr_debug("sys_get_curr_temp %d\n", *temp);
+ return 0;
+}
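+
+/*
+ * intel_tcc_get_temp() reports whole degrees Celsius, so e.g. a
+ * reading of 45 is passed to the thermal core as 45000 millidegrees.
+ */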
+
+static int
+sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, int temp)
+{
+ struct zone_device *zonedev = thermal_zone_device_priv(tzd);
+ u32 l, h, mask, shift, intr;
+ int tj_max, val, ret;
+
+ tj_max = intel_tcc_get_tjmax(zonedev->cpu);
+ if (tj_max < 0)
+ return tj_max;
+ tj_max *= 1000;
+
+ val = (tj_max - temp)/1000;
+
+ if (trip >= MAX_NUMBER_OF_TRIPS || val < 0 || val > 0x7f)
+ return -EINVAL;
+
+ ret = rdmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ &l, &h);
+ if (ret < 0)
+ return ret;
+
+ if (trip) {
+ mask = THERM_MASK_THRESHOLD1;
+ shift = THERM_SHIFT_THRESHOLD1;
+ intr = THERM_INT_THRESHOLD1_ENABLE;
+ } else {
+ mask = THERM_MASK_THRESHOLD0;
+ shift = THERM_SHIFT_THRESHOLD0;
+ intr = THERM_INT_THRESHOLD0_ENABLE;
+ }
+ l &= ~mask;
+ /*
+ * When user space sets a trip temperature of 0, it indicates that
+ * it is no longer interested in receiving notifications.
+ */
+ if (!temp) {
+ l &= ~intr;
+ } else {
+ l |= val << shift;
+ l |= intr;
+ }
+
+ return wrmsr_on_cpu(zonedev->cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ l, h);
+}
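+
+/*
+ * Worked example for sys_set_trip_temp() above: with tj_max = 100000
+ * (100C) and a requested trip of 80000 (80C),
+ * val = (100000 - 80000) / 1000 = 20, so 20 is written into the
+ * selected threshold field as an offset below TjMax and the matching
+ * threshold interrupt enable bit is set.
+ */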
+
+/* Thermal zone callback registry */
+static struct thermal_zone_device_ops tzone_ops = {
+ .get_temp = sys_get_curr_temp,
+ .set_trip_temp = sys_set_trip_temp,
+};
+
+static bool pkg_thermal_rate_control(void)
+{
+ return true;
+}
+
+/* Enable threshold interrupt on local package/cpu */
+static inline void enable_pkg_thres_interrupt(void)
+{
+ u8 thres_0, thres_1;
+ u32 l, h;
+
+ rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+ /* Only enable/disable if there is a valid threshold value. */
+ thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0;
+ thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1;
+ if (thres_0)
+ l |= THERM_INT_THRESHOLD0_ENABLE;
+ if (thres_1)
+ l |= THERM_INT_THRESHOLD1_ENABLE;
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+}
+
+/* Disable threshold interrupt on local package/cpu */
+static inline void disable_pkg_thres_interrupt(void)
+{
+ u32 l, h;
+
+ rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+
+ l &= ~(THERM_INT_THRESHOLD0_ENABLE | THERM_INT_THRESHOLD1_ENABLE);
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
+}
+
+static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work)
+{
+ struct thermal_zone_device *tzone = NULL;
+ int cpu = smp_processor_id();
+ struct zone_device *zonedev;
+
+ mutex_lock(&thermal_zone_mutex);
+ raw_spin_lock_irq(&pkg_temp_lock);
+ ++pkg_work_cnt;
+
+ zonedev = pkg_temp_thermal_get_dev(cpu);
+ if (!zonedev) {
+ raw_spin_unlock_irq(&pkg_temp_lock);
+ mutex_unlock(&thermal_zone_mutex);
+ return;
+ }
+ zonedev->work_scheduled = false;
+
+ thermal_clear_package_intr_status(PACKAGE_LEVEL, THERM_LOG_THRESHOLD0 | THERM_LOG_THRESHOLD1);
+ tzone = zonedev->tzone;
+
+ enable_pkg_thres_interrupt();
+ raw_spin_unlock_irq(&pkg_temp_lock);
+
+ /*
+ * If tzone is not NULL, then thermal_zone_mutex will prevent the
+ * concurrent removal in the cpu offline callback.
+ */
+ if (tzone)
+ thermal_zone_device_update(tzone, THERMAL_EVENT_UNSPECIFIED);
+
+ mutex_unlock(&thermal_zone_mutex);
+}
+
+static void pkg_thermal_schedule_work(int cpu, struct delayed_work *work)
+{
+ unsigned long ms = msecs_to_jiffies(notify_delay_ms);
+
+ schedule_delayed_work_on(cpu, work, ms);
+}
+
+static int pkg_thermal_notify(u64 msr_val)
+{
+ int cpu = smp_processor_id();
+ struct zone_device *zonedev;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&pkg_temp_lock, flags);
+ ++pkg_interrupt_cnt;
+
+ disable_pkg_thres_interrupt();
+
+ /* Work is per package, so scheduling it once is enough. */
+ zonedev = pkg_temp_thermal_get_dev(cpu);
+ if (zonedev && !zonedev->work_scheduled) {
+ zonedev->work_scheduled = true;
+ pkg_thermal_schedule_work(zonedev->cpu, &zonedev->work);
+ }
+
+ raw_spin_unlock_irqrestore(&pkg_temp_lock, flags);
+ return 0;
+}
+
+static struct thermal_trip *pkg_temp_thermal_trips_init(int cpu, int tj_max, int num_trips)
+{
+ struct thermal_trip *trips;
+ unsigned long thres_reg_value;
+ u32 mask, shift, eax, edx;
+ int ret, i;
+
+ trips = kcalloc(num_trips, sizeof(*trips), GFP_KERNEL);
+ if (!trips)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < num_trips; i++) {
+
+ if (i) {
+ mask = THERM_MASK_THRESHOLD1;
+ shift = THERM_SHIFT_THRESHOLD1;
+ } else {
+ mask = THERM_MASK_THRESHOLD0;
+ shift = THERM_SHIFT_THRESHOLD0;
+ }
+
+ ret = rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ &eax, &edx);
+ if (ret < 0) {
+ kfree(trips);
+ return ERR_PTR(ret);
+ }
+
+ thres_reg_value = (eax & mask) >> shift;
+
+ trips[i].temperature = thres_reg_value ?
+ tj_max - thres_reg_value * 1000 : THERMAL_TEMP_INVALID;
+
+ trips[i].type = THERMAL_TRIP_PASSIVE;
+
+ pr_debug("%s: cpu=%d, trip=%d, temp=%d\n",
+ __func__, cpu, i, trips[i].temperature);
+ }
+
+ return trips;
+}
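+
+/*
+ * Worked example (the inverse of sys_set_trip_temp() above): with
+ * tj_max = 100000 and a threshold field reading of 20, the trip is
+ * reported as 100000 - 20 * 1000 = 80000 (80C); a field of 0 yields
+ * THERMAL_TEMP_INVALID.
+ */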
+
+static int pkg_temp_thermal_device_add(unsigned int cpu)
+{
+ int id = topology_logical_die_id(cpu);
+ u32 eax, ebx, ecx, edx;
+ struct zone_device *zonedev;
+ int thres_count, err;
+ int tj_max;
+
+ if (id >= max_id)
+ return -ENOMEM;
+
+ cpuid(6, &eax, &ebx, &ecx, &edx);
+ thres_count = ebx & 0x07;
+ if (!thres_count)
+ return -ENODEV;
+
+ thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS);
+
+ tj_max = intel_tcc_get_tjmax(cpu);
+ if (tj_max < 0)
+ return tj_max;
+
+ zonedev = kzalloc(sizeof(*zonedev), GFP_KERNEL);
+ if (!zonedev)
+ return -ENOMEM;
+
+ zonedev->trips = pkg_temp_thermal_trips_init(cpu, tj_max, thres_count);
+ if (IS_ERR(zonedev->trips)) {
+ err = PTR_ERR(zonedev->trips);
+ goto out_kfree_zonedev;
+ }
+
+ INIT_DELAYED_WORK(&zonedev->work, pkg_temp_thermal_threshold_work_fn);
+ zonedev->cpu = cpu;
+ zonedev->tzone = thermal_zone_device_register_with_trips("x86_pkg_temp",
+ zonedev->trips, thres_count,
+ (thres_count == MAX_NUMBER_OF_TRIPS) ? 0x03 : 0x01,
+ zonedev, &tzone_ops, &pkg_temp_tz_params, 0, 0);
+ if (IS_ERR(zonedev->tzone)) {
+ err = PTR_ERR(zonedev->tzone);
+ goto out_kfree_trips;
+ }
+ err = thermal_zone_device_enable(zonedev->tzone);
+ if (err)
+ goto out_unregister_tz;
+
+ /* Store MSR value for package thermal interrupt, to restore at exit */
+ rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, zonedev->msr_pkg_therm_low,
+ zonedev->msr_pkg_therm_high);
+
+ cpumask_set_cpu(cpu, &zonedev->cpumask);
+ raw_spin_lock_irq(&pkg_temp_lock);
+ zones[id] = zonedev;
+ raw_spin_unlock_irq(&pkg_temp_lock);
+
+ return 0;
+
+out_unregister_tz:
+ thermal_zone_device_unregister(zonedev->tzone);
+out_kfree_trips:
+ kfree(zonedev->trips);
+out_kfree_zonedev:
+ kfree(zonedev);
+ return err;
+}
+
+static int pkg_thermal_cpu_offline(unsigned int cpu)
+{
+ struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
+ bool lastcpu, was_target;
+ int target;
+
+ if (!zonedev)
+ return 0;
+
+ target = cpumask_any_but(&zonedev->cpumask, cpu);
+ cpumask_clear_cpu(cpu, &zonedev->cpumask);
+ lastcpu = target >= nr_cpu_ids;
+ /*
+ * If this is the last CPU in the package, remove the sysfs files
+ * before doing further cleanups.
+ */
+ if (lastcpu) {
+ struct thermal_zone_device *tzone = zonedev->tzone;
+
+ /*
+ * We must protect against the work function calling
+ * thermal_zone_device_update() after/while unregistering. NULL out
+ * the pointer under the zone mutex, so the work function
+ * won't try to call it.
+ */
+ mutex_lock(&thermal_zone_mutex);
+ zonedev->tzone = NULL;
+ mutex_unlock(&thermal_zone_mutex);
+
+ thermal_zone_device_unregister(tzone);
+ }
+
+ /* Protect against work and interrupts */
+ raw_spin_lock_irq(&pkg_temp_lock);
+
+ /*
+ * Check whether this cpu was the current target and store the new
+ * one. When we drop the lock, then the interrupt notify function
+ * will see the new target.
+ */
+ was_target = zonedev->cpu == cpu;
+ zonedev->cpu = target;
+
+ /*
+ * If this is the last CPU in the package remove the package
+ * reference from the array and restore the interrupt MSR. When we
+ * drop the lock neither the interrupt notify function nor the
+ * worker will see the package anymore.
+ */
+ if (lastcpu) {
+ zones[topology_logical_die_id(cpu)] = NULL;
+ /* After this point nothing touches the MSR anymore. */
+ wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
+ zonedev->msr_pkg_therm_low, zonedev->msr_pkg_therm_high);
+ }
+
+ /*
+ * Check whether there is work scheduled and whether the work is
+ * targeted at the outgoing CPU.
+ */
+ if (zonedev->work_scheduled && was_target) {
+ /*
+ * To cancel the work we need to drop the lock, otherwise
+ * we might deadlock if the work needs to be flushed.
+ */
+ raw_spin_unlock_irq(&pkg_temp_lock);
+ cancel_delayed_work_sync(&zonedev->work);
+ raw_spin_lock_irq(&pkg_temp_lock);
+ /*
+ * If this is not the last cpu in the package and the work
+ * did not run after we dropped the lock above, then we
+ * need to reschedule the work, otherwise the interrupt
+ * stays disabled forever.
+ */
+ if (!lastcpu && zonedev->work_scheduled)
+ pkg_thermal_schedule_work(target, &zonedev->work);
+ }
+
+ raw_spin_unlock_irq(&pkg_temp_lock);
+
+ /* Final cleanup if this is the last cpu */
+ if (lastcpu) {
+ kfree(zonedev->trips);
+ kfree(zonedev);
+ }
+ return 0;
+}
+
+static int pkg_thermal_cpu_online(unsigned int cpu)
+{
+ struct zone_device *zonedev = pkg_temp_thermal_get_dev(cpu);
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ /* Paranoia check */
+ if (!cpu_has(c, X86_FEATURE_DTHERM) || !cpu_has(c, X86_FEATURE_PTS))
+ return -ENODEV;
+
+ /* If the package exists, nothing to do */
+ if (zonedev) {
+ cpumask_set_cpu(cpu, &zonedev->cpumask);
+ return 0;
+ }
+ return pkg_temp_thermal_device_add(cpu);
+}
+
+static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = {
+ X86_MATCH_VENDOR_FEATURE(INTEL, X86_FEATURE_PTS, NULL),
+ {}
+};
+MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids);
+
+static int __init pkg_temp_thermal_init(void)
+{
+ int ret;
+
+ if (!x86_match_cpu(pkg_temp_thermal_ids))
+ return -ENODEV;
+
+ max_id = topology_max_packages() * topology_max_die_per_package();
+ zones = kcalloc(max_id, sizeof(struct zone_device *),
+ GFP_KERNEL);
+ if (!zones)
+ return -ENOMEM;
+
+ ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "thermal/x86_pkg:online",
+ pkg_thermal_cpu_online, pkg_thermal_cpu_offline);
+ if (ret < 0)
+ goto err;
+
+ /* Store the state for module exit */
+ pkg_thermal_hp_state = ret;
+
+ platform_thermal_package_notify = pkg_thermal_notify;
+ platform_thermal_package_rate_control = pkg_thermal_rate_control;
+
+ /* Don't care if it fails */
+ pkg_temp_debugfs_init();
+ return 0;
+
+err:
+ kfree(zones);
+ return ret;
+}
+module_init(pkg_temp_thermal_init)
+
+static void __exit pkg_temp_thermal_exit(void)
+{
+ platform_thermal_package_notify = NULL;
+ platform_thermal_package_rate_control = NULL;
+
+ cpuhp_remove_state(pkg_thermal_hp_state);
+ debugfs_remove_recursive(debugfs);
+ kfree(zones);
+}
+module_exit(pkg_temp_thermal_exit)
+
+MODULE_IMPORT_NS(INTEL_TCC);
+MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver");
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_LICENSE("GPL v2");