summaryrefslogtreecommitdiffstats
path: root/drivers/hwmon/peci
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--drivers/hwmon/peci/Kconfig31
-rw-r--r--drivers/hwmon/peci/Makefile7
-rw-r--r--drivers/hwmon/peci/common.h58
-rw-r--r--drivers/hwmon/peci/cputemp.c592
-rw-r--r--drivers/hwmon/peci/dimmtemp.c615
5 files changed, 1303 insertions, 0 deletions
diff --git a/drivers/hwmon/peci/Kconfig b/drivers/hwmon/peci/Kconfig
new file mode 100644
index 000000000..9d32a57ba
--- /dev/null
+++ b/drivers/hwmon/peci/Kconfig
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config SENSORS_PECI_CPUTEMP
+ tristate "PECI CPU temperature monitoring client"
+ depends on PECI
+ select SENSORS_PECI
+ select PECI_CPU
+ help
+ If you say yes here you get support for the generic Intel PECI
+ cputemp driver which provides Digital Thermal Sensor (DTS) thermal
+ readings of the CPU package and CPU cores that are accessible via
+ the processor PECI interface.
+
+ This driver can also be built as a module. If so, the module
+ will be called peci-cputemp.
+
+config SENSORS_PECI_DIMMTEMP
+ tristate "PECI DIMM temperature monitoring client"
+ depends on PECI
+ select SENSORS_PECI
+ select PECI_CPU
+ help
+ If you say yes here you get support for the generic Intel PECI hwmon
+ driver which provides Temperature Sensor on DIMM readings that are
+ accessible via the processor PECI interface.
+
+ This driver can also be built as a module. If so, the module
+ will be called peci-dimmtemp.
+
+config SENSORS_PECI
+ tristate
diff --git a/drivers/hwmon/peci/Makefile b/drivers/hwmon/peci/Makefile
new file mode 100644
index 000000000..191cfa022
--- /dev/null
+++ b/drivers/hwmon/peci/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+peci-cputemp-y := cputemp.o
+peci-dimmtemp-y := dimmtemp.o
+
+obj-$(CONFIG_SENSORS_PECI_CPUTEMP) += peci-cputemp.o
+obj-$(CONFIG_SENSORS_PECI_DIMMTEMP) += peci-dimmtemp.o
diff --git a/drivers/hwmon/peci/common.h b/drivers/hwmon/peci/common.h
new file mode 100644
index 000000000..734506b0e
--- /dev/null
+++ b/drivers/hwmon/peci/common.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2021 Intel Corporation */
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+#ifndef __PECI_HWMON_COMMON_H
+#define __PECI_HWMON_COMMON_H
+
+#define PECI_HWMON_UPDATE_INTERVAL HZ
+
+/**
+ * struct peci_sensor_state - PECI state information
+ * @valid: flag to indicate the sensor value is valid
+ * @last_updated: time of the last update in jiffies
+ * @lock: mutex to protect sensor access
+ */
+struct peci_sensor_state {
+ bool valid;
+ unsigned long last_updated;
+ struct mutex lock; /* protect sensor access */
+};
+
+/**
+ * struct peci_sensor_data - PECI sensor information
+ * @value: sensor value in milli units
+ * @state: sensor update state
+ */
+
+struct peci_sensor_data {
+ s32 value;
+ struct peci_sensor_state state;
+};
+
+/**
+ * peci_sensor_need_update() - check whether sensor update is needed or not
+ * @sensor: pointer to sensor data struct
+ *
+ * Return: true if update is needed, false if not.
+ */
+
+static inline bool peci_sensor_need_update(struct peci_sensor_state *state)
+{
+ return !state->valid ||
+ time_after(jiffies, state->last_updated + PECI_HWMON_UPDATE_INTERVAL);
+}
+
+/**
+ * peci_sensor_mark_updated() - mark the sensor is updated
+ * @sensor: pointer to sensor data struct
+ */
+static inline void peci_sensor_mark_updated(struct peci_sensor_state *state)
+{
+ state->valid = true;
+ state->last_updated = jiffies;
+}
+
+#endif /* __PECI_HWMON_COMMON_H */
diff --git a/drivers/hwmon/peci/cputemp.c b/drivers/hwmon/peci/cputemp.c
new file mode 100644
index 000000000..87d56f0fc
--- /dev/null
+++ b/drivers/hwmon/peci/cputemp.c
@@ -0,0 +1,592 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2018-2021 Intel Corporation
+
+#include <linux/auxiliary_bus.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/hwmon.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/peci.h>
+#include <linux/peci-cpu.h>
+#include <linux/units.h>
+
+#include "common.h"
+
+#define CORE_NUMS_MAX 64
+
+#define BASE_CHANNEL_NUMS 5
+#define CPUTEMP_CHANNEL_NUMS (BASE_CHANNEL_NUMS + CORE_NUMS_MAX)
+
+#define TEMP_TARGET_FAN_TEMP_MASK GENMASK(15, 8)
+#define TEMP_TARGET_REF_TEMP_MASK GENMASK(23, 16)
+#define TEMP_TARGET_TJ_OFFSET_MASK GENMASK(29, 24)
+
+#define DTS_MARGIN_MASK GENMASK(15, 0)
+#define PCS_MODULE_TEMP_MASK GENMASK(15, 0)
+
+struct resolved_cores_reg {
+ u8 bus;
+ u8 dev;
+ u8 func;
+ u8 offset;
+};
+
+struct cpu_info {
+ struct resolved_cores_reg *reg;
+ u8 min_peci_revision;
+ s32 (*thermal_margin_to_millidegree)(u16 val);
+};
+
+struct peci_temp_target {
+ s32 tcontrol;
+ s32 tthrottle;
+ s32 tjmax;
+ struct peci_sensor_state state;
+};
+
+enum peci_temp_target_type {
+ tcontrol_type,
+ tthrottle_type,
+ tjmax_type,
+ crit_hyst_type,
+};
+
+struct peci_cputemp {
+ struct peci_device *peci_dev;
+ struct device *dev;
+ const char *name;
+ const struct cpu_info *gen_info;
+ struct {
+ struct peci_temp_target target;
+ struct peci_sensor_data die;
+ struct peci_sensor_data dts;
+ struct peci_sensor_data core[CORE_NUMS_MAX];
+ } temp;
+ const char **coretemp_label;
+ DECLARE_BITMAP(core_mask, CORE_NUMS_MAX);
+};
+
+enum cputemp_channels {
+ channel_die,
+ channel_dts,
+ channel_tcontrol,
+ channel_tthrottle,
+ channel_tjmax,
+ channel_core,
+};
+
+static const char * const cputemp_label[BASE_CHANNEL_NUMS] = {
+ "Die",
+ "DTS",
+ "Tcontrol",
+ "Tthrottle",
+ "Tjmax",
+};
+
+static int update_temp_target(struct peci_cputemp *priv)
+{
+ s32 tthrottle_offset, tcontrol_margin;
+ u32 pcs;
+ int ret;
+
+ if (!peci_sensor_need_update(&priv->temp.target.state))
+ return 0;
+
+ ret = peci_pcs_read(priv->peci_dev, PECI_PCS_TEMP_TARGET, 0, &pcs);
+ if (ret)
+ return ret;
+
+ priv->temp.target.tjmax =
+ FIELD_GET(TEMP_TARGET_REF_TEMP_MASK, pcs) * MILLIDEGREE_PER_DEGREE;
+
+ tcontrol_margin = FIELD_GET(TEMP_TARGET_FAN_TEMP_MASK, pcs);
+ tcontrol_margin = sign_extend32(tcontrol_margin, 7) * MILLIDEGREE_PER_DEGREE;
+ priv->temp.target.tcontrol = priv->temp.target.tjmax - tcontrol_margin;
+
+ tthrottle_offset = FIELD_GET(TEMP_TARGET_TJ_OFFSET_MASK, pcs) * MILLIDEGREE_PER_DEGREE;
+ priv->temp.target.tthrottle = priv->temp.target.tjmax - tthrottle_offset;
+
+ peci_sensor_mark_updated(&priv->temp.target.state);
+
+ return 0;
+}
+
+static int get_temp_target(struct peci_cputemp *priv, enum peci_temp_target_type type, long *val)
+{
+ int ret;
+
+ mutex_lock(&priv->temp.target.state.lock);
+
+ ret = update_temp_target(priv);
+ if (ret)
+ goto unlock;
+
+ switch (type) {
+ case tcontrol_type:
+ *val = priv->temp.target.tcontrol;
+ break;
+ case tthrottle_type:
+ *val = priv->temp.target.tthrottle;
+ break;
+ case tjmax_type:
+ *val = priv->temp.target.tjmax;
+ break;
+ case crit_hyst_type:
+ *val = priv->temp.target.tjmax - priv->temp.target.tcontrol;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+unlock:
+ mutex_unlock(&priv->temp.target.state.lock);
+
+ return ret;
+}
+
+/*
+ * Error codes:
+ * 0x8000: General sensor error
+ * 0x8001: Reserved
+ * 0x8002: Underflow on reading value
+ * 0x8003-0x81ff: Reserved
+ */
+static bool dts_valid(u16 val)
+{
+ return val < 0x8000 || val > 0x81ff;
+}
+
+/*
+ * Processors return a value of DTS reading in S10.6 fixed point format
+ * (16 bits: 10-bit signed magnitude, 6-bit fraction).
+ */
+static s32 dts_ten_dot_six_to_millidegree(u16 val)
+{
+ return sign_extend32(val, 15) * MILLIDEGREE_PER_DEGREE / 64;
+}
+
+/*
+ * For older processors, thermal margin reading is returned in S8.8 fixed
+ * point format (16 bits: 8-bit signed magnitude, 8-bit fraction).
+ */
+static s32 dts_eight_dot_eight_to_millidegree(u16 val)
+{
+ return sign_extend32(val, 15) * MILLIDEGREE_PER_DEGREE / 256;
+}
+
+static int get_die_temp(struct peci_cputemp *priv, long *val)
+{
+ int ret = 0;
+ long tjmax;
+ u16 temp;
+
+ mutex_lock(&priv->temp.die.state.lock);
+ if (!peci_sensor_need_update(&priv->temp.die.state))
+ goto skip_update;
+
+ ret = peci_temp_read(priv->peci_dev, &temp);
+ if (ret)
+ goto err_unlock;
+
+ if (!dts_valid(temp)) {
+ ret = -EIO;
+ goto err_unlock;
+ }
+
+ ret = get_temp_target(priv, tjmax_type, &tjmax);
+ if (ret)
+ goto err_unlock;
+
+ priv->temp.die.value = (s32)tjmax + dts_ten_dot_six_to_millidegree(temp);
+
+ peci_sensor_mark_updated(&priv->temp.die.state);
+
+skip_update:
+ *val = priv->temp.die.value;
+err_unlock:
+ mutex_unlock(&priv->temp.die.state.lock);
+ return ret;
+}
+
+static int get_dts(struct peci_cputemp *priv, long *val)
+{
+ int ret = 0;
+ u16 thermal_margin;
+ long tcontrol;
+ u32 pcs;
+
+ mutex_lock(&priv->temp.dts.state.lock);
+ if (!peci_sensor_need_update(&priv->temp.dts.state))
+ goto skip_update;
+
+ ret = peci_pcs_read(priv->peci_dev, PECI_PCS_THERMAL_MARGIN, 0, &pcs);
+ if (ret)
+ goto err_unlock;
+
+ thermal_margin = FIELD_GET(DTS_MARGIN_MASK, pcs);
+ if (!dts_valid(thermal_margin)) {
+ ret = -EIO;
+ goto err_unlock;
+ }
+
+ ret = get_temp_target(priv, tcontrol_type, &tcontrol);
+ if (ret)
+ goto err_unlock;
+
+ /* Note that the tcontrol should be available before calling it */
+ priv->temp.dts.value =
+ (s32)tcontrol - priv->gen_info->thermal_margin_to_millidegree(thermal_margin);
+
+ peci_sensor_mark_updated(&priv->temp.dts.state);
+
+skip_update:
+ *val = priv->temp.dts.value;
+err_unlock:
+ mutex_unlock(&priv->temp.dts.state.lock);
+ return ret;
+}
+
+static int get_core_temp(struct peci_cputemp *priv, int core_index, long *val)
+{
+ int ret = 0;
+ u16 core_dts_margin;
+ long tjmax;
+ u32 pcs;
+
+ mutex_lock(&priv->temp.core[core_index].state.lock);
+ if (!peci_sensor_need_update(&priv->temp.core[core_index].state))
+ goto skip_update;
+
+ ret = peci_pcs_read(priv->peci_dev, PECI_PCS_MODULE_TEMP, core_index, &pcs);
+ if (ret)
+ goto err_unlock;
+
+ core_dts_margin = FIELD_GET(PCS_MODULE_TEMP_MASK, pcs);
+ if (!dts_valid(core_dts_margin)) {
+ ret = -EIO;
+ goto err_unlock;
+ }
+
+ ret = get_temp_target(priv, tjmax_type, &tjmax);
+ if (ret)
+ goto err_unlock;
+
+ /* Note that the tjmax should be available before calling it */
+ priv->temp.core[core_index].value =
+ (s32)tjmax + dts_ten_dot_six_to_millidegree(core_dts_margin);
+
+ peci_sensor_mark_updated(&priv->temp.core[core_index].state);
+
+skip_update:
+ *val = priv->temp.core[core_index].value;
+err_unlock:
+ mutex_unlock(&priv->temp.core[core_index].state.lock);
+ return ret;
+}
+
+static int cputemp_read_string(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, const char **str)
+{
+ struct peci_cputemp *priv = dev_get_drvdata(dev);
+
+ if (attr != hwmon_temp_label)
+ return -EOPNOTSUPP;
+
+ *str = channel < channel_core ?
+ cputemp_label[channel] : priv->coretemp_label[channel - channel_core];
+
+ return 0;
+}
+
+static int cputemp_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct peci_cputemp *priv = dev_get_drvdata(dev);
+
+ switch (attr) {
+ case hwmon_temp_input:
+ switch (channel) {
+ case channel_die:
+ return get_die_temp(priv, val);
+ case channel_dts:
+ return get_dts(priv, val);
+ case channel_tcontrol:
+ return get_temp_target(priv, tcontrol_type, val);
+ case channel_tthrottle:
+ return get_temp_target(priv, tthrottle_type, val);
+ case channel_tjmax:
+ return get_temp_target(priv, tjmax_type, val);
+ default:
+ return get_core_temp(priv, channel - channel_core, val);
+ }
+ break;
+ case hwmon_temp_max:
+ return get_temp_target(priv, tcontrol_type, val);
+ case hwmon_temp_crit:
+ return get_temp_target(priv, tjmax_type, val);
+ case hwmon_temp_crit_hyst:
+ return get_temp_target(priv, crit_hyst_type, val);
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+static umode_t cputemp_is_visible(const void *data, enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ const struct peci_cputemp *priv = data;
+
+ if (channel > CPUTEMP_CHANNEL_NUMS)
+ return 0;
+
+ if (channel < channel_core)
+ return 0444;
+
+ if (test_bit(channel - channel_core, priv->core_mask))
+ return 0444;
+
+ return 0;
+}
+
+static int init_core_mask(struct peci_cputemp *priv)
+{
+ struct peci_device *peci_dev = priv->peci_dev;
+ struct resolved_cores_reg *reg = priv->gen_info->reg;
+ u64 core_mask;
+ u32 data;
+ int ret;
+
+ /* Get the RESOLVED_CORES register value */
+ switch (peci_dev->info.model) {
+ case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_D:
+ ret = peci_ep_pci_local_read(peci_dev, 0, reg->bus, reg->dev,
+ reg->func, reg->offset + 4, &data);
+ if (ret)
+ return ret;
+
+ core_mask = (u64)data << 32;
+
+ ret = peci_ep_pci_local_read(peci_dev, 0, reg->bus, reg->dev,
+ reg->func, reg->offset, &data);
+ if (ret)
+ return ret;
+
+ core_mask |= data;
+
+ break;
+ default:
+ ret = peci_pci_local_read(peci_dev, reg->bus, reg->dev,
+ reg->func, reg->offset, &data);
+ if (ret)
+ return ret;
+
+ core_mask = data;
+
+ break;
+ }
+
+ if (!core_mask)
+ return -EIO;
+
+ bitmap_from_u64(priv->core_mask, core_mask);
+
+ return 0;
+}
+
+static int create_temp_label(struct peci_cputemp *priv)
+{
+ unsigned long core_max = find_last_bit(priv->core_mask, CORE_NUMS_MAX);
+ int i;
+
+ priv->coretemp_label = devm_kzalloc(priv->dev, (core_max + 1) * sizeof(char *), GFP_KERNEL);
+ if (!priv->coretemp_label)
+ return -ENOMEM;
+
+ for_each_set_bit(i, priv->core_mask, CORE_NUMS_MAX) {
+ priv->coretemp_label[i] = devm_kasprintf(priv->dev, GFP_KERNEL, "Core %d", i);
+ if (!priv->coretemp_label[i])
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void check_resolved_cores(struct peci_cputemp *priv)
+{
+ /*
+ * Failure to resolve cores is non-critical, we're still able to
+ * provide other sensor data.
+ */
+
+ if (init_core_mask(priv))
+ return;
+
+ if (create_temp_label(priv))
+ bitmap_zero(priv->core_mask, CORE_NUMS_MAX);
+}
+
+static void sensor_init(struct peci_cputemp *priv)
+{
+ int i;
+
+ mutex_init(&priv->temp.target.state.lock);
+ mutex_init(&priv->temp.die.state.lock);
+ mutex_init(&priv->temp.dts.state.lock);
+
+ for_each_set_bit(i, priv->core_mask, CORE_NUMS_MAX)
+ mutex_init(&priv->temp.core[i].state.lock);
+}
+
+static const struct hwmon_ops peci_cputemp_ops = {
+ .is_visible = cputemp_is_visible,
+ .read_string = cputemp_read_string,
+ .read = cputemp_read,
+};
+
+static const struct hwmon_channel_info *peci_cputemp_info[] = {
+ HWMON_CHANNEL_INFO(temp,
+ /* Die temperature */
+ HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MAX |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST,
+ /* DTS margin */
+ HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_MAX |
+ HWMON_T_CRIT | HWMON_T_CRIT_HYST,
+ /* Tcontrol temperature */
+ HWMON_T_LABEL | HWMON_T_INPUT | HWMON_T_CRIT,
+ /* Tthrottle temperature */
+ HWMON_T_LABEL | HWMON_T_INPUT,
+ /* Tjmax temperature */
+ HWMON_T_LABEL | HWMON_T_INPUT,
+ /* Core temperature - for all core channels */
+ [channel_core ... CPUTEMP_CHANNEL_NUMS - 1] =
+ HWMON_T_LABEL | HWMON_T_INPUT),
+ NULL
+};
+
+static const struct hwmon_chip_info peci_cputemp_chip_info = {
+ .ops = &peci_cputemp_ops,
+ .info = peci_cputemp_info,
+};
+
+static int peci_cputemp_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+{
+ struct device *dev = &adev->dev;
+ struct peci_device *peci_dev = to_peci_device(dev->parent);
+ struct peci_cputemp *priv;
+ struct device *hwmon_dev;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->name = devm_kasprintf(dev, GFP_KERNEL, "peci_cputemp.cpu%d",
+ peci_dev->info.socket_id);
+ if (!priv->name)
+ return -ENOMEM;
+
+ priv->dev = dev;
+ priv->peci_dev = peci_dev;
+ priv->gen_info = (const struct cpu_info *)id->driver_data;
+
+ /*
+ * This is just a sanity check. Since we're using commands that are
+ * guaranteed to be supported on a given platform, we should never see
+ * revision lower than expected.
+ */
+ if (peci_dev->info.peci_revision < priv->gen_info->min_peci_revision)
+ dev_warn(priv->dev,
+ "Unexpected PECI revision %#x, some features may be unavailable\n",
+ peci_dev->info.peci_revision);
+
+ check_resolved_cores(priv);
+
+ sensor_init(priv);
+
+ hwmon_dev = devm_hwmon_device_register_with_info(priv->dev, priv->name,
+ priv, &peci_cputemp_chip_info, NULL);
+
+ return PTR_ERR_OR_ZERO(hwmon_dev);
+}
+
+/*
+ * RESOLVED_CORES PCI configuration register may have different location on
+ * different platforms.
+ */
+static struct resolved_cores_reg resolved_cores_reg_hsx = {
+ .bus = 1,
+ .dev = 30,
+ .func = 3,
+ .offset = 0xb4,
+};
+
+static struct resolved_cores_reg resolved_cores_reg_icx = {
+ .bus = 14,
+ .dev = 30,
+ .func = 3,
+ .offset = 0xd0,
+};
+
+static const struct cpu_info cpu_hsx = {
+ .reg = &resolved_cores_reg_hsx,
+ .min_peci_revision = 0x33,
+ .thermal_margin_to_millidegree = &dts_eight_dot_eight_to_millidegree,
+};
+
+static const struct cpu_info cpu_skx = {
+ .reg = &resolved_cores_reg_hsx,
+ .min_peci_revision = 0x33,
+ .thermal_margin_to_millidegree = &dts_ten_dot_six_to_millidegree,
+};
+
+static const struct cpu_info cpu_icx = {
+ .reg = &resolved_cores_reg_icx,
+ .min_peci_revision = 0x40,
+ .thermal_margin_to_millidegree = &dts_ten_dot_six_to_millidegree,
+};
+
+static const struct auxiliary_device_id peci_cputemp_ids[] = {
+ {
+ .name = "peci_cpu.cputemp.hsx",
+ .driver_data = (kernel_ulong_t)&cpu_hsx,
+ },
+ {
+ .name = "peci_cpu.cputemp.bdx",
+ .driver_data = (kernel_ulong_t)&cpu_hsx,
+ },
+ {
+ .name = "peci_cpu.cputemp.bdxd",
+ .driver_data = (kernel_ulong_t)&cpu_hsx,
+ },
+ {
+ .name = "peci_cpu.cputemp.skx",
+ .driver_data = (kernel_ulong_t)&cpu_skx,
+ },
+ {
+ .name = "peci_cpu.cputemp.icx",
+ .driver_data = (kernel_ulong_t)&cpu_icx,
+ },
+ {
+ .name = "peci_cpu.cputemp.icxd",
+ .driver_data = (kernel_ulong_t)&cpu_icx,
+ },
+ { }
+};
+MODULE_DEVICE_TABLE(auxiliary, peci_cputemp_ids);
+
+static struct auxiliary_driver peci_cputemp_driver = {
+ .probe = peci_cputemp_probe,
+ .id_table = peci_cputemp_ids,
+};
+
+module_auxiliary_driver(peci_cputemp_driver);
+
+MODULE_AUTHOR("Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>");
+MODULE_AUTHOR("Iwona Winiarska <iwona.winiarska@intel.com>");
+MODULE_DESCRIPTION("PECI cputemp driver");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(PECI_CPU);
diff --git a/drivers/hwmon/peci/dimmtemp.c b/drivers/hwmon/peci/dimmtemp.c
new file mode 100644
index 000000000..0a633bda3
--- /dev/null
+++ b/drivers/hwmon/peci/dimmtemp.c
@@ -0,0 +1,615 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2018-2021 Intel Corporation
+
+#include <linux/auxiliary_bus.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/devm-helpers.h>
+#include <linux/hwmon.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/peci.h>
+#include <linux/peci-cpu.h>
+#include <linux/units.h>
+#include <linux/workqueue.h>
+
+#include "common.h"
+
+#define DIMM_MASK_CHECK_DELAY_JIFFIES msecs_to_jiffies(5000)
+
+/* Max number of channel ranks and DIMM index per channel */
+#define CHAN_RANK_MAX_ON_HSX 8
+#define DIMM_IDX_MAX_ON_HSX 3
+#define CHAN_RANK_MAX_ON_BDX 4
+#define DIMM_IDX_MAX_ON_BDX 3
+#define CHAN_RANK_MAX_ON_BDXD 2
+#define DIMM_IDX_MAX_ON_BDXD 2
+#define CHAN_RANK_MAX_ON_SKX 6
+#define DIMM_IDX_MAX_ON_SKX 2
+#define CHAN_RANK_MAX_ON_ICX 8
+#define DIMM_IDX_MAX_ON_ICX 2
+#define CHAN_RANK_MAX_ON_ICXD 4
+#define DIMM_IDX_MAX_ON_ICXD 2
+
+#define CHAN_RANK_MAX CHAN_RANK_MAX_ON_HSX
+#define DIMM_IDX_MAX DIMM_IDX_MAX_ON_HSX
+#define DIMM_NUMS_MAX (CHAN_RANK_MAX * DIMM_IDX_MAX)
+
+#define CPU_SEG_MASK GENMASK(23, 16)
+#define GET_CPU_SEG(x) (((x) & CPU_SEG_MASK) >> 16)
+#define CPU_BUS_MASK GENMASK(7, 0)
+#define GET_CPU_BUS(x) ((x) & CPU_BUS_MASK)
+
+#define DIMM_TEMP_MAX GENMASK(15, 8)
+#define DIMM_TEMP_CRIT GENMASK(23, 16)
+#define GET_TEMP_MAX(x) (((x) & DIMM_TEMP_MAX) >> 8)
+#define GET_TEMP_CRIT(x) (((x) & DIMM_TEMP_CRIT) >> 16)
+
+#define NO_DIMM_RETRY_COUNT_MAX 5
+
+struct peci_dimmtemp;
+
+struct dimm_info {
+ int chan_rank_max;
+ int dimm_idx_max;
+ u8 min_peci_revision;
+ int (*read_thresholds)(struct peci_dimmtemp *priv, int dimm_order,
+ int chan_rank, u32 *data);
+};
+
+struct peci_dimm_thresholds {
+ long temp_max;
+ long temp_crit;
+ struct peci_sensor_state state;
+};
+
+enum peci_dimm_threshold_type {
+ temp_max_type,
+ temp_crit_type,
+};
+
+struct peci_dimmtemp {
+ struct peci_device *peci_dev;
+ struct device *dev;
+ const char *name;
+ const struct dimm_info *gen_info;
+ struct delayed_work detect_work;
+ struct {
+ struct peci_sensor_data temp;
+ struct peci_dimm_thresholds thresholds;
+ } dimm[DIMM_NUMS_MAX];
+ char **dimmtemp_label;
+ DECLARE_BITMAP(dimm_mask, DIMM_NUMS_MAX);
+ u8 no_dimm_retry_count;
+};
+
+static u8 __dimm_temp(u32 reg, int dimm_order)
+{
+ return (reg >> (dimm_order * 8)) & 0xff;
+}
+
+static int get_dimm_temp(struct peci_dimmtemp *priv, int dimm_no, long *val)
+{
+ int dimm_order = dimm_no % priv->gen_info->dimm_idx_max;
+ int chan_rank = dimm_no / priv->gen_info->dimm_idx_max;
+ int ret = 0;
+ u32 data;
+
+ mutex_lock(&priv->dimm[dimm_no].temp.state.lock);
+ if (!peci_sensor_need_update(&priv->dimm[dimm_no].temp.state))
+ goto skip_update;
+
+ ret = peci_pcs_read(priv->peci_dev, PECI_PCS_DDR_DIMM_TEMP, chan_rank, &data);
+ if (ret)
+ goto unlock;
+
+ priv->dimm[dimm_no].temp.value = __dimm_temp(data, dimm_order) * MILLIDEGREE_PER_DEGREE;
+
+ peci_sensor_mark_updated(&priv->dimm[dimm_no].temp.state);
+
+skip_update:
+ *val = priv->dimm[dimm_no].temp.value;
+unlock:
+ mutex_unlock(&priv->dimm[dimm_no].temp.state.lock);
+ return ret;
+}
+
+static int update_thresholds(struct peci_dimmtemp *priv, int dimm_no)
+{
+ int dimm_order = dimm_no % priv->gen_info->dimm_idx_max;
+ int chan_rank = dimm_no / priv->gen_info->dimm_idx_max;
+ u32 data;
+ int ret;
+
+ if (!peci_sensor_need_update(&priv->dimm[dimm_no].thresholds.state))
+ return 0;
+
+ ret = priv->gen_info->read_thresholds(priv, dimm_order, chan_rank, &data);
+ if (ret == -ENODATA) /* Use default or previous value */
+ return 0;
+ if (ret)
+ return ret;
+
+ priv->dimm[dimm_no].thresholds.temp_max = GET_TEMP_MAX(data) * MILLIDEGREE_PER_DEGREE;
+ priv->dimm[dimm_no].thresholds.temp_crit = GET_TEMP_CRIT(data) * MILLIDEGREE_PER_DEGREE;
+
+ peci_sensor_mark_updated(&priv->dimm[dimm_no].thresholds.state);
+
+ return 0;
+}
+
+static int get_dimm_thresholds(struct peci_dimmtemp *priv, enum peci_dimm_threshold_type type,
+ int dimm_no, long *val)
+{
+ int ret;
+
+ mutex_lock(&priv->dimm[dimm_no].thresholds.state.lock);
+ ret = update_thresholds(priv, dimm_no);
+ if (ret)
+ goto unlock;
+
+ switch (type) {
+ case temp_max_type:
+ *val = priv->dimm[dimm_no].thresholds.temp_max;
+ break;
+ case temp_crit_type:
+ *val = priv->dimm[dimm_no].thresholds.temp_crit;
+ break;
+ default:
+ ret = -EOPNOTSUPP;
+ break;
+ }
+unlock:
+ mutex_unlock(&priv->dimm[dimm_no].thresholds.state.lock);
+
+ return ret;
+}
+
+static int dimmtemp_read_string(struct device *dev,
+ enum hwmon_sensor_types type,
+ u32 attr, int channel, const char **str)
+{
+ struct peci_dimmtemp *priv = dev_get_drvdata(dev);
+
+ if (attr != hwmon_temp_label)
+ return -EOPNOTSUPP;
+
+ *str = (const char *)priv->dimmtemp_label[channel];
+
+ return 0;
+}
+
+static int dimmtemp_read(struct device *dev, enum hwmon_sensor_types type,
+ u32 attr, int channel, long *val)
+{
+ struct peci_dimmtemp *priv = dev_get_drvdata(dev);
+
+ switch (attr) {
+ case hwmon_temp_input:
+ return get_dimm_temp(priv, channel, val);
+ case hwmon_temp_max:
+ return get_dimm_thresholds(priv, temp_max_type, channel, val);
+ case hwmon_temp_crit:
+ return get_dimm_thresholds(priv, temp_crit_type, channel, val);
+ default:
+ break;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static umode_t dimmtemp_is_visible(const void *data, enum hwmon_sensor_types type,
+ u32 attr, int channel)
+{
+ const struct peci_dimmtemp *priv = data;
+
+ if (test_bit(channel, priv->dimm_mask))
+ return 0444;
+
+ return 0;
+}
+
+static const struct hwmon_ops peci_dimmtemp_ops = {
+ .is_visible = dimmtemp_is_visible,
+ .read_string = dimmtemp_read_string,
+ .read = dimmtemp_read,
+};
+
+static int check_populated_dimms(struct peci_dimmtemp *priv)
+{
+ int chan_rank_max = priv->gen_info->chan_rank_max;
+ int dimm_idx_max = priv->gen_info->dimm_idx_max;
+ u32 chan_rank_empty = 0;
+ u32 dimm_mask = 0;
+ int chan_rank, dimm_idx, ret;
+ u32 pcs;
+
+ BUILD_BUG_ON(BITS_PER_TYPE(chan_rank_empty) < CHAN_RANK_MAX);
+ BUILD_BUG_ON(BITS_PER_TYPE(dimm_mask) < DIMM_NUMS_MAX);
+ if (chan_rank_max * dimm_idx_max > DIMM_NUMS_MAX) {
+ WARN_ONCE(1, "Unsupported number of DIMMs - chan_rank_max: %d, dimm_idx_max: %d",
+ chan_rank_max, dimm_idx_max);
+ return -EINVAL;
+ }
+
+ for (chan_rank = 0; chan_rank < chan_rank_max; chan_rank++) {
+ ret = peci_pcs_read(priv->peci_dev, PECI_PCS_DDR_DIMM_TEMP, chan_rank, &pcs);
+ if (ret) {
+ /*
+ * Overall, we expect either success or -EINVAL in
+ * order to determine whether DIMM is populated or not.
+ * For anything else we fall back to deferring the
+ * detection to be performed at a later point in time.
+ */
+ if (ret == -EINVAL) {
+ chan_rank_empty |= BIT(chan_rank);
+ continue;
+ }
+
+ return -EAGAIN;
+ }
+
+ for (dimm_idx = 0; dimm_idx < dimm_idx_max; dimm_idx++)
+ if (__dimm_temp(pcs, dimm_idx))
+ dimm_mask |= BIT(chan_rank * dimm_idx_max + dimm_idx);
+ }
+
+ /*
+ * If we got all -EINVALs, it means that the CPU doesn't have any
+ * DIMMs. Unfortunately, it may also happen at the very start of
+ * host platform boot. Retrying a couple of times lets us make sure
+ * that the state is persistent.
+ */
+ if (chan_rank_empty == GENMASK(chan_rank_max - 1, 0)) {
+ if (priv->no_dimm_retry_count < NO_DIMM_RETRY_COUNT_MAX) {
+ priv->no_dimm_retry_count++;
+
+ return -EAGAIN;
+ }
+
+ return -ENODEV;
+ }
+
+ /*
+ * It's possible that memory training is not done yet. In this case we
+ * defer the detection to be performed at a later point in time.
+ */
+ if (!dimm_mask) {
+ priv->no_dimm_retry_count = 0;
+ return -EAGAIN;
+ }
+
+ dev_dbg(priv->dev, "Scanned populated DIMMs: %#x\n", dimm_mask);
+
+ bitmap_from_arr32(priv->dimm_mask, &dimm_mask, DIMM_NUMS_MAX);
+
+ return 0;
+}
+
+static int create_dimm_temp_label(struct peci_dimmtemp *priv, int chan)
+{
+ int rank = chan / priv->gen_info->dimm_idx_max;
+ int idx = chan % priv->gen_info->dimm_idx_max;
+
+ priv->dimmtemp_label[chan] = devm_kasprintf(priv->dev, GFP_KERNEL,
+ "DIMM %c%d", 'A' + rank,
+ idx + 1);
+ if (!priv->dimmtemp_label[chan])
+ return -ENOMEM;
+
+ return 0;
+}
+
+static const struct hwmon_channel_info *peci_dimmtemp_temp_info[] = {
+ HWMON_CHANNEL_INFO(temp,
+ [0 ... DIMM_NUMS_MAX - 1] = HWMON_T_LABEL |
+ HWMON_T_INPUT | HWMON_T_MAX | HWMON_T_CRIT),
+ NULL
+};
+
+static const struct hwmon_chip_info peci_dimmtemp_chip_info = {
+ .ops = &peci_dimmtemp_ops,
+ .info = peci_dimmtemp_temp_info,
+};
+
+static int create_dimm_temp_info(struct peci_dimmtemp *priv)
+{
+ int ret, i, channels;
+ struct device *dev;
+
+ /*
+ * We expect to either find populated DIMMs and carry on with creating
+ * sensors, or find out that there are no DIMMs populated.
+ * All other states mean that the platform never reached the state that
+ * allows to check DIMM state - causing us to retry later on.
+ */
+ ret = check_populated_dimms(priv);
+ if (ret == -ENODEV) {
+ dev_dbg(priv->dev, "No DIMMs found\n");
+ return 0;
+ } else if (ret) {
+ schedule_delayed_work(&priv->detect_work, DIMM_MASK_CHECK_DELAY_JIFFIES);
+ dev_dbg(priv->dev, "Deferred populating DIMM temp info\n");
+ return ret;
+ }
+
+ channels = priv->gen_info->chan_rank_max * priv->gen_info->dimm_idx_max;
+
+ priv->dimmtemp_label = devm_kzalloc(priv->dev, channels * sizeof(char *), GFP_KERNEL);
+ if (!priv->dimmtemp_label)
+ return -ENOMEM;
+
+ for_each_set_bit(i, priv->dimm_mask, DIMM_NUMS_MAX) {
+ ret = create_dimm_temp_label(priv, i);
+ if (ret)
+ return ret;
+ mutex_init(&priv->dimm[i].thresholds.state.lock);
+ mutex_init(&priv->dimm[i].temp.state.lock);
+ }
+
+ dev = devm_hwmon_device_register_with_info(priv->dev, priv->name, priv,
+ &peci_dimmtemp_chip_info, NULL);
+ if (IS_ERR(dev)) {
+ dev_err(priv->dev, "Failed to register hwmon device\n");
+ return PTR_ERR(dev);
+ }
+
+ dev_dbg(priv->dev, "%s: sensor '%s'\n", dev_name(dev), priv->name);
+
+ return 0;
+}
+
+static void create_dimm_temp_info_delayed(struct work_struct *work)
+{
+ struct peci_dimmtemp *priv = container_of(to_delayed_work(work),
+ struct peci_dimmtemp,
+ detect_work);
+ int ret;
+
+ ret = create_dimm_temp_info(priv);
+ if (ret && ret != -EAGAIN)
+ dev_err(priv->dev, "Failed to populate DIMM temp info\n");
+}
+
+static int peci_dimmtemp_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id)
+{
+ struct device *dev = &adev->dev;
+ struct peci_device *peci_dev = to_peci_device(dev->parent);
+ struct peci_dimmtemp *priv;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->name = devm_kasprintf(dev, GFP_KERNEL, "peci_dimmtemp.cpu%d",
+ peci_dev->info.socket_id);
+ if (!priv->name)
+ return -ENOMEM;
+
+ priv->dev = dev;
+ priv->peci_dev = peci_dev;
+ priv->gen_info = (const struct dimm_info *)id->driver_data;
+
+ /*
+ * This is just a sanity check. Since we're using commands that are
+ * guaranteed to be supported on a given platform, we should never see
+ * revision lower than expected.
+ */
+ if (peci_dev->info.peci_revision < priv->gen_info->min_peci_revision)
+ dev_warn(priv->dev,
+ "Unexpected PECI revision %#x, some features may be unavailable\n",
+ peci_dev->info.peci_revision);
+
+ ret = devm_delayed_work_autocancel(priv->dev, &priv->detect_work,
+ create_dimm_temp_info_delayed);
+ if (ret)
+ return ret;
+
+ ret = create_dimm_temp_info(priv);
+ if (ret && ret != -EAGAIN) {
+ dev_err(dev, "Failed to populate DIMM temp info\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+read_thresholds_hsx(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u32 *data)
+{
+ u8 dev, func;
+ u16 reg;
+ int ret;
+
+ /*
+ * Device 20, Function 0: IMC 0 channel 0 -> rank 0
+ * Device 20, Function 1: IMC 0 channel 1 -> rank 1
+ * Device 21, Function 0: IMC 0 channel 2 -> rank 2
+ * Device 21, Function 1: IMC 0 channel 3 -> rank 3
+ * Device 23, Function 0: IMC 1 channel 0 -> rank 4
+ * Device 23, Function 1: IMC 1 channel 1 -> rank 5
+ * Device 24, Function 0: IMC 1 channel 2 -> rank 6
+ * Device 24, Function 1: IMC 1 channel 3 -> rank 7
+ */
+ dev = 20 + chan_rank / 2 + chan_rank / 4;
+ func = chan_rank % 2;
+ reg = 0x120 + dimm_order * 4;
+
+ ret = peci_pci_local_read(priv->peci_dev, 1, dev, func, reg, data);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int
+read_thresholds_bdxd(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u32 *data)
+{
+ u8 dev, func;
+ u16 reg;
+ int ret;
+
+ /*
+ * Device 10, Function 2: IMC 0 channel 0 -> rank 0
+ * Device 10, Function 6: IMC 0 channel 1 -> rank 1
+ * Device 12, Function 2: IMC 1 channel 0 -> rank 2
+ * Device 12, Function 6: IMC 1 channel 1 -> rank 3
+ */
+ dev = 10 + chan_rank / 2 * 2;
+ func = (chan_rank % 2) ? 6 : 2;
+ reg = 0x120 + dimm_order * 4;
+
+ ret = peci_pci_local_read(priv->peci_dev, 2, dev, func, reg, data);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int
+read_thresholds_skx(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u32 *data)
+{
+ u8 dev, func;
+ u16 reg;
+ int ret;
+
+ /*
+ * Device 10, Function 2: IMC 0 channel 0 -> rank 0
+ * Device 10, Function 6: IMC 0 channel 1 -> rank 1
+ * Device 11, Function 2: IMC 0 channel 2 -> rank 2
+ * Device 12, Function 2: IMC 1 channel 0 -> rank 3
+ * Device 12, Function 6: IMC 1 channel 1 -> rank 4
+ * Device 13, Function 2: IMC 1 channel 2 -> rank 5
+ */
+ dev = 10 + chan_rank / 3 * 2 + (chan_rank % 3 == 2 ? 1 : 0);
+ func = chan_rank % 3 == 1 ? 6 : 2;
+ reg = 0x120 + dimm_order * 4;
+
+ ret = peci_pci_local_read(priv->peci_dev, 2, dev, func, reg, data);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int
+read_thresholds_icx(struct peci_dimmtemp *priv, int dimm_order, int chan_rank, u32 *data)
+{
+ u32 reg_val;
+ u64 offset;
+ int ret;
+ u8 dev;
+
+ ret = peci_ep_pci_local_read(priv->peci_dev, 0, 13, 0, 2, 0xd4, &reg_val);
+ if (ret || !(reg_val & BIT(31)))
+ return -ENODATA; /* Use default or previous value */
+
+ ret = peci_ep_pci_local_read(priv->peci_dev, 0, 13, 0, 2, 0xd0, &reg_val);
+ if (ret)
+ return -ENODATA; /* Use default or previous value */
+
+ /*
+ * Device 26, Offset 224e0: IMC 0 channel 0 -> rank 0
+ * Device 26, Offset 264e0: IMC 0 channel 1 -> rank 1
+ * Device 27, Offset 224e0: IMC 1 channel 0 -> rank 2
+ * Device 27, Offset 264e0: IMC 1 channel 1 -> rank 3
+ * Device 28, Offset 224e0: IMC 2 channel 0 -> rank 4
+ * Device 28, Offset 264e0: IMC 2 channel 1 -> rank 5
+ * Device 29, Offset 224e0: IMC 3 channel 0 -> rank 6
+ * Device 29, Offset 264e0: IMC 3 channel 1 -> rank 7
+ */
+ dev = 26 + chan_rank / 2;
+ offset = 0x224e0 + dimm_order * 4 + (chan_rank % 2) * 0x4000;
+
+ ret = peci_mmio_read(priv->peci_dev, 0, GET_CPU_SEG(reg_val), GET_CPU_BUS(reg_val),
+ dev, 0, offset, data);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static const struct dimm_info dimm_hsx = {
+ .chan_rank_max = CHAN_RANK_MAX_ON_HSX,
+ .dimm_idx_max = DIMM_IDX_MAX_ON_HSX,
+ .min_peci_revision = 0x33,
+ .read_thresholds = &read_thresholds_hsx,
+};
+
+static const struct dimm_info dimm_bdx = {
+ .chan_rank_max = CHAN_RANK_MAX_ON_BDX,
+ .dimm_idx_max = DIMM_IDX_MAX_ON_BDX,
+ .min_peci_revision = 0x33,
+ .read_thresholds = &read_thresholds_hsx,
+};
+
+static const struct dimm_info dimm_bdxd = {
+ .chan_rank_max = CHAN_RANK_MAX_ON_BDXD,
+ .dimm_idx_max = DIMM_IDX_MAX_ON_BDXD,
+ .min_peci_revision = 0x33,
+ .read_thresholds = &read_thresholds_bdxd,
+};
+
+static const struct dimm_info dimm_skx = {
+ .chan_rank_max = CHAN_RANK_MAX_ON_SKX,
+ .dimm_idx_max = DIMM_IDX_MAX_ON_SKX,
+ .min_peci_revision = 0x33,
+ .read_thresholds = &read_thresholds_skx,
+};
+
+static const struct dimm_info dimm_icx = {
+ .chan_rank_max = CHAN_RANK_MAX_ON_ICX,
+ .dimm_idx_max = DIMM_IDX_MAX_ON_ICX,
+ .min_peci_revision = 0x40,
+ .read_thresholds = &read_thresholds_icx,
+};
+
+static const struct dimm_info dimm_icxd = {
+ .chan_rank_max = CHAN_RANK_MAX_ON_ICXD,
+ .dimm_idx_max = DIMM_IDX_MAX_ON_ICXD,
+ .min_peci_revision = 0x40,
+ .read_thresholds = &read_thresholds_icx,
+};
+
+static const struct auxiliary_device_id peci_dimmtemp_ids[] = {
+ {
+ .name = "peci_cpu.dimmtemp.hsx",
+ .driver_data = (kernel_ulong_t)&dimm_hsx,
+ },
+ {
+ .name = "peci_cpu.dimmtemp.bdx",
+ .driver_data = (kernel_ulong_t)&dimm_bdx,
+ },
+ {
+ .name = "peci_cpu.dimmtemp.bdxd",
+ .driver_data = (kernel_ulong_t)&dimm_bdxd,
+ },
+ {
+ .name = "peci_cpu.dimmtemp.skx",
+ .driver_data = (kernel_ulong_t)&dimm_skx,
+ },
+ {
+ .name = "peci_cpu.dimmtemp.icx",
+ .driver_data = (kernel_ulong_t)&dimm_icx,
+ },
+ {
+ .name = "peci_cpu.dimmtemp.icxd",
+ .driver_data = (kernel_ulong_t)&dimm_icxd,
+ },
+ { }
+};
+MODULE_DEVICE_TABLE(auxiliary, peci_dimmtemp_ids);
+
+static struct auxiliary_driver peci_dimmtemp_driver = {
+ .probe = peci_dimmtemp_probe,
+ .id_table = peci_dimmtemp_ids,
+};
+
+module_auxiliary_driver(peci_dimmtemp_driver);
+
+MODULE_AUTHOR("Jae Hyun Yoo <jae.hyun.yoo@linux.intel.com>");
+MODULE_AUTHOR("Iwona Winiarska <iwona.winiarska@intel.com>");
+MODULE_DESCRIPTION("PECI dimmtemp driver");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(PECI_CPU);