summaryrefslogtreecommitdiffstats
path: root/drivers/platform/mellanox
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/platform/mellanox')
-rw-r--r--drivers/platform/mellanox/Kconfig60
-rw-r--r--drivers/platform/mellanox/Makefile9
-rw-r--r--drivers/platform/mellanox/mlxbf-bootctl.c334
-rw-r--r--drivers/platform/mellanox/mlxbf-bootctl.h103
-rw-r--r--drivers/platform/mellanox/mlxbf-tmfifo-regs.h63
-rw-r--r--drivers/platform/mellanox/mlxbf-tmfifo.c1342
-rw-r--r--drivers/platform/mellanox/mlxreg-hotplug.c743
-rw-r--r--drivers/platform/mellanox/mlxreg-io.c266
8 files changed, 2920 insertions, 0 deletions
diff --git a/drivers/platform/mellanox/Kconfig b/drivers/platform/mellanox/Kconfig
new file mode 100644
index 000000000..1a11d1a44
--- /dev/null
+++ b/drivers/platform/mellanox/Kconfig
@@ -0,0 +1,60 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Platform support for Mellanox hardware
+#
+
+menuconfig MELLANOX_PLATFORM
+ bool "Platform support for Mellanox hardware"
+ depends on X86 || ARM || ARM64 || COMPILE_TEST
+ help
+ Say Y here to get to see options for platform support for
+ Mellanox systems. This option alone does not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and disabled.
+
+if MELLANOX_PLATFORM
+
+config MLXREG_HOTPLUG
+ tristate "Mellanox platform hotplug driver support"
+ depends on REGMAP
+ depends on HWMON
+ depends on I2C
+ help
+ This driver handles hot-plug events for the power suppliers, power
+ cables and fans on the wide range Mellanox IB and Ethernet systems.
+
+config MLXREG_IO
+ tristate "Mellanox platform register access driver support"
+ depends on REGMAP
+ depends on HWMON
+ help
+ This driver allows access to Mellanox programmable device register
+ space through sysfs interface. The sets of registers for sysfs access
+ are defined per system type bases and include the registers related
+ to system resets operation, system reset causes monitoring and some
+ kinds of mux selection.
+
+config MLXBF_TMFIFO
+ tristate "Mellanox BlueField SoC TmFifo platform driver"
+ depends on ARM64
+ depends on ACPI
+ depends on VIRTIO_CONSOLE && VIRTIO_NET
+ help
+ Say y here to enable TmFifo support. The TmFifo driver provides
+ platform driver support for the TmFifo which supports console
+ and networking based on the virtio framework.
+
+config MLXBF_BOOTCTL
+ tristate "Mellanox BlueField Firmware Boot Control driver"
+ depends on ARM64
+ depends on ACPI
+ depends on NET
+ help
+ The Mellanox BlueField firmware implements functionality to
+ request swapping the primary and alternate eMMC boot partition,
+ and to set up a watchdog that can undo that swap if the system
+ does not boot up correctly. This driver provides sysfs access
+ to the userspace tools, to be used in conjunction with the eMMC
+ device driver to do necessary initial swap of the boot partition.
+
+endif # MELLANOX_PLATFORM
diff --git a/drivers/platform/mellanox/Makefile b/drivers/platform/mellanox/Makefile
new file mode 100644
index 000000000..499623ccf
--- /dev/null
+++ b/drivers/platform/mellanox/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for linux/drivers/platform/mellanox
+# Mellanox Platform-Specific Drivers
+#
+obj-$(CONFIG_MLXBF_BOOTCTL) += mlxbf-bootctl.o
+obj-$(CONFIG_MLXBF_TMFIFO) += mlxbf-tmfifo.o
+obj-$(CONFIG_MLXREG_HOTPLUG) += mlxreg-hotplug.o
+obj-$(CONFIG_MLXREG_IO) += mlxreg-io.o
diff --git a/drivers/platform/mellanox/mlxbf-bootctl.c b/drivers/platform/mellanox/mlxbf-bootctl.c
new file mode 100644
index 000000000..9911d4f85
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-bootctl.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox boot control driver
+ *
+ * This driver provides a sysfs interface for systems management
+ * software to manage reset-time actions.
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/arm-smccc.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+#include "mlxbf-bootctl.h"
+
+#define MLXBF_BOOTCTL_SB_SECURE_MASK 0x03
+#define MLXBF_BOOTCTL_SB_TEST_MASK 0x0c
+#define MLXBF_BOOTCTL_SB_DEV_MASK BIT(4)
+
+#define MLXBF_SB_KEY_NUM 4
+
+/* UUID used to probe ATF service. */
+static const char *mlxbf_bootctl_svc_uuid_str =
+ "89c036b4-e7d7-11e6-8797-001aca00bfc4";
+
+struct mlxbf_bootctl_name {
+ u32 value;
+ const char *name;
+};
+
+static struct mlxbf_bootctl_name boot_names[] = {
+ { MLXBF_BOOTCTL_EXTERNAL, "external" },
+ { MLXBF_BOOTCTL_EMMC, "emmc" },
+ { MLNX_BOOTCTL_SWAP_EMMC, "swap_emmc" },
+ { MLXBF_BOOTCTL_EMMC_LEGACY, "emmc_legacy" },
+ { MLXBF_BOOTCTL_NONE, "none" },
+};
+
+enum {
+ MLXBF_BOOTCTL_SB_LIFECYCLE_PRODUCTION = 0,
+ MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE = 1,
+ MLXBF_BOOTCTL_SB_LIFECYCLE_GA_NON_SECURE = 2,
+ MLXBF_BOOTCTL_SB_LIFECYCLE_RMA = 3
+};
+
+static const char * const mlxbf_bootctl_lifecycle_states[] = {
+ [MLXBF_BOOTCTL_SB_LIFECYCLE_PRODUCTION] = "Production",
+ [MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE] = "GA Secured",
+ [MLXBF_BOOTCTL_SB_LIFECYCLE_GA_NON_SECURE] = "GA Non-Secured",
+ [MLXBF_BOOTCTL_SB_LIFECYCLE_RMA] = "RMA",
+};
+
+/* ARM SMC call which is atomic and no need for lock. */
+static int mlxbf_bootctl_smc(unsigned int smc_op, int smc_arg)
+{
+ struct arm_smccc_res res;
+
+ arm_smccc_smc(smc_op, smc_arg, 0, 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+/* Return the action in integer or an error code. */
+static int mlxbf_bootctl_reset_action_to_val(const char *action)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(boot_names); i++)
+ if (sysfs_streq(boot_names[i].name, action))
+ return boot_names[i].value;
+
+ return -EINVAL;
+}
+
+/* Return the action in string. */
+static const char *mlxbf_bootctl_action_to_string(int action)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(boot_names); i++)
+ if (boot_names[i].value == action)
+ return boot_names[i].name;
+
+ return "invalid action";
+}
+
+static ssize_t post_reset_wdog_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int ret;
+
+ ret = mlxbf_bootctl_smc(MLXBF_BOOTCTL_GET_POST_RESET_WDOG, 0);
+ if (ret < 0)
+ return ret;
+
+ return sprintf(buf, "%d\n", ret);
+}
+
+static ssize_t post_reset_wdog_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long value;
+ int ret;
+
+ ret = kstrtoul(buf, 10, &value);
+ if (ret)
+ return ret;
+
+ ret = mlxbf_bootctl_smc(MLXBF_BOOTCTL_SET_POST_RESET_WDOG, value);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static ssize_t mlxbf_bootctl_show(int smc_op, char *buf)
+{
+ int action;
+
+ action = mlxbf_bootctl_smc(smc_op, 0);
+ if (action < 0)
+ return action;
+
+ return sprintf(buf, "%s\n", mlxbf_bootctl_action_to_string(action));
+}
+
+static int mlxbf_bootctl_store(int smc_op, const char *buf, size_t count)
+{
+ int ret, action;
+
+ action = mlxbf_bootctl_reset_action_to_val(buf);
+ if (action < 0)
+ return action;
+
+ ret = mlxbf_bootctl_smc(smc_op, action);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static ssize_t reset_action_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return mlxbf_bootctl_show(MLXBF_BOOTCTL_GET_RESET_ACTION, buf);
+}
+
+static ssize_t reset_action_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ return mlxbf_bootctl_store(MLXBF_BOOTCTL_SET_RESET_ACTION, buf, count);
+}
+
+static ssize_t second_reset_action_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return mlxbf_bootctl_show(MLXBF_BOOTCTL_GET_SECOND_RESET_ACTION, buf);
+}
+
+static ssize_t second_reset_action_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ return mlxbf_bootctl_store(MLXBF_BOOTCTL_SET_SECOND_RESET_ACTION, buf,
+ count);
+}
+
+static ssize_t lifecycle_state_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ int status_bits;
+ int use_dev_key;
+ int test_state;
+ int lc_state;
+
+ status_bits = mlxbf_bootctl_smc(MLXBF_BOOTCTL_GET_TBB_FUSE_STATUS,
+ MLXBF_BOOTCTL_FUSE_STATUS_LIFECYCLE);
+ if (status_bits < 0)
+ return status_bits;
+
+ use_dev_key = status_bits & MLXBF_BOOTCTL_SB_DEV_MASK;
+ test_state = status_bits & MLXBF_BOOTCTL_SB_TEST_MASK;
+ lc_state = status_bits & MLXBF_BOOTCTL_SB_SECURE_MASK;
+
+ /*
+ * If the test bits are set, we specify that the current state may be
+ * due to using the test bits.
+ */
+ if (test_state) {
+ return sprintf(buf, "%s(test)\n",
+ mlxbf_bootctl_lifecycle_states[lc_state]);
+ } else if (use_dev_key &&
+ (lc_state == MLXBF_BOOTCTL_SB_LIFECYCLE_GA_SECURE)) {
+ return sprintf(buf, "Secured (development)\n");
+ }
+
+ return sprintf(buf, "%s\n", mlxbf_bootctl_lifecycle_states[lc_state]);
+}
+
+static ssize_t secure_boot_fuse_state_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int burnt, valid, key, key_state, buf_len = 0, upper_key_used = 0;
+ const char *status;
+
+ key_state = mlxbf_bootctl_smc(MLXBF_BOOTCTL_GET_TBB_FUSE_STATUS,
+ MLXBF_BOOTCTL_FUSE_STATUS_KEYS);
+ if (key_state < 0)
+ return key_state;
+
+ /*
+ * key_state contains the bits for 4 Key versions, loaded from eFuses
+ * after a hard reset. Lower 4 bits are a thermometer code indicating
+ * key programming has started for key n (0000 = none, 0001 = version 0,
+ * 0011 = version 1, 0111 = version 2, 1111 = version 3). Upper 4 bits
+ * are a thermometer code indicating key programming has completed for
+ * key n (same encodings as the start bits). This allows for detection
+ * of an interruption in the progamming process which has left the key
+ * partially programmed (and thus invalid). The process is to burn the
+ * eFuse for the new key start bit, burn the key eFuses, then burn the
+ * eFuse for the new key complete bit.
+ *
+ * For example 0000_0000: no key valid, 0001_0001: key version 0 valid,
+ * 0011_0011: key 1 version valid, 0011_0111: key version 2 started
+ * programming but did not complete, etc. The most recent key for which
+ * both start and complete bit is set is loaded. On soft reset, this
+ * register is not modified.
+ */
+ for (key = MLXBF_SB_KEY_NUM - 1; key >= 0; key--) {
+ burnt = key_state & BIT(key);
+ valid = key_state & BIT(key + MLXBF_SB_KEY_NUM);
+
+ if (burnt && valid)
+ upper_key_used = 1;
+
+ if (upper_key_used) {
+ if (burnt)
+ status = valid ? "Used" : "Wasted";
+ else
+ status = valid ? "Invalid" : "Skipped";
+ } else {
+ if (burnt)
+ status = valid ? "InUse" : "Incomplete";
+ else
+ status = valid ? "Invalid" : "Free";
+ }
+ buf_len += sprintf(buf + buf_len, "%d:%s ", key, status);
+ }
+ buf_len += sprintf(buf + buf_len, "\n");
+
+ return buf_len;
+}
+
+static DEVICE_ATTR_RW(post_reset_wdog);
+static DEVICE_ATTR_RW(reset_action);
+static DEVICE_ATTR_RW(second_reset_action);
+static DEVICE_ATTR_RO(lifecycle_state);
+static DEVICE_ATTR_RO(secure_boot_fuse_state);
+
+static struct attribute *mlxbf_bootctl_attrs[] = {
+ &dev_attr_post_reset_wdog.attr,
+ &dev_attr_reset_action.attr,
+ &dev_attr_second_reset_action.attr,
+ &dev_attr_lifecycle_state.attr,
+ &dev_attr_secure_boot_fuse_state.attr,
+ NULL
+};
+
+ATTRIBUTE_GROUPS(mlxbf_bootctl);
+
+static const struct acpi_device_id mlxbf_bootctl_acpi_ids[] = {
+ {"MLNXBF04", 0},
+ {}
+};
+
+MODULE_DEVICE_TABLE(acpi, mlxbf_bootctl_acpi_ids);
+
+static bool mlxbf_bootctl_guid_match(const guid_t *guid,
+ const struct arm_smccc_res *res)
+{
+ guid_t id = GUID_INIT(res->a0, res->a1, res->a1 >> 16,
+ res->a2, res->a2 >> 8, res->a2 >> 16,
+ res->a2 >> 24, res->a3, res->a3 >> 8,
+ res->a3 >> 16, res->a3 >> 24);
+
+ return guid_equal(guid, &id);
+}
+
+static int mlxbf_bootctl_probe(struct platform_device *pdev)
+{
+ struct arm_smccc_res res = { 0 };
+ guid_t guid;
+ int ret;
+
+ /* Ensure we have the UUID we expect for this service. */
+ arm_smccc_smc(MLXBF_BOOTCTL_SIP_SVC_UID, 0, 0, 0, 0, 0, 0, 0, &res);
+ guid_parse(mlxbf_bootctl_svc_uuid_str, &guid);
+ if (!mlxbf_bootctl_guid_match(&guid, &res))
+ return -ENODEV;
+
+ /*
+ * When watchdog is used, it sets boot mode to MLXBF_BOOTCTL_SWAP_EMMC
+ * in case of boot failures. However it doesn't clear the state if there
+ * is no failure. Restore the default boot mode here to avoid any
+ * unnecessary boot partition swapping.
+ */
+ ret = mlxbf_bootctl_smc(MLXBF_BOOTCTL_SET_RESET_ACTION,
+ MLXBF_BOOTCTL_EMMC);
+ if (ret < 0)
+ dev_warn(&pdev->dev, "Unable to reset the EMMC boot mode\n");
+
+ return 0;
+}
+
+static struct platform_driver mlxbf_bootctl_driver = {
+ .probe = mlxbf_bootctl_probe,
+ .driver = {
+ .name = "mlxbf-bootctl",
+ .dev_groups = mlxbf_bootctl_groups,
+ .acpi_match_table = mlxbf_bootctl_acpi_ids,
+ }
+};
+
+module_platform_driver(mlxbf_bootctl_driver);
+
+MODULE_DESCRIPTION("Mellanox boot control driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/platform/mellanox/mlxbf-bootctl.h b/drivers/platform/mellanox/mlxbf-bootctl.h
new file mode 100644
index 000000000..148fdb43b
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-bootctl.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_BOOTCTL_H__
+#define __MLXBF_BOOTCTL_H__
+
+/*
+ * Request that the on-chip watchdog be enabled, or disabled, after
+ * the next chip soft reset. This call does not affect the current
+ * status of the on-chip watchdog. If non-zero, the argument
+ * specifies the watchdog interval in seconds. If zero, the watchdog
+ * will not be enabled after the next soft reset. Non-zero errors are
+ * returned as documented below.
+ */
+#define MLXBF_BOOTCTL_SET_POST_RESET_WDOG 0x82000000
+
+/*
+ * Query the status which has been requested for the on-chip watchdog
+ * after the next chip soft reset. Returns the interval as set by
+ * MLXBF_BOOTCTL_SET_POST_RESET_WDOG.
+ */
+#define MLXBF_BOOTCTL_GET_POST_RESET_WDOG 0x82000001
+
+/*
+ * Request that a specific boot action be taken at the next soft
+ * reset. By default, the boot action is set by external chip pins,
+ * which are sampled on hard reset. Note that the boot action
+ * requested by this call will persist on subsequent resets unless
+ * this service, or the MLNX_SET_SECOND_RESET_ACTION service, is
+ * invoked. See below for the available MLNX_BOOT_xxx parameter
+ * values. Non-zero errors are returned as documented below.
+ */
+#define MLXBF_BOOTCTL_SET_RESET_ACTION 0x82000002
+
+/*
+ * Return the specific boot action which will be taken at the next
+ * soft reset. Returns the reset action (see below for the parameter
+ * values for MLXBF_BOOTCTL_SET_RESET_ACTION).
+ */
+#define MLXBF_BOOTCTL_GET_RESET_ACTION 0x82000003
+
+/*
+ * Request that a specific boot action be taken at the soft reset
+ * after the next soft reset. For a specified valid boot mode, the
+ * effect of this call is identical to that of invoking
+ * MLXBF_BOOTCTL_SET_RESET_ACTION after the next chip soft reset; in
+ * particular, after that reset, the action for the now next reset can
+ * be queried with MLXBF_BOOTCTL_GET_RESET_ACTION and modified with
+ * MLXBF_BOOTCTL_SET_RESET_ACTION. You may also specify the parameter as
+ * MLNX_BOOT_NONE, which is equivalent to specifying that no call to
+ * MLXBF_BOOTCTL_SET_RESET_ACTION be taken after the next chip soft reset.
+ * This call does not affect the action to be taken at the next soft
+ * reset. Non-zero errors are returned as documented below.
+ */
+#define MLXBF_BOOTCTL_SET_SECOND_RESET_ACTION 0x82000004
+
+/*
+ * Return the specific boot action which will be taken at the soft
+ * reset after the next soft reset; this will be one of the valid
+ * actions for MLXBF_BOOTCTL_SET_SECOND_RESET_ACTION.
+ */
+#define MLXBF_BOOTCTL_GET_SECOND_RESET_ACTION 0x82000005
+
+/*
+ * Return the fuse status of the current chip. The caller should specify
+ * with the second argument if the state of the lifecycle fuses or the
+ * version of secure boot fuse keys left should be returned.
+ */
+#define MLXBF_BOOTCTL_GET_TBB_FUSE_STATUS 0x82000006
+
+/* Reset eMMC by programming the RST_N register. */
+#define MLXBF_BOOTCTL_SET_EMMC_RST_N 0x82000007
+
+#define MLXBF_BOOTCTL_GET_DIMM_INFO 0x82000008
+
+/* SMC function IDs for SiP Service queries */
+#define MLXBF_BOOTCTL_SIP_SVC_CALL_COUNT 0x8200ff00
+#define MLXBF_BOOTCTL_SIP_SVC_UID 0x8200ff01
+#define MLXBF_BOOTCTL_SIP_SVC_VERSION 0x8200ff03
+
+/* ARM Standard Service Calls version numbers */
+#define MLXBF_BOOTCTL_SVC_VERSION_MAJOR 0x0
+#define MLXBF_BOOTCTL_SVC_VERSION_MINOR 0x2
+
+/* Number of svc calls defined. */
+#define MLXBF_BOOTCTL_NUM_SVC_CALLS 12
+
+/* Valid reset actions for MLXBF_BOOTCTL_SET_RESET_ACTION. */
+#define MLXBF_BOOTCTL_EXTERNAL 0 /* Not boot from eMMC */
+#define MLXBF_BOOTCTL_EMMC 1 /* From primary eMMC boot partition */
+#define MLNX_BOOTCTL_SWAP_EMMC 2 /* Swap eMMC boot partitions and reboot */
+#define MLXBF_BOOTCTL_EMMC_LEGACY 3 /* From primary eMMC in legacy mode */
+
+/* Valid arguments for requesting the fuse status. */
+#define MLXBF_BOOTCTL_FUSE_STATUS_LIFECYCLE 0 /* Return lifecycle status. */
+#define MLXBF_BOOTCTL_FUSE_STATUS_KEYS 1 /* Return secure boot key status */
+
+/* Additional value to disable the MLXBF_BOOTCTL_SET_SECOND_RESET_ACTION. */
+#define MLXBF_BOOTCTL_NONE 0x7fffffff /* Don't change next boot action */
+
+#endif /* __MLXBF_BOOTCTL_H__ */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo-regs.h b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
new file mode 100644
index 000000000..e4f0d2eda
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo-regs.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2019, Mellanox Technologies. All rights reserved.
+ */
+
+#ifndef __MLXBF_TMFIFO_REGS_H__
+#define __MLXBF_TMFIFO_REGS_H__
+
+#include <linux/types.h>
+#include <linux/bits.h>
+
+#define MLXBF_TMFIFO_TX_DATA 0x00
+#define MLXBF_TMFIFO_TX_STS 0x08
+#define MLXBF_TMFIFO_TX_STS__LENGTH 0x0001
+#define MLXBF_TMFIFO_TX_STS__COUNT_SHIFT 0
+#define MLXBF_TMFIFO_TX_STS__COUNT_WIDTH 9
+#define MLXBF_TMFIFO_TX_STS__COUNT_RESET_VAL 0
+#define MLXBF_TMFIFO_TX_STS__COUNT_RMASK GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_STS__COUNT_MASK GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL 0x10
+#define MLXBF_TMFIFO_TX_CTL__LENGTH 0x0001
+#define MLXBF_TMFIFO_TX_CTL__LWM_SHIFT 0
+#define MLXBF_TMFIFO_TX_CTL__LWM_WIDTH 8
+#define MLXBF_TMFIFO_TX_CTL__LWM_RESET_VAL 128
+#define MLXBF_TMFIFO_TX_CTL__LWM_RMASK GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__LWM_MASK GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_SHIFT 8
+#define MLXBF_TMFIFO_TX_CTL__HWM_WIDTH 8
+#define MLXBF_TMFIFO_TX_CTL__HWM_RESET_VAL 128
+#define MLXBF_TMFIFO_TX_CTL__HWM_RMASK GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_TX_CTL__HWM_MASK GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_SHIFT 32
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_WIDTH 9
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_RMASK GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK GENMASK_ULL(40, 32)
+#define MLXBF_TMFIFO_RX_DATA 0x00
+#define MLXBF_TMFIFO_RX_STS 0x08
+#define MLXBF_TMFIFO_RX_STS__LENGTH 0x0001
+#define MLXBF_TMFIFO_RX_STS__COUNT_SHIFT 0
+#define MLXBF_TMFIFO_RX_STS__COUNT_WIDTH 9
+#define MLXBF_TMFIFO_RX_STS__COUNT_RESET_VAL 0
+#define MLXBF_TMFIFO_RX_STS__COUNT_RMASK GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_STS__COUNT_MASK GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL 0x10
+#define MLXBF_TMFIFO_RX_CTL__LENGTH 0x0001
+#define MLXBF_TMFIFO_RX_CTL__LWM_SHIFT 0
+#define MLXBF_TMFIFO_RX_CTL__LWM_WIDTH 8
+#define MLXBF_TMFIFO_RX_CTL__LWM_RESET_VAL 128
+#define MLXBF_TMFIFO_RX_CTL__LWM_RMASK GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__LWM_MASK GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_SHIFT 8
+#define MLXBF_TMFIFO_RX_CTL__HWM_WIDTH 8
+#define MLXBF_TMFIFO_RX_CTL__HWM_RESET_VAL 128
+#define MLXBF_TMFIFO_RX_CTL__HWM_RMASK GENMASK_ULL(7, 0)
+#define MLXBF_TMFIFO_RX_CTL__HWM_MASK GENMASK_ULL(15, 8)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_SHIFT 32
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_WIDTH 9
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RESET_VAL 256
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_RMASK GENMASK_ULL(8, 0)
+#define MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK GENMASK_ULL(40, 32)
+
+#endif /* !defined(__MLXBF_TMFIFO_REGS_H__) */
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
new file mode 100644
index 000000000..767f4406e
--- /dev/null
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -0,0 +1,1342 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox BlueField SoC TmFifo driver
+ *
+ * Copyright (C) 2019 Mellanox Technologies
+ */
+
+#include <linux/acpi.h>
+#include <linux/bitfield.h>
+#include <linux/circ_buf.h>
+#include <linux/efi.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+
+#include <linux/virtio_config.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+
+#include "mlxbf-tmfifo-regs.h"
+
+/* Vring size. */
+#define MLXBF_TMFIFO_VRING_SIZE SZ_1K
+
+/* Console Tx buffer size. */
+#define MLXBF_TMFIFO_CON_TX_BUF_SIZE SZ_32K
+
+/* Console Tx buffer reserved space. */
+#define MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE 8
+
+/* House-keeping timer interval. */
+#define MLXBF_TMFIFO_TIMER_INTERVAL (HZ / 10)
+
+/* Virtual devices sharing the TM FIFO. */
+#define MLXBF_TMFIFO_VDEV_MAX (VIRTIO_ID_CONSOLE + 1)
+
+/*
+ * Reserve 1/16 of TmFifo space, so console messages are not starved by
+ * the networking traffic.
+ */
+#define MLXBF_TMFIFO_RESERVE_RATIO 16
+
+/* Message with data needs at least two words (for header & data). */
+#define MLXBF_TMFIFO_DATA_MIN_WORDS 2
+
+struct mlxbf_tmfifo;
+
+/**
+ * mlxbf_tmfifo_vring - Structure of the TmFifo virtual ring
+ * @va: virtual address of the ring
+ * @dma: dma address of the ring
+ * @vq: pointer to the virtio virtqueue
+ * @desc: current descriptor of the pending packet
+ * @desc_head: head descriptor of the pending packet
+ * @drop_desc: dummy desc for packet dropping
+ * @cur_len: processed length of the current descriptor
+ * @rem_len: remaining length of the pending packet
+ * @pkt_len: total length of the pending packet
+ * @next_avail: next avail descriptor id
+ * @num: vring size (number of descriptors)
+ * @align: vring alignment size
+ * @index: vring index
+ * @vdev_id: vring virtio id (VIRTIO_ID_xxx)
+ * @fifo: pointer to the tmfifo structure
+ */
+struct mlxbf_tmfifo_vring {
+ void *va;
+ dma_addr_t dma;
+ struct virtqueue *vq;
+ struct vring_desc *desc;
+ struct vring_desc *desc_head;
+ struct vring_desc drop_desc;
+ int cur_len;
+ int rem_len;
+ u32 pkt_len;
+ u16 next_avail;
+ int num;
+ int align;
+ int index;
+ int vdev_id;
+ struct mlxbf_tmfifo *fifo;
+};
+
+/* Check whether vring is in drop mode. */
+#define IS_VRING_DROP(_r) ({ \
+ typeof(_r) (r) = (_r); \
+ (r->desc_head == &r->drop_desc ? true : false); })
+
+/* A stub length to drop maximum length packet. */
+#define VRING_DROP_DESC_MAX_LEN GENMASK(15, 0)
+
+/* Interrupt types. */
+enum {
+ MLXBF_TM_RX_LWM_IRQ,
+ MLXBF_TM_RX_HWM_IRQ,
+ MLXBF_TM_TX_LWM_IRQ,
+ MLXBF_TM_TX_HWM_IRQ,
+ MLXBF_TM_MAX_IRQ
+};
+
+/* Ring types (Rx & Tx). */
+enum {
+ MLXBF_TMFIFO_VRING_RX,
+ MLXBF_TMFIFO_VRING_TX,
+ MLXBF_TMFIFO_VRING_MAX
+};
+
+/**
+ * mlxbf_tmfifo_vdev - Structure of the TmFifo virtual device
+ * @vdev: virtio device, in which the vdev.id.device field has the
+ * VIRTIO_ID_xxx id to distinguish the virtual device.
+ * @status: status of the device
+ * @features: supported features of the device
+ * @vrings: array of tmfifo vrings of this device
+ * @config.cons: virtual console config -
+ * select if vdev.id.device is VIRTIO_ID_CONSOLE
+ * @config.net: virtual network config -
+ * select if vdev.id.device is VIRTIO_ID_NET
+ * @tx_buf: tx buffer used to buffer data before writing into the FIFO
+ */
+struct mlxbf_tmfifo_vdev {
+ struct virtio_device vdev;
+ u8 status;
+ u64 features;
+ struct mlxbf_tmfifo_vring vrings[MLXBF_TMFIFO_VRING_MAX];
+ union {
+ struct virtio_console_config cons;
+ struct virtio_net_config net;
+ } config;
+ struct circ_buf tx_buf;
+};
+
+/**
+ * mlxbf_tmfifo_irq_info - Structure of the interrupt information
+ * @fifo: pointer to the tmfifo structure
+ * @irq: interrupt number
+ * @index: index into the interrupt array
+ */
+struct mlxbf_tmfifo_irq_info {
+ struct mlxbf_tmfifo *fifo;
+ int irq;
+ int index;
+};
+
+/**
+ * mlxbf_tmfifo - Structure of the TmFifo
+ * @vdev: array of the virtual devices running over the TmFifo
+ * @lock: lock to protect the TmFifo access
+ * @rx_base: mapped register base address for the Rx FIFO
+ * @tx_base: mapped register base address for the Tx FIFO
+ * @rx_fifo_size: number of entries of the Rx FIFO
+ * @tx_fifo_size: number of entries of the Tx FIFO
+ * @pend_events: pending bits for deferred events
+ * @irq_info: interrupt information
+ * @work: work struct for deferred process
+ * @timer: background timer
+ * @vring: Tx/Rx ring
+ * @spin_lock: Tx/Rx spin lock
+ * @is_ready: ready flag
+ */
+struct mlxbf_tmfifo {
+ struct mlxbf_tmfifo_vdev *vdev[MLXBF_TMFIFO_VDEV_MAX];
+ struct mutex lock; /* TmFifo lock */
+ void __iomem *rx_base;
+ void __iomem *tx_base;
+ int rx_fifo_size;
+ int tx_fifo_size;
+ unsigned long pend_events;
+ struct mlxbf_tmfifo_irq_info irq_info[MLXBF_TM_MAX_IRQ];
+ struct work_struct work;
+ struct timer_list timer;
+ struct mlxbf_tmfifo_vring *vring[2];
+ spinlock_t spin_lock[2]; /* spin lock */
+ bool is_ready;
+};
+
+/**
+ * mlxbf_tmfifo_msg_hdr - Structure of the TmFifo message header
+ * @type: message type
+ * @len: payload length in network byte order. Messages sent into the FIFO
+ * will be read by the other side as data stream in the same byte order.
+ * The length needs to be encoded into network order so both sides
+ * could understand it.
+ */
+struct mlxbf_tmfifo_msg_hdr {
+ u8 type;
+ __be16 len;
+ u8 unused[5];
+} __packed __aligned(sizeof(u64));
+
+/*
+ * Default MAC.
+ * This MAC address will be read from EFI persistent variable if configured.
+ * It can also be reconfigured with standard Linux tools.
+ */
+static u8 mlxbf_tmfifo_net_default_mac[ETH_ALEN] = {
+ 0x00, 0x1A, 0xCA, 0xFF, 0xFF, 0x01
+};
+
+/* EFI variable name of the MAC address. */
+static efi_char16_t mlxbf_tmfifo_efi_name[] = L"RshimMacAddr";
+
+/* Maximum L2 header length. */
+#define MLXBF_TMFIFO_NET_L2_OVERHEAD (ETH_HLEN + VLAN_HLEN)
+
+/* Supported virtio-net features. */
+#define MLXBF_TMFIFO_NET_FEATURES \
+ (BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_STATUS) | \
+ BIT_ULL(VIRTIO_NET_F_MAC))
+
+#define mlxbf_vdev_to_tmfifo(d) container_of(d, struct mlxbf_tmfifo_vdev, vdev)
+
+/* Free vrings of the FIFO device. */
+static void mlxbf_tmfifo_free_vrings(struct mlxbf_tmfifo *fifo,
+ struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+ struct mlxbf_tmfifo_vring *vring;
+ int i, size;
+
+ for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+ vring = &tm_vdev->vrings[i];
+ if (vring->va) {
+ size = vring_size(vring->num, vring->align);
+ dma_free_coherent(tm_vdev->vdev.dev.parent, size,
+ vring->va, vring->dma);
+ vring->va = NULL;
+ if (vring->vq) {
+ vring_del_virtqueue(vring->vq);
+ vring->vq = NULL;
+ }
+ }
+ }
+}
+
+/* Allocate vrings for the FIFO. */
+static int mlxbf_tmfifo_alloc_vrings(struct mlxbf_tmfifo *fifo,
+ struct mlxbf_tmfifo_vdev *tm_vdev)
+{
+ struct mlxbf_tmfifo_vring *vring;
+ struct device *dev;
+ dma_addr_t dma;
+ int i, size;
+ void *va;
+
+ for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+ vring = &tm_vdev->vrings[i];
+ vring->fifo = fifo;
+ vring->num = MLXBF_TMFIFO_VRING_SIZE;
+ vring->align = SMP_CACHE_BYTES;
+ vring->index = i;
+ vring->vdev_id = tm_vdev->vdev.id.device;
+ vring->drop_desc.len = VRING_DROP_DESC_MAX_LEN;
+ dev = &tm_vdev->vdev.dev;
+
+ size = vring_size(vring->num, vring->align);
+ va = dma_alloc_coherent(dev->parent, size, &dma, GFP_KERNEL);
+ if (!va) {
+ mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+ dev_err(dev->parent, "dma_alloc_coherent failed\n");
+ return -ENOMEM;
+ }
+
+ vring->va = va;
+ vring->dma = dma;
+ }
+
+ return 0;
+}
+
+/* Disable interrupts of the FIFO device. */
+static void mlxbf_tmfifo_disable_irqs(struct mlxbf_tmfifo *fifo)
+{
+ int i, irq;
+
+ for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+ irq = fifo->irq_info[i].irq;
+ fifo->irq_info[i].irq = 0;
+ disable_irq(irq);
+ }
+}
+
+/* Interrupt handler. */
+static irqreturn_t mlxbf_tmfifo_irq_handler(int irq, void *arg)
+{
+ struct mlxbf_tmfifo_irq_info *irq_info = arg;
+
+ if (!test_and_set_bit(irq_info->index, &irq_info->fifo->pend_events))
+ schedule_work(&irq_info->fifo->work);
+
+ return IRQ_HANDLED;
+}
+
+/* Get the next packet descriptor from the vring. */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_desc(struct mlxbf_tmfifo_vring *vring)
+{
+ const struct vring *vr = virtqueue_get_vring(vring->vq);
+ struct virtio_device *vdev = vring->vq->vdev;
+ unsigned int idx, head;
+
+ if (vring->next_avail == virtio16_to_cpu(vdev, vr->avail->idx))
+ return NULL;
+
+ /* Make sure 'avail->idx' is visible already. */
+ virtio_rmb(false);
+
+ idx = vring->next_avail % vr->num;
+ head = virtio16_to_cpu(vdev, vr->avail->ring[idx]);
+ if (WARN_ON(head >= vr->num))
+ return NULL;
+
+ vring->next_avail++;
+
+ return &vr->desc[head];
+}
+
+/* Release virtio descriptor. */
+static void mlxbf_tmfifo_release_desc(struct mlxbf_tmfifo_vring *vring,
+ struct vring_desc *desc, u32 len)
+{
+ const struct vring *vr = virtqueue_get_vring(vring->vq);
+ struct virtio_device *vdev = vring->vq->vdev;
+ u16 idx, vr_idx;
+
+ vr_idx = virtio16_to_cpu(vdev, vr->used->idx);
+ idx = vr_idx % vr->num;
+ vr->used->ring[idx].id = cpu_to_virtio32(vdev, desc - vr->desc);
+ vr->used->ring[idx].len = cpu_to_virtio32(vdev, len);
+
+ /*
+ * Virtio could poll and check the 'idx' to decide whether the desc is
+ * done or not. Add a memory barrier here to make sure the update above
+ * completes before updating the idx.
+ */
+ virtio_mb(false);
+ vr->used->idx = cpu_to_virtio16(vdev, vr_idx + 1);
+}
+
+/* Get the total length of the descriptor chain. */
+static u32 mlxbf_tmfifo_get_pkt_len(struct mlxbf_tmfifo_vring *vring,
+ struct vring_desc *desc)
+{
+ const struct vring *vr = virtqueue_get_vring(vring->vq);
+ struct virtio_device *vdev = vring->vq->vdev;
+ u32 len = 0, idx;
+
+ while (desc) {
+ len += virtio32_to_cpu(vdev, desc->len);
+ if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+ break;
+ idx = virtio16_to_cpu(vdev, desc->next);
+ desc = &vr->desc[idx];
+ }
+
+ return len;
+}
+
+static void mlxbf_tmfifo_release_pkt(struct mlxbf_tmfifo_vring *vring)
+{
+ struct vring_desc *desc_head;
+ u32 len = 0;
+
+ if (vring->desc_head) {
+ desc_head = vring->desc_head;
+ len = vring->pkt_len;
+ } else {
+ desc_head = mlxbf_tmfifo_get_next_desc(vring);
+ len = mlxbf_tmfifo_get_pkt_len(vring, desc_head);
+ }
+
+ if (desc_head)
+ mlxbf_tmfifo_release_desc(vring, desc_head, len);
+
+ vring->pkt_len = 0;
+ vring->desc = NULL;
+ vring->desc_head = NULL;
+}
+
+static void mlxbf_tmfifo_init_net_desc(struct mlxbf_tmfifo_vring *vring,
+ struct vring_desc *desc, bool is_rx)
+{
+ struct virtio_device *vdev = vring->vq->vdev;
+ struct virtio_net_hdr *net_hdr;
+
+ net_hdr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+ memset(net_hdr, 0, sizeof(*net_hdr));
+}
+
+/* Get and initialize the next packet. */
+static struct vring_desc *
+mlxbf_tmfifo_get_next_pkt(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+ struct vring_desc *desc;
+
+ desc = mlxbf_tmfifo_get_next_desc(vring);
+ if (desc && is_rx && vring->vdev_id == VIRTIO_ID_NET)
+ mlxbf_tmfifo_init_net_desc(vring, desc, is_rx);
+
+ vring->desc_head = desc;
+ vring->desc = desc;
+
+ return desc;
+}
+
+/* House-keeping timer. */
+static void mlxbf_tmfifo_timer(struct timer_list *t)
+{
+ struct mlxbf_tmfifo *fifo = container_of(t, struct mlxbf_tmfifo, timer);
+ int rx, tx;
+
+ rx = !test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events);
+ tx = !test_and_set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+
+ if (rx || tx)
+ schedule_work(&fifo->work);
+
+ mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+}
+
+/* Copy one console packet into the output buffer. */
+static void mlxbf_tmfifo_console_output_one(struct mlxbf_tmfifo_vdev *cons,
+ struct mlxbf_tmfifo_vring *vring,
+ struct vring_desc *desc)
+{
+ const struct vring *vr = virtqueue_get_vring(vring->vq);
+ struct virtio_device *vdev = &cons->vdev;
+ u32 len, idx, seg;
+ void *addr;
+
+ while (desc) {
+ addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+ len = virtio32_to_cpu(vdev, desc->len);
+
+ seg = CIRC_SPACE_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+ MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+ if (len <= seg) {
+ memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, len);
+ } else {
+ memcpy(cons->tx_buf.buf + cons->tx_buf.head, addr, seg);
+ addr += seg;
+ memcpy(cons->tx_buf.buf, addr, len - seg);
+ }
+ cons->tx_buf.head = (cons->tx_buf.head + len) %
+ MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+
+ if (!(virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT))
+ break;
+ idx = virtio16_to_cpu(vdev, desc->next);
+ desc = &vr->desc[idx];
+ }
+}
+
+/* Copy console data into the output buffer. */
+static void mlxbf_tmfifo_console_output(struct mlxbf_tmfifo_vdev *cons,
+ struct mlxbf_tmfifo_vring *vring)
+{
+ struct vring_desc *desc;
+ u32 len, avail;
+
+ desc = mlxbf_tmfifo_get_next_desc(vring);
+ while (desc) {
+ /* Release the packet if not enough space. */
+ len = mlxbf_tmfifo_get_pkt_len(vring, desc);
+ avail = CIRC_SPACE(cons->tx_buf.head, cons->tx_buf.tail,
+ MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+ if (len + MLXBF_TMFIFO_CON_TX_BUF_RSV_SIZE > avail) {
+ mlxbf_tmfifo_release_desc(vring, desc, len);
+ break;
+ }
+
+ mlxbf_tmfifo_console_output_one(cons, vring, desc);
+ mlxbf_tmfifo_release_desc(vring, desc, len);
+ desc = mlxbf_tmfifo_get_next_desc(vring);
+ }
+}
+
+/* Get the number of available words in Rx FIFO for receiving. */
+static int mlxbf_tmfifo_get_rx_avail(struct mlxbf_tmfifo *fifo)
+{
+ u64 sts;
+
+ sts = readq(fifo->rx_base + MLXBF_TMFIFO_RX_STS);
+ return FIELD_GET(MLXBF_TMFIFO_RX_STS__COUNT_MASK, sts);
+}
+
+/* Get the number of available words in the TmFifo for sending. */
+static int mlxbf_tmfifo_get_tx_avail(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+ int tx_reserve;
+ u32 count;
+ u64 sts;
+
+ /* Reserve some room in FIFO for console messages. */
+ if (vdev_id == VIRTIO_ID_NET)
+ tx_reserve = fifo->tx_fifo_size / MLXBF_TMFIFO_RESERVE_RATIO;
+ else
+ tx_reserve = 1;
+
+ sts = readq(fifo->tx_base + MLXBF_TMFIFO_TX_STS);
+ count = FIELD_GET(MLXBF_TMFIFO_TX_STS__COUNT_MASK, sts);
+ return fifo->tx_fifo_size - tx_reserve - count;
+}
+
+/* Console Tx (move data from the output buffer into the TmFifo). */
+static void mlxbf_tmfifo_console_tx(struct mlxbf_tmfifo *fifo, int avail)
+{
+ struct mlxbf_tmfifo_msg_hdr hdr;
+ struct mlxbf_tmfifo_vdev *cons;
+ unsigned long flags;
+ int size, seg;
+ void *addr;
+ u64 data;
+
+ /* Return if not enough space available. */
+ if (avail < MLXBF_TMFIFO_DATA_MIN_WORDS)
+ return;
+
+ cons = fifo->vdev[VIRTIO_ID_CONSOLE];
+ if (!cons || !cons->tx_buf.buf)
+ return;
+
+ /* Return if no data to send. */
+ size = CIRC_CNT(cons->tx_buf.head, cons->tx_buf.tail,
+ MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+ if (size == 0)
+ return;
+
+ /* Adjust the size to available space. */
+ if (size + sizeof(hdr) > avail * sizeof(u64))
+ size = avail * sizeof(u64) - sizeof(hdr);
+
+ /* Write header. */
+ hdr.type = VIRTIO_ID_CONSOLE;
+ hdr.len = htons(size);
+ writeq(*(u64 *)&hdr, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+ /* Use spin-lock to protect the 'cons->tx_buf'. */
+ spin_lock_irqsave(&fifo->spin_lock[0], flags);
+
+ while (size > 0) {
+ addr = cons->tx_buf.buf + cons->tx_buf.tail;
+
+ seg = CIRC_CNT_TO_END(cons->tx_buf.head, cons->tx_buf.tail,
+ MLXBF_TMFIFO_CON_TX_BUF_SIZE);
+ if (seg >= sizeof(u64)) {
+ memcpy(&data, addr, sizeof(u64));
+ } else {
+ memcpy(&data, addr, seg);
+ memcpy((u8 *)&data + seg, cons->tx_buf.buf,
+ sizeof(u64) - seg);
+ }
+ writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+
+ if (size >= sizeof(u64)) {
+ cons->tx_buf.tail = (cons->tx_buf.tail + sizeof(u64)) %
+ MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+ size -= sizeof(u64);
+ } else {
+ cons->tx_buf.tail = (cons->tx_buf.tail + size) %
+ MLXBF_TMFIFO_CON_TX_BUF_SIZE;
+ size = 0;
+ }
+ }
+
+ spin_unlock_irqrestore(&fifo->spin_lock[0], flags);
+}
+
+/* Rx/Tx one word in the descriptor buffer. */
+static void mlxbf_tmfifo_rxtx_word(struct mlxbf_tmfifo_vring *vring,
+ struct vring_desc *desc,
+ bool is_rx, int len)
+{
+ struct virtio_device *vdev = vring->vq->vdev;
+ struct mlxbf_tmfifo *fifo = vring->fifo;
+ void *addr;
+ u64 data;
+
+ /* Get the buffer address of this desc. */
+ addr = phys_to_virt(virtio64_to_cpu(vdev, desc->addr));
+
+ /* Read a word from FIFO for Rx. */
+ if (is_rx)
+ data = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+ if (vring->cur_len + sizeof(u64) <= len) {
+ /* The whole word. */
+ if (is_rx) {
+ if (!IS_VRING_DROP(vring))
+ memcpy(addr + vring->cur_len, &data,
+ sizeof(u64));
+ } else {
+ memcpy(&data, addr + vring->cur_len,
+ sizeof(u64));
+ }
+ vring->cur_len += sizeof(u64);
+ } else {
+ /* Leftover bytes. */
+ if (is_rx) {
+ if (!IS_VRING_DROP(vring))
+ memcpy(addr + vring->cur_len, &data,
+ len - vring->cur_len);
+ } else {
+ data = 0;
+ memcpy(&data, addr + vring->cur_len,
+ len - vring->cur_len);
+ }
+ vring->cur_len = len;
+ }
+
+ /* Write the word into FIFO for Tx. */
+ if (!is_rx)
+ writeq(data, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+}
+
+/*
+ * Rx/Tx packet header.
+ *
+ * In Rx case, the packet might be found to belong to a different vring since
+ * the TmFifo is shared by different services. In such case, the 'vring_change'
+ * flag is set.
+ */
+static void mlxbf_tmfifo_rxtx_header(struct mlxbf_tmfifo_vring *vring,
+ struct vring_desc **desc,
+ bool is_rx, bool *vring_change)
+{
+ struct mlxbf_tmfifo *fifo = vring->fifo;
+ struct virtio_net_config *config;
+ struct mlxbf_tmfifo_msg_hdr hdr;
+ int vdev_id, hdr_len;
+ bool drop_rx = false;
+
+ /* Read/Write packet header. */
+ if (is_rx) {
+ /* Drain one word from the FIFO. */
+ *(u64 *)&hdr = readq(fifo->rx_base + MLXBF_TMFIFO_RX_DATA);
+
+ /* Skip the length 0 packets (keepalive). */
+ if (hdr.len == 0)
+ return;
+
+ /* Check packet type. */
+ if (hdr.type == VIRTIO_ID_NET) {
+ vdev_id = VIRTIO_ID_NET;
+ hdr_len = sizeof(struct virtio_net_hdr);
+ config = &fifo->vdev[vdev_id]->config.net;
+ /* A legacy-only interface for now. */
+ if (ntohs(hdr.len) >
+ __virtio16_to_cpu(virtio_legacy_is_little_endian(),
+ config->mtu) +
+ MLXBF_TMFIFO_NET_L2_OVERHEAD)
+ drop_rx = true;
+ } else {
+ vdev_id = VIRTIO_ID_CONSOLE;
+ hdr_len = 0;
+ }
+
+ /*
+ * Check whether the new packet still belongs to this vring.
+ * If not, update the pkt_len of the new vring.
+ */
+ if (vdev_id != vring->vdev_id) {
+ struct mlxbf_tmfifo_vdev *tm_dev2 = fifo->vdev[vdev_id];
+
+ if (!tm_dev2)
+ return;
+ vring->desc = *desc;
+ vring = &tm_dev2->vrings[MLXBF_TMFIFO_VRING_RX];
+ *vring_change = true;
+ }
+
+ if (drop_rx && !IS_VRING_DROP(vring)) {
+ if (vring->desc_head)
+ mlxbf_tmfifo_release_pkt(vring);
+ *desc = &vring->drop_desc;
+ vring->desc_head = *desc;
+ vring->desc = *desc;
+ }
+
+ vring->pkt_len = ntohs(hdr.len) + hdr_len;
+ } else {
+ /* Network virtio has an extra header. */
+ hdr_len = (vring->vdev_id == VIRTIO_ID_NET) ?
+ sizeof(struct virtio_net_hdr) : 0;
+ vring->pkt_len = mlxbf_tmfifo_get_pkt_len(vring, *desc);
+ hdr.type = (vring->vdev_id == VIRTIO_ID_NET) ?
+ VIRTIO_ID_NET : VIRTIO_ID_CONSOLE;
+ hdr.len = htons(vring->pkt_len - hdr_len);
+ writeq(*(u64 *)&hdr, fifo->tx_base + MLXBF_TMFIFO_TX_DATA);
+ }
+
+ vring->cur_len = hdr_len;
+ vring->rem_len = vring->pkt_len;
+ fifo->vring[is_rx] = vring;
+}
+
+/*
+ * Rx/Tx one descriptor.
+ *
+ * Return true to indicate more data available.
+ */
+static bool mlxbf_tmfifo_rxtx_one_desc(struct mlxbf_tmfifo_vring *vring,
+ bool is_rx, int *avail)
+{
+ const struct vring *vr = virtqueue_get_vring(vring->vq);
+ struct mlxbf_tmfifo *fifo = vring->fifo;
+ struct virtio_device *vdev;
+ bool vring_change = false;
+ struct vring_desc *desc;
+ unsigned long flags;
+ u32 len, idx;
+
+ vdev = &fifo->vdev[vring->vdev_id]->vdev;
+
+ /* Get the descriptor of the next packet. */
+ if (!vring->desc) {
+ desc = mlxbf_tmfifo_get_next_pkt(vring, is_rx);
+ if (!desc) {
+ /* Drop next Rx packet to avoid stuck. */
+ if (is_rx) {
+ desc = &vring->drop_desc;
+ vring->desc_head = desc;
+ vring->desc = desc;
+ } else {
+ return false;
+ }
+ }
+ } else {
+ desc = vring->desc;
+ }
+
+ /* Beginning of a packet. Start to Rx/Tx packet header. */
+ if (vring->pkt_len == 0) {
+ mlxbf_tmfifo_rxtx_header(vring, &desc, is_rx, &vring_change);
+ (*avail)--;
+
+ /* Return if new packet is for another ring. */
+ if (vring_change)
+ return false;
+ goto mlxbf_tmfifo_desc_done;
+ }
+
+ /* Get the length of this desc. */
+ len = virtio32_to_cpu(vdev, desc->len);
+ if (len > vring->rem_len)
+ len = vring->rem_len;
+
+ /* Rx/Tx one word (8 bytes) if not done. */
+ if (vring->cur_len < len) {
+ mlxbf_tmfifo_rxtx_word(vring, desc, is_rx, len);
+ (*avail)--;
+ }
+
+ /* Check again whether it's done. */
+ if (vring->cur_len == len) {
+ vring->cur_len = 0;
+ vring->rem_len -= len;
+
+ /* Get the next desc on the chain. */
+ if (!IS_VRING_DROP(vring) && vring->rem_len > 0 &&
+ (virtio16_to_cpu(vdev, desc->flags) & VRING_DESC_F_NEXT)) {
+ idx = virtio16_to_cpu(vdev, desc->next);
+ desc = &vr->desc[idx];
+ goto mlxbf_tmfifo_desc_done;
+ }
+
+ /* Done and release the packet. */
+ desc = NULL;
+ fifo->vring[is_rx] = NULL;
+ if (!IS_VRING_DROP(vring)) {
+ mlxbf_tmfifo_release_pkt(vring);
+ } else {
+ vring->pkt_len = 0;
+ vring->desc_head = NULL;
+ vring->desc = NULL;
+ return false;
+ }
+
+ /*
+ * Make sure the load/store are in order before
+ * returning back to virtio.
+ */
+ virtio_mb(false);
+
+ /* Notify upper layer that packet is done. */
+ spin_lock_irqsave(&fifo->spin_lock[is_rx], flags);
+ vring_interrupt(0, vring->vq);
+ spin_unlock_irqrestore(&fifo->spin_lock[is_rx], flags);
+ }
+
+mlxbf_tmfifo_desc_done:
+ /* Save the current desc. */
+ vring->desc = desc;
+
+ return true;
+}
+
+/* Rx & Tx processing of a queue. */
+static void mlxbf_tmfifo_rxtx(struct mlxbf_tmfifo_vring *vring, bool is_rx)
+{
+ int avail = 0, devid = vring->vdev_id;
+ struct mlxbf_tmfifo *fifo;
+ bool more;
+
+ fifo = vring->fifo;
+
+ /* Return if vdev is not ready. */
+ if (!fifo->vdev[devid])
+ return;
+
+ /* Return if another vring is running. */
+ if (fifo->vring[is_rx] && fifo->vring[is_rx] != vring)
+ return;
+
+ /* Only handle console and network for now. */
+ if (WARN_ON(devid != VIRTIO_ID_NET && devid != VIRTIO_ID_CONSOLE))
+ return;
+
+ do {
+ /* Get available FIFO space. */
+ if (avail == 0) {
+ if (is_rx)
+ avail = mlxbf_tmfifo_get_rx_avail(fifo);
+ else
+ avail = mlxbf_tmfifo_get_tx_avail(fifo, devid);
+ if (avail <= 0)
+ break;
+ }
+
+ /* Console output always comes from the Tx buffer. */
+ if (!is_rx && devid == VIRTIO_ID_CONSOLE) {
+ mlxbf_tmfifo_console_tx(fifo, avail);
+ break;
+ }
+
+ /* Handle one descriptor. */
+ more = mlxbf_tmfifo_rxtx_one_desc(vring, is_rx, &avail);
+ } while (more);
+}
+
+/* Handle Rx or Tx queues. */
+static void mlxbf_tmfifo_work_rxtx(struct mlxbf_tmfifo *fifo, int queue_id,
+ int irq_id, bool is_rx)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev;
+ struct mlxbf_tmfifo_vring *vring;
+ int i;
+
+ if (!test_and_clear_bit(irq_id, &fifo->pend_events) ||
+ !fifo->irq_info[irq_id].irq)
+ return;
+
+ for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++) {
+ tm_vdev = fifo->vdev[i];
+ if (tm_vdev) {
+ vring = &tm_vdev->vrings[queue_id];
+ if (vring->vq)
+ mlxbf_tmfifo_rxtx(vring, is_rx);
+ }
+ }
+}
+
+/* Work handler for Rx and Tx case. */
+static void mlxbf_tmfifo_work_handler(struct work_struct *work)
+{
+ struct mlxbf_tmfifo *fifo;
+
+ fifo = container_of(work, struct mlxbf_tmfifo, work);
+ if (!fifo->is_ready)
+ return;
+
+ mutex_lock(&fifo->lock);
+
+ /* Tx (Send data to the TmFifo). */
+ mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_TX,
+ MLXBF_TM_TX_LWM_IRQ, false);
+
+ /* Rx (Receive data from the TmFifo). */
+ mlxbf_tmfifo_work_rxtx(fifo, MLXBF_TMFIFO_VRING_RX,
+ MLXBF_TM_RX_HWM_IRQ, true);
+
+ mutex_unlock(&fifo->lock);
+}
+
+/* The notify function is called when new buffers are posted. */
+static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
+{
+ struct mlxbf_tmfifo_vring *vring = vq->priv;
+ struct mlxbf_tmfifo_vdev *tm_vdev;
+ struct mlxbf_tmfifo *fifo;
+ unsigned long flags;
+
+ fifo = vring->fifo;
+
+ /*
+ * Virtio maintains vrings in pairs, even number ring for Rx
+ * and odd number ring for Tx.
+ */
+ if (vring->index & BIT(0)) {
+ /*
+ * Console could make blocking call with interrupts disabled.
+ * In such case, the vring needs to be served right away. For
+ * other cases, just set the TX LWM bit to start Tx in the
+ * worker handler.
+ */
+ if (vring->vdev_id == VIRTIO_ID_CONSOLE) {
+ spin_lock_irqsave(&fifo->spin_lock[0], flags);
+ tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
+ mlxbf_tmfifo_console_output(tm_vdev, vring);
+ spin_unlock_irqrestore(&fifo->spin_lock[0], flags);
+ set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
+ } else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
+ &fifo->pend_events)) {
+ return true;
+ }
+ } else {
+ if (test_and_set_bit(MLXBF_TM_RX_HWM_IRQ, &fifo->pend_events))
+ return true;
+ }
+
+ schedule_work(&fifo->work);
+
+ return true;
+}
+
+/* Get the array of feature bits for this device. */
+static u64 mlxbf_tmfifo_virtio_get_features(struct virtio_device *vdev)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+ return tm_vdev->features;
+}
+
+/* Confirm device features to use. */
+static int mlxbf_tmfifo_virtio_finalize_features(struct virtio_device *vdev)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+ tm_vdev->features = vdev->features;
+
+ return 0;
+}
+
+/* Free virtqueues found by find_vqs(). */
+static void mlxbf_tmfifo_virtio_del_vqs(struct virtio_device *vdev)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+ struct mlxbf_tmfifo_vring *vring;
+ struct virtqueue *vq;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tm_vdev->vrings); i++) {
+ vring = &tm_vdev->vrings[i];
+
+ /* Release the pending packet. */
+ if (vring->desc)
+ mlxbf_tmfifo_release_pkt(vring);
+ vq = vring->vq;
+ if (vq) {
+ vring->vq = NULL;
+ vring_del_virtqueue(vq);
+ }
+ }
+}
+
+/* Create and initialize the virtual queues. */
+static int mlxbf_tmfifo_virtio_find_vqs(struct virtio_device *vdev,
+ unsigned int nvqs,
+ struct virtqueue *vqs[],
+ vq_callback_t *callbacks[],
+ const char * const names[],
+ const bool *ctx,
+ struct irq_affinity *desc)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+ struct mlxbf_tmfifo_vring *vring;
+ struct virtqueue *vq;
+ int i, ret, size;
+
+ if (nvqs > ARRAY_SIZE(tm_vdev->vrings))
+ return -EINVAL;
+
+ for (i = 0; i < nvqs; ++i) {
+ if (!names[i]) {
+ ret = -EINVAL;
+ goto error;
+ }
+ vring = &tm_vdev->vrings[i];
+
+ /* zero vring */
+ size = vring_size(vring->num, vring->align);
+ memset(vring->va, 0, size);
+ vq = vring_new_virtqueue(i, vring->num, vring->align, vdev,
+ false, false, vring->va,
+ mlxbf_tmfifo_virtio_notify,
+ callbacks[i], names[i]);
+ if (!vq) {
+ dev_err(&vdev->dev, "vring_new_virtqueue failed\n");
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ vqs[i] = vq;
+ vring->vq = vq;
+ vq->priv = vring;
+ }
+
+ return 0;
+
+error:
+ mlxbf_tmfifo_virtio_del_vqs(vdev);
+ return ret;
+}
+
+/* Read the status byte. */
+static u8 mlxbf_tmfifo_virtio_get_status(struct virtio_device *vdev)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+ return tm_vdev->status;
+}
+
+/* Write the status byte. */
+static void mlxbf_tmfifo_virtio_set_status(struct virtio_device *vdev,
+ u8 status)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+ tm_vdev->status = status;
+}
+
+/* Reset the device. Not much here for now. */
+static void mlxbf_tmfifo_virtio_reset(struct virtio_device *vdev)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+ tm_vdev->status = 0;
+}
+
+/* Read the value of a configuration field. */
+static void mlxbf_tmfifo_virtio_get(struct virtio_device *vdev,
+ unsigned int offset,
+ void *buf,
+ unsigned int len)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+ if ((u64)offset + len > sizeof(tm_vdev->config))
+ return;
+
+ memcpy(buf, (u8 *)&tm_vdev->config + offset, len);
+}
+
+/* Write the value of a configuration field. */
+static void mlxbf_tmfifo_virtio_set(struct virtio_device *vdev,
+ unsigned int offset,
+ const void *buf,
+ unsigned int len)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+ if ((u64)offset + len > sizeof(tm_vdev->config))
+ return;
+
+ memcpy((u8 *)&tm_vdev->config + offset, buf, len);
+}
+
+static void tmfifo_virtio_dev_release(struct device *device)
+{
+ struct virtio_device *vdev =
+ container_of(device, struct virtio_device, dev);
+ struct mlxbf_tmfifo_vdev *tm_vdev = mlxbf_vdev_to_tmfifo(vdev);
+
+ kfree(tm_vdev);
+}
+
+/* Virtio config operations. */
+static const struct virtio_config_ops mlxbf_tmfifo_virtio_config_ops = {
+ .get_features = mlxbf_tmfifo_virtio_get_features,
+ .finalize_features = mlxbf_tmfifo_virtio_finalize_features,
+ .find_vqs = mlxbf_tmfifo_virtio_find_vqs,
+ .del_vqs = mlxbf_tmfifo_virtio_del_vqs,
+ .reset = mlxbf_tmfifo_virtio_reset,
+ .set_status = mlxbf_tmfifo_virtio_set_status,
+ .get_status = mlxbf_tmfifo_virtio_get_status,
+ .get = mlxbf_tmfifo_virtio_get,
+ .set = mlxbf_tmfifo_virtio_set,
+};
+
+/* Create vdev for the FIFO. */
+static int mlxbf_tmfifo_create_vdev(struct device *dev,
+ struct mlxbf_tmfifo *fifo,
+ int vdev_id, u64 features,
+ void *config, u32 size)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev, *reg_dev = NULL;
+ int ret;
+
+ mutex_lock(&fifo->lock);
+
+ tm_vdev = fifo->vdev[vdev_id];
+ if (tm_vdev) {
+ dev_err(dev, "vdev %d already exists\n", vdev_id);
+ ret = -EEXIST;
+ goto fail;
+ }
+
+ tm_vdev = kzalloc(sizeof(*tm_vdev), GFP_KERNEL);
+ if (!tm_vdev) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ tm_vdev->vdev.id.device = vdev_id;
+ tm_vdev->vdev.config = &mlxbf_tmfifo_virtio_config_ops;
+ tm_vdev->vdev.dev.parent = dev;
+ tm_vdev->vdev.dev.release = tmfifo_virtio_dev_release;
+ tm_vdev->features = features;
+ if (config)
+ memcpy(&tm_vdev->config, config, size);
+
+ if (mlxbf_tmfifo_alloc_vrings(fifo, tm_vdev)) {
+ dev_err(dev, "unable to allocate vring\n");
+ ret = -ENOMEM;
+ goto vdev_fail;
+ }
+
+ /* Allocate an output buffer for the console device. */
+ if (vdev_id == VIRTIO_ID_CONSOLE)
+ tm_vdev->tx_buf.buf = devm_kmalloc(dev,
+ MLXBF_TMFIFO_CON_TX_BUF_SIZE,
+ GFP_KERNEL);
+ fifo->vdev[vdev_id] = tm_vdev;
+
+ /* Register the virtio device. */
+ ret = register_virtio_device(&tm_vdev->vdev);
+ reg_dev = tm_vdev;
+ if (ret) {
+ dev_err(dev, "register_virtio_device failed\n");
+ goto vdev_fail;
+ }
+
+ mutex_unlock(&fifo->lock);
+ return 0;
+
+vdev_fail:
+ mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+ fifo->vdev[vdev_id] = NULL;
+ if (reg_dev)
+ put_device(&tm_vdev->vdev.dev);
+ else
+ kfree(tm_vdev);
+fail:
+ mutex_unlock(&fifo->lock);
+ return ret;
+}
+
+/* Delete vdev for the FIFO. */
+static int mlxbf_tmfifo_delete_vdev(struct mlxbf_tmfifo *fifo, int vdev_id)
+{
+ struct mlxbf_tmfifo_vdev *tm_vdev;
+
+ mutex_lock(&fifo->lock);
+
+ /* Unregister vdev. */
+ tm_vdev = fifo->vdev[vdev_id];
+ if (tm_vdev) {
+ unregister_virtio_device(&tm_vdev->vdev);
+ mlxbf_tmfifo_free_vrings(fifo, tm_vdev);
+ fifo->vdev[vdev_id] = NULL;
+ }
+
+ mutex_unlock(&fifo->lock);
+
+ return 0;
+}
+
+/* Read the configured network MAC address from efi variable. */
+static void mlxbf_tmfifo_get_cfg_mac(u8 *mac)
+{
+ efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
+ unsigned long size = ETH_ALEN;
+ u8 buf[ETH_ALEN];
+ efi_status_t rc;
+
+ rc = efi.get_variable(mlxbf_tmfifo_efi_name, &guid, NULL, &size, buf);
+ if (rc == EFI_SUCCESS && size == ETH_ALEN)
+ ether_addr_copy(mac, buf);
+ else
+ ether_addr_copy(mac, mlxbf_tmfifo_net_default_mac);
+}
+
+/* Set TmFifo thresolds which is used to trigger interrupts. */
+static void mlxbf_tmfifo_set_threshold(struct mlxbf_tmfifo *fifo)
+{
+ u64 ctl;
+
+ /* Get Tx FIFO size and set the low/high watermark. */
+ ctl = readq(fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+ fifo->tx_fifo_size =
+ FIELD_GET(MLXBF_TMFIFO_TX_CTL__MAX_ENTRIES_MASK, ctl);
+ ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__LWM_MASK) |
+ FIELD_PREP(MLXBF_TMFIFO_TX_CTL__LWM_MASK,
+ fifo->tx_fifo_size / 2);
+ ctl = (ctl & ~MLXBF_TMFIFO_TX_CTL__HWM_MASK) |
+ FIELD_PREP(MLXBF_TMFIFO_TX_CTL__HWM_MASK,
+ fifo->tx_fifo_size - 1);
+ writeq(ctl, fifo->tx_base + MLXBF_TMFIFO_TX_CTL);
+
+ /* Get Rx FIFO size and set the low/high watermark. */
+ ctl = readq(fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+ fifo->rx_fifo_size =
+ FIELD_GET(MLXBF_TMFIFO_RX_CTL__MAX_ENTRIES_MASK, ctl);
+ ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__LWM_MASK) |
+ FIELD_PREP(MLXBF_TMFIFO_RX_CTL__LWM_MASK, 0);
+ ctl = (ctl & ~MLXBF_TMFIFO_RX_CTL__HWM_MASK) |
+ FIELD_PREP(MLXBF_TMFIFO_RX_CTL__HWM_MASK, 1);
+ writeq(ctl, fifo->rx_base + MLXBF_TMFIFO_RX_CTL);
+}
+
+static void mlxbf_tmfifo_cleanup(struct mlxbf_tmfifo *fifo)
+{
+ int i;
+
+ fifo->is_ready = false;
+ del_timer_sync(&fifo->timer);
+ mlxbf_tmfifo_disable_irqs(fifo);
+ cancel_work_sync(&fifo->work);
+ for (i = 0; i < MLXBF_TMFIFO_VDEV_MAX; i++)
+ mlxbf_tmfifo_delete_vdev(fifo, i);
+}
+
+/* Probe the TMFIFO. */
+static int mlxbf_tmfifo_probe(struct platform_device *pdev)
+{
+ struct virtio_net_config net_config;
+ struct device *dev = &pdev->dev;
+ struct mlxbf_tmfifo *fifo;
+ int i, rc;
+
+ fifo = devm_kzalloc(dev, sizeof(*fifo), GFP_KERNEL);
+ if (!fifo)
+ return -ENOMEM;
+
+ spin_lock_init(&fifo->spin_lock[0]);
+ spin_lock_init(&fifo->spin_lock[1]);
+ INIT_WORK(&fifo->work, mlxbf_tmfifo_work_handler);
+ mutex_init(&fifo->lock);
+
+ /* Get the resource of the Rx FIFO. */
+ fifo->rx_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(fifo->rx_base))
+ return PTR_ERR(fifo->rx_base);
+
+ /* Get the resource of the Tx FIFO. */
+ fifo->tx_base = devm_platform_ioremap_resource(pdev, 1);
+ if (IS_ERR(fifo->tx_base))
+ return PTR_ERR(fifo->tx_base);
+
+ platform_set_drvdata(pdev, fifo);
+
+ timer_setup(&fifo->timer, mlxbf_tmfifo_timer, 0);
+
+ for (i = 0; i < MLXBF_TM_MAX_IRQ; i++) {
+ fifo->irq_info[i].index = i;
+ fifo->irq_info[i].fifo = fifo;
+ fifo->irq_info[i].irq = platform_get_irq(pdev, i);
+ rc = devm_request_irq(dev, fifo->irq_info[i].irq,
+ mlxbf_tmfifo_irq_handler, 0,
+ "tmfifo", &fifo->irq_info[i]);
+ if (rc) {
+ dev_err(dev, "devm_request_irq failed\n");
+ fifo->irq_info[i].irq = 0;
+ return rc;
+ }
+ }
+
+ mlxbf_tmfifo_set_threshold(fifo);
+
+ /* Create the console vdev. */
+ rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_CONSOLE, 0, NULL, 0);
+ if (rc)
+ goto fail;
+
+ /* Create the network vdev. */
+ memset(&net_config, 0, sizeof(net_config));
+
+ /* A legacy-only interface for now. */
+ net_config.mtu = __cpu_to_virtio16(virtio_legacy_is_little_endian(),
+ ETH_DATA_LEN);
+ net_config.status = __cpu_to_virtio16(virtio_legacy_is_little_endian(),
+ VIRTIO_NET_S_LINK_UP);
+ mlxbf_tmfifo_get_cfg_mac(net_config.mac);
+ rc = mlxbf_tmfifo_create_vdev(dev, fifo, VIRTIO_ID_NET,
+ MLXBF_TMFIFO_NET_FEATURES, &net_config,
+ sizeof(net_config));
+ if (rc)
+ goto fail;
+
+ mod_timer(&fifo->timer, jiffies + MLXBF_TMFIFO_TIMER_INTERVAL);
+
+ fifo->is_ready = true;
+ return 0;
+
+fail:
+ mlxbf_tmfifo_cleanup(fifo);
+ return rc;
+}
+
+/* Device remove function. */
+static int mlxbf_tmfifo_remove(struct platform_device *pdev)
+{
+ struct mlxbf_tmfifo *fifo = platform_get_drvdata(pdev);
+
+ mlxbf_tmfifo_cleanup(fifo);
+
+ return 0;
+}
+
+static const struct acpi_device_id mlxbf_tmfifo_acpi_match[] = {
+ { "MLNXBF01", 0 },
+ {}
+};
+MODULE_DEVICE_TABLE(acpi, mlxbf_tmfifo_acpi_match);
+
+static struct platform_driver mlxbf_tmfifo_driver = {
+ .probe = mlxbf_tmfifo_probe,
+ .remove = mlxbf_tmfifo_remove,
+ .driver = {
+ .name = "bf-tmfifo",
+ .acpi_match_table = mlxbf_tmfifo_acpi_match,
+ },
+};
+
+module_platform_driver(mlxbf_tmfifo_driver);
+
+MODULE_DESCRIPTION("Mellanox BlueField SoC TmFifo Driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Mellanox Technologies");
diff --git a/drivers/platform/mellanox/mlxreg-hotplug.c b/drivers/platform/mellanox/mlxreg-hotplug.c
new file mode 100644
index 000000000..b01344514
--- /dev/null
+++ b/drivers/platform/mellanox/mlxreg-hotplug.c
@@ -0,0 +1,743 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox hotplug driver
+ *
+ * Copyright (C) 2016-2020 Mellanox Technologies
+ */
+
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_data/mlxreg.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/string_helpers.h>
+#include <linux/regmap.h>
+#include <linux/workqueue.h>
+
+/* Offset of event and mask registers from status register. */
+#define MLXREG_HOTPLUG_EVENT_OFF 1
+#define MLXREG_HOTPLUG_MASK_OFF 2
+#define MLXREG_HOTPLUG_AGGR_MASK_OFF 1
+
+/* ASIC good health mask. */
+#define MLXREG_HOTPLUG_GOOD_HEALTH_MASK 0x02
+
+#define MLXREG_HOTPLUG_ATTRS_MAX 24
+#define MLXREG_HOTPLUG_NOT_ASSERT 3
+
+/**
+ * struct mlxreg_hotplug_priv_data - platform private data:
+ * @irq: platform device interrupt number;
+ * @dev: basic device;
+ * @pdev: platform device;
+ * @plat: platform data;
+ * @regmap: register map handle;
+ * @dwork_irq: delayed work template;
+ * @lock: spin lock;
+ * @hwmon: hwmon device;
+ * @mlxreg_hotplug_attr: sysfs attributes array;
+ * @mlxreg_hotplug_dev_attr: sysfs sensor device attribute array;
+ * @group: sysfs attribute group;
+ * @groups: list of sysfs attribute group for hwmon registration;
+ * @cell: location of top aggregation interrupt register;
+ * @mask: top aggregation interrupt common mask;
+ * @aggr_cache: last value of aggregation register status;
+ * @after_probe: flag indication probing completion;
+ * @not_asserted: number of entries in workqueue with no signal assertion;
+ */
+struct mlxreg_hotplug_priv_data {
+ int irq;
+ struct device *dev;
+ struct platform_device *pdev;
+ struct mlxreg_hotplug_platform_data *plat;
+ struct regmap *regmap;
+ struct delayed_work dwork_irq;
+ spinlock_t lock; /* sync with interrupt */
+ struct device *hwmon;
+ struct attribute *mlxreg_hotplug_attr[MLXREG_HOTPLUG_ATTRS_MAX + 1];
+ struct sensor_device_attribute_2
+ mlxreg_hotplug_dev_attr[MLXREG_HOTPLUG_ATTRS_MAX];
+ struct attribute_group group;
+ const struct attribute_group *groups[2];
+ u32 cell;
+ u32 mask;
+ u32 aggr_cache;
+ bool after_probe;
+ u8 not_asserted;
+};
+
+/* Environment variables array for udev. */
+static char *mlxreg_hotplug_udev_envp[] = { NULL, NULL };
+
+static int
+mlxreg_hotplug_udev_event_send(struct kobject *kobj,
+ struct mlxreg_core_data *data, bool action)
+{
+ char event_str[MLXREG_CORE_LABEL_MAX_SIZE + 2];
+ char label[MLXREG_CORE_LABEL_MAX_SIZE] = { 0 };
+
+ mlxreg_hotplug_udev_envp[0] = event_str;
+ string_upper(label, data->label);
+ snprintf(event_str, MLXREG_CORE_LABEL_MAX_SIZE, "%s=%d", label, !!action);
+
+ return kobject_uevent_env(kobj, KOBJ_CHANGE, mlxreg_hotplug_udev_envp);
+}
+
+static int mlxreg_hotplug_device_create(struct mlxreg_hotplug_priv_data *priv,
+ struct mlxreg_core_data *data)
+{
+ struct mlxreg_core_hotplug_platform_data *pdata;
+ struct i2c_client *client;
+
+ /* Notify user by sending hwmon uevent. */
+ mlxreg_hotplug_udev_event_send(&priv->hwmon->kobj, data, true);
+
+ /*
+ * Return if adapter number is negative. It could be in case hotplug
+ * event is not associated with hotplug device.
+ */
+ if (data->hpdev.nr < 0)
+ return 0;
+
+ pdata = dev_get_platdata(&priv->pdev->dev);
+ data->hpdev.adapter = i2c_get_adapter(data->hpdev.nr +
+ pdata->shift_nr);
+ if (!data->hpdev.adapter) {
+ dev_err(priv->dev, "Failed to get adapter for bus %d\n",
+ data->hpdev.nr + pdata->shift_nr);
+ return -EFAULT;
+ }
+
+ client = i2c_new_client_device(data->hpdev.adapter,
+ data->hpdev.brdinfo);
+ if (IS_ERR(client)) {
+ dev_err(priv->dev, "Failed to create client %s at bus %d at addr 0x%02x\n",
+ data->hpdev.brdinfo->type, data->hpdev.nr +
+ pdata->shift_nr, data->hpdev.brdinfo->addr);
+
+ i2c_put_adapter(data->hpdev.adapter);
+ data->hpdev.adapter = NULL;
+ return PTR_ERR(client);
+ }
+
+ data->hpdev.client = client;
+
+ return 0;
+}
+
+static void
+mlxreg_hotplug_device_destroy(struct mlxreg_hotplug_priv_data *priv,
+ struct mlxreg_core_data *data)
+{
+ /* Notify user by sending hwmon uevent. */
+ mlxreg_hotplug_udev_event_send(&priv->hwmon->kobj, data, false);
+
+ if (data->hpdev.client) {
+ i2c_unregister_device(data->hpdev.client);
+ data->hpdev.client = NULL;
+ }
+
+ if (data->hpdev.adapter) {
+ i2c_put_adapter(data->hpdev.adapter);
+ data->hpdev.adapter = NULL;
+ }
+}
+
+static ssize_t mlxreg_hotplug_attr_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct mlxreg_hotplug_priv_data *priv = dev_get_drvdata(dev);
+ struct mlxreg_core_hotplug_platform_data *pdata;
+ int index = to_sensor_dev_attr_2(attr)->index;
+ int nr = to_sensor_dev_attr_2(attr)->nr;
+ struct mlxreg_core_item *item;
+ struct mlxreg_core_data *data;
+ u32 regval;
+ int ret;
+
+ pdata = dev_get_platdata(&priv->pdev->dev);
+ item = pdata->items + nr;
+ data = item->data + index;
+
+ ret = regmap_read(priv->regmap, data->reg, &regval);
+ if (ret)
+ return ret;
+
+ if (item->health) {
+ regval &= data->mask;
+ } else {
+ /* Bit = 0 : functional if item->inversed is true. */
+ if (item->inversed)
+ regval = !(regval & data->mask);
+ else
+ regval = !!(regval & data->mask);
+ }
+
+ return sprintf(buf, "%u\n", regval);
+}
+
+#define PRIV_ATTR(i) priv->mlxreg_hotplug_attr[i]
+#define PRIV_DEV_ATTR(i) priv->mlxreg_hotplug_dev_attr[i]
+
+static int mlxreg_hotplug_attr_init(struct mlxreg_hotplug_priv_data *priv)
+{
+ struct mlxreg_core_hotplug_platform_data *pdata;
+ struct mlxreg_core_item *item;
+ struct mlxreg_core_data *data;
+ unsigned long mask;
+ u32 regval;
+ int num_attrs = 0, id = 0, i, j, k, ret;
+
+ pdata = dev_get_platdata(&priv->pdev->dev);
+ item = pdata->items;
+
+ /* Go over all kinds of items - psu, pwr, fan. */
+ for (i = 0; i < pdata->counter; i++, item++) {
+ if (item->capability) {
+ /*
+ * Read group capability register to get actual number
+ * of interrupt capable components and set group mask
+ * accordingly.
+ */
+ ret = regmap_read(priv->regmap, item->capability,
+ &regval);
+ if (ret)
+ return ret;
+
+ item->mask = GENMASK((regval & item->mask) - 1, 0);
+ }
+
+ data = item->data;
+
+ /* Go over all unmasked units within item. */
+ mask = item->mask;
+ k = 0;
+ for_each_set_bit(j, &mask, item->count) {
+ if (data->capability) {
+ /*
+ * Read capability register and skip non
+ * relevant attributes.
+ */
+ ret = regmap_read(priv->regmap,
+ data->capability, &regval);
+ if (ret)
+ return ret;
+ if (!(regval & data->bit)) {
+ data++;
+ continue;
+ }
+ }
+ PRIV_ATTR(id) = &PRIV_DEV_ATTR(id).dev_attr.attr;
+ PRIV_ATTR(id)->name = devm_kasprintf(&priv->pdev->dev,
+ GFP_KERNEL,
+ data->label);
+
+ if (!PRIV_ATTR(id)->name) {
+ dev_err(priv->dev, "Memory allocation failed for attr %d.\n",
+ id);
+ return -ENOMEM;
+ }
+
+ PRIV_DEV_ATTR(id).dev_attr.attr.name =
+ PRIV_ATTR(id)->name;
+ PRIV_DEV_ATTR(id).dev_attr.attr.mode = 0444;
+ PRIV_DEV_ATTR(id).dev_attr.show =
+ mlxreg_hotplug_attr_show;
+ PRIV_DEV_ATTR(id).nr = i;
+ PRIV_DEV_ATTR(id).index = k;
+ sysfs_attr_init(&PRIV_DEV_ATTR(id).dev_attr.attr);
+ data++;
+ id++;
+ k++;
+ }
+ num_attrs += k;
+ }
+
+ priv->group.attrs = devm_kcalloc(&priv->pdev->dev,
+ num_attrs,
+ sizeof(struct attribute *),
+ GFP_KERNEL);
+ if (!priv->group.attrs)
+ return -ENOMEM;
+
+ priv->group.attrs = priv->mlxreg_hotplug_attr;
+ priv->groups[0] = &priv->group;
+ priv->groups[1] = NULL;
+
+ return 0;
+}
+
+static void
+mlxreg_hotplug_work_helper(struct mlxreg_hotplug_priv_data *priv,
+ struct mlxreg_core_item *item)
+{
+ struct mlxreg_core_data *data;
+ unsigned long asserted;
+ u32 regval, bit;
+ int ret;
+
+ /*
+ * Validate if item related to received signal type is valid.
+ * It should never happen, excepted the situation when some
+ * piece of hardware is broken. In such situation just produce
+ * error message and return. Caller must continue to handle the
+ * signals from other devices if any.
+ */
+ if (unlikely(!item)) {
+ dev_err(priv->dev, "False signal: at offset:mask 0x%02x:0x%02x.\n",
+ item->reg, item->mask);
+
+ return;
+ }
+
+ /* Mask event. */
+ ret = regmap_write(priv->regmap, item->reg + MLXREG_HOTPLUG_MASK_OFF,
+ 0);
+ if (ret)
+ goto out;
+
+ /* Read status. */
+ ret = regmap_read(priv->regmap, item->reg, &regval);
+ if (ret)
+ goto out;
+
+ /* Set asserted bits and save last status. */
+ regval &= item->mask;
+ asserted = item->cache ^ regval;
+ item->cache = regval;
+
+ for_each_set_bit(bit, &asserted, 8) {
+ data = item->data + bit;
+ if (regval & BIT(bit)) {
+ if (item->inversed)
+ mlxreg_hotplug_device_destroy(priv, data);
+ else
+ mlxreg_hotplug_device_create(priv, data);
+ } else {
+ if (item->inversed)
+ mlxreg_hotplug_device_create(priv, data);
+ else
+ mlxreg_hotplug_device_destroy(priv, data);
+ }
+ }
+
+ /* Acknowledge event. */
+ ret = regmap_write(priv->regmap, item->reg + MLXREG_HOTPLUG_EVENT_OFF,
+ 0);
+ if (ret)
+ goto out;
+
+ /* Unmask event. */
+ ret = regmap_write(priv->regmap, item->reg + MLXREG_HOTPLUG_MASK_OFF,
+ item->mask);
+
+ out:
+ if (ret)
+ dev_err(priv->dev, "Failed to complete workqueue.\n");
+}
+
+static void
+mlxreg_hotplug_health_work_helper(struct mlxreg_hotplug_priv_data *priv,
+ struct mlxreg_core_item *item)
+{
+ struct mlxreg_core_data *data = item->data;
+ u32 regval;
+ int i, ret = 0;
+
+ for (i = 0; i < item->count; i++, data++) {
+ /* Mask event. */
+ ret = regmap_write(priv->regmap, data->reg +
+ MLXREG_HOTPLUG_MASK_OFF, 0);
+ if (ret)
+ goto out;
+
+ /* Read status. */
+ ret = regmap_read(priv->regmap, data->reg, &regval);
+ if (ret)
+ goto out;
+
+ regval &= data->mask;
+
+ if (item->cache == regval)
+ goto ack_event;
+
+ /*
+ * ASIC health indication is provided through two bits. Bits
+ * value 0x2 indicates that ASIC reached the good health, value
+ * 0x0 indicates ASIC the bad health or dormant state and value
+ * 0x3 indicates the booting state. During ASIC reset it should
+ * pass the following states: dormant -> booting -> good.
+ */
+ if (regval == MLXREG_HOTPLUG_GOOD_HEALTH_MASK) {
+ if (!data->attached) {
+ /*
+ * ASIC is in steady state. Connect associated
+ * device, if configured.
+ */
+ mlxreg_hotplug_device_create(priv, data);
+ data->attached = true;
+ }
+ } else {
+ if (data->attached) {
+ /*
+ * ASIC health is failed after ASIC has been
+ * in steady state. Disconnect associated
+ * device, if it has been connected.
+ */
+ mlxreg_hotplug_device_destroy(priv, data);
+ data->attached = false;
+ data->health_cntr = 0;
+ }
+ }
+ item->cache = regval;
+ack_event:
+ /* Acknowledge event. */
+ ret = regmap_write(priv->regmap, data->reg +
+ MLXREG_HOTPLUG_EVENT_OFF, 0);
+ if (ret)
+ goto out;
+
+ /* Unmask event. */
+ ret = regmap_write(priv->regmap, data->reg +
+ MLXREG_HOTPLUG_MASK_OFF, data->mask);
+ if (ret)
+ goto out;
+ }
+
+ out:
+ if (ret)
+ dev_err(priv->dev, "Failed to complete workqueue.\n");
+}
+
+/*
+ * mlxreg_hotplug_work_handler - performs traversing of device interrupt
+ * registers according to the below hierarchy schema:
+ *
+ * Aggregation registers (status/mask)
+ * PSU registers: *---*
+ * *-----------------* | |
+ * |status/event/mask|-----> | * |
+ * *-----------------* | |
+ * Power registers: | |
+ * *-----------------* | |
+ * |status/event/mask|-----> | * |
+ * *-----------------* | |
+ * FAN registers: | |--> CPU
+ * *-----------------* | |
+ * |status/event/mask|-----> | * |
+ * *-----------------* | |
+ * ASIC registers: | |
+ * *-----------------* | |
+ * |status/event/mask|-----> | * |
+ * *-----------------* | |
+ * *---*
+ *
+ * In case some system changed are detected: FAN in/out, PSU in/out, power
+ * cable attached/detached, ASIC health good/bad, relevant device is created
+ * or destroyed.
+ */
+static void mlxreg_hotplug_work_handler(struct work_struct *work)
+{
+ struct mlxreg_core_hotplug_platform_data *pdata;
+ struct mlxreg_hotplug_priv_data *priv;
+ struct mlxreg_core_item *item;
+ u32 regval, aggr_asserted;
+ unsigned long flags;
+ int i, ret;
+
+ priv = container_of(work, struct mlxreg_hotplug_priv_data,
+ dwork_irq.work);
+ pdata = dev_get_platdata(&priv->pdev->dev);
+ item = pdata->items;
+
+ /* Mask aggregation event. */
+ ret = regmap_write(priv->regmap, pdata->cell +
+ MLXREG_HOTPLUG_AGGR_MASK_OFF, 0);
+ if (ret < 0)
+ goto out;
+
+ /* Read aggregation status. */
+ ret = regmap_read(priv->regmap, pdata->cell, &regval);
+ if (ret)
+ goto out;
+
+ regval &= pdata->mask;
+ aggr_asserted = priv->aggr_cache ^ regval;
+ priv->aggr_cache = regval;
+
+ /*
+ * Handler is invoked, but no assertion is detected at top aggregation
+ * status level. Set aggr_asserted to mask value to allow handler extra
+ * run over all relevant signals to recover any missed signal.
+ */
+ if (priv->not_asserted == MLXREG_HOTPLUG_NOT_ASSERT) {
+ priv->not_asserted = 0;
+ aggr_asserted = pdata->mask;
+ }
+ if (!aggr_asserted)
+ goto unmask_event;
+
+ /* Handle topology and health configuration changes. */
+ for (i = 0; i < pdata->counter; i++, item++) {
+ if (aggr_asserted & item->aggr_mask) {
+ if (item->health)
+ mlxreg_hotplug_health_work_helper(priv, item);
+ else
+ mlxreg_hotplug_work_helper(priv, item);
+ }
+ }
+
+ spin_lock_irqsave(&priv->lock, flags);
+
+ /*
+ * It is possible, that some signals have been inserted, while
+ * interrupt has been masked by mlxreg_hotplug_work_handler. In this
+ * case such signals will be missed. In order to handle these signals
+ * delayed work is canceled and work task re-scheduled for immediate
+ * execution. It allows to handle missed signals, if any. In other case
+ * work handler just validates that no new signals have been received
+ * during masking.
+ */
+ cancel_delayed_work(&priv->dwork_irq);
+ schedule_delayed_work(&priv->dwork_irq, 0);
+
+ spin_unlock_irqrestore(&priv->lock, flags);
+
+ return;
+
+unmask_event:
+ priv->not_asserted++;
+ /* Unmask aggregation event (no need acknowledge). */
+ ret = regmap_write(priv->regmap, pdata->cell +
+ MLXREG_HOTPLUG_AGGR_MASK_OFF, pdata->mask);
+
+ out:
+ if (ret)
+ dev_err(priv->dev, "Failed to complete workqueue.\n");
+}
+
+static int mlxreg_hotplug_set_irq(struct mlxreg_hotplug_priv_data *priv)
+{
+ struct mlxreg_core_hotplug_platform_data *pdata;
+ struct mlxreg_core_item *item;
+ struct mlxreg_core_data *data;
+ u32 regval;
+ int i, j, ret;
+
+ pdata = dev_get_platdata(&priv->pdev->dev);
+ item = pdata->items;
+
+ for (i = 0; i < pdata->counter; i++, item++) {
+ /* Clear group presense event. */
+ ret = regmap_write(priv->regmap, item->reg +
+ MLXREG_HOTPLUG_EVENT_OFF, 0);
+ if (ret)
+ goto out;
+
+ /*
+ * Verify if hardware configuration requires to disable
+ * interrupt capability for some of components.
+ */
+ data = item->data;
+ for (j = 0; j < item->count; j++, data++) {
+ /* Verify if the attribute has capability register. */
+ if (data->capability) {
+ /* Read capability register. */
+ ret = regmap_read(priv->regmap,
+ data->capability, &regval);
+ if (ret)
+ goto out;
+
+ if (!(regval & data->bit))
+ item->mask &= ~BIT(j);
+ }
+ }
+
+ /* Set group initial status as mask and unmask group event. */
+ if (item->inversed) {
+ item->cache = item->mask;
+ ret = regmap_write(priv->regmap, item->reg +
+ MLXREG_HOTPLUG_MASK_OFF,
+ item->mask);
+ if (ret)
+ goto out;
+ }
+ }
+
+ /* Keep aggregation initial status as zero and unmask events. */
+ ret = regmap_write(priv->regmap, pdata->cell +
+ MLXREG_HOTPLUG_AGGR_MASK_OFF, pdata->mask);
+ if (ret)
+ goto out;
+
+ /* Keep low aggregation initial status as zero and unmask events. */
+ if (pdata->cell_low) {
+ ret = regmap_write(priv->regmap, pdata->cell_low +
+ MLXREG_HOTPLUG_AGGR_MASK_OFF,
+ pdata->mask_low);
+ if (ret)
+ goto out;
+ }
+
+ /* Invoke work handler for initializing hot plug devices setting. */
+ mlxreg_hotplug_work_handler(&priv->dwork_irq.work);
+
+ out:
+ if (ret)
+ dev_err(priv->dev, "Failed to set interrupts.\n");
+ enable_irq(priv->irq);
+ return ret;
+}
+
+static void mlxreg_hotplug_unset_irq(struct mlxreg_hotplug_priv_data *priv)
+{
+ struct mlxreg_core_hotplug_platform_data *pdata;
+ struct mlxreg_core_item *item;
+ struct mlxreg_core_data *data;
+ int count, i, j;
+
+ pdata = dev_get_platdata(&priv->pdev->dev);
+ item = pdata->items;
+ disable_irq(priv->irq);
+ cancel_delayed_work_sync(&priv->dwork_irq);
+
+ /* Mask low aggregation event, if defined. */
+ if (pdata->cell_low)
+ regmap_write(priv->regmap, pdata->cell_low +
+ MLXREG_HOTPLUG_AGGR_MASK_OFF, 0);
+
+ /* Mask aggregation event. */
+ regmap_write(priv->regmap, pdata->cell + MLXREG_HOTPLUG_AGGR_MASK_OFF,
+ 0);
+
+ /* Clear topology configurations. */
+ for (i = 0; i < pdata->counter; i++, item++) {
+ data = item->data;
+ /* Mask group presense event. */
+ regmap_write(priv->regmap, data->reg + MLXREG_HOTPLUG_MASK_OFF,
+ 0);
+ /* Clear group presense event. */
+ regmap_write(priv->regmap, data->reg +
+ MLXREG_HOTPLUG_EVENT_OFF, 0);
+
+ /* Remove all the attached devices in group. */
+ count = item->count;
+ for (j = 0; j < count; j++, data++)
+ mlxreg_hotplug_device_destroy(priv, data);
+ }
+}
+
+static irqreturn_t mlxreg_hotplug_irq_handler(int irq, void *dev)
+{
+ struct mlxreg_hotplug_priv_data *priv;
+
+ priv = (struct mlxreg_hotplug_priv_data *)dev;
+
+ /* Schedule work task for immediate execution.*/
+ schedule_delayed_work(&priv->dwork_irq, 0);
+
+ return IRQ_HANDLED;
+}
+
+static int mlxreg_hotplug_probe(struct platform_device *pdev)
+{
+ struct mlxreg_core_hotplug_platform_data *pdata;
+ struct mlxreg_hotplug_priv_data *priv;
+ struct i2c_adapter *deferred_adap;
+ int err;
+
+ pdata = dev_get_platdata(&pdev->dev);
+ if (!pdata) {
+ dev_err(&pdev->dev, "Failed to get platform data.\n");
+ return -EINVAL;
+ }
+
+ /* Defer probing if the necessary adapter is not configured yet. */
+ deferred_adap = i2c_get_adapter(pdata->deferred_nr);
+ if (!deferred_adap)
+ return -EPROBE_DEFER;
+ i2c_put_adapter(deferred_adap);
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ if (pdata->irq) {
+ priv->irq = pdata->irq;
+ } else {
+ priv->irq = platform_get_irq(pdev, 0);
+ if (priv->irq < 0)
+ return priv->irq;
+ }
+
+ priv->regmap = pdata->regmap;
+ priv->dev = pdev->dev.parent;
+ priv->pdev = pdev;
+
+ err = devm_request_irq(&pdev->dev, priv->irq,
+ mlxreg_hotplug_irq_handler, IRQF_TRIGGER_FALLING
+ | IRQF_SHARED, "mlxreg-hotplug", priv);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to request irq: %d\n", err);
+ return err;
+ }
+
+ disable_irq(priv->irq);
+ spin_lock_init(&priv->lock);
+ INIT_DELAYED_WORK(&priv->dwork_irq, mlxreg_hotplug_work_handler);
+ dev_set_drvdata(&pdev->dev, priv);
+
+ err = mlxreg_hotplug_attr_init(priv);
+ if (err) {
+ dev_err(&pdev->dev, "Failed to allocate attributes: %d\n",
+ err);
+ return err;
+ }
+
+ priv->hwmon = devm_hwmon_device_register_with_groups(&pdev->dev,
+ "mlxreg_hotplug", priv, priv->groups);
+ if (IS_ERR(priv->hwmon)) {
+ dev_err(&pdev->dev, "Failed to register hwmon device %ld\n",
+ PTR_ERR(priv->hwmon));
+ return PTR_ERR(priv->hwmon);
+ }
+
+ /* Perform initial interrupts setup. */
+ mlxreg_hotplug_set_irq(priv);
+ priv->after_probe = true;
+
+ return 0;
+}
+
+static int mlxreg_hotplug_remove(struct platform_device *pdev)
+{
+ struct mlxreg_hotplug_priv_data *priv = dev_get_drvdata(&pdev->dev);
+
+ /* Clean interrupts setup. */
+ mlxreg_hotplug_unset_irq(priv);
+ devm_free_irq(&pdev->dev, priv->irq, priv);
+
+ return 0;
+}
+
+static struct platform_driver mlxreg_hotplug_driver = {
+ .driver = {
+ .name = "mlxreg-hotplug",
+ },
+ .probe = mlxreg_hotplug_probe,
+ .remove = mlxreg_hotplug_remove,
+};
+
+module_platform_driver(mlxreg_hotplug_driver);
+
+MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox regmap hotplug platform driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("platform:mlxreg-hotplug");
diff --git a/drivers/platform/mellanox/mlxreg-io.c b/drivers/platform/mellanox/mlxreg-io.c
new file mode 100644
index 000000000..a916cd89c
--- /dev/null
+++ b/drivers/platform/mellanox/mlxreg-io.c
@@ -0,0 +1,266 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Mellanox register access driver
+ *
+ * Copyright (C) 2018 Mellanox Technologies
+ * Copyright (C) 2018 Vadim Pasternak <vadimp@mellanox.com>
+ */
+
+#include <linux/bitops.h>
+#include <linux/device.h>
+#include <linux/hwmon.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_data/mlxreg.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+/* Attribute parameters. */
+#define MLXREG_IO_ATT_SIZE 10
+#define MLXREG_IO_ATT_NUM 48
+
+/**
+ * struct mlxreg_io_priv_data - driver's private data:
+ *
+ * @pdev: platform device;
+ * @pdata: platform data;
+ * @hwmon: hwmon device;
+ * @mlxreg_io_attr: sysfs attributes array;
+ * @mlxreg_io_dev_attr: sysfs sensor device attribute array;
+ * @group: sysfs attribute group;
+ * @groups: list of sysfs attribute group for hwmon registration;
+ * @regsize: size of a register value;
+ */
+struct mlxreg_io_priv_data {
+ struct platform_device *pdev;
+ struct mlxreg_core_platform_data *pdata;
+ struct device *hwmon;
+ struct attribute *mlxreg_io_attr[MLXREG_IO_ATT_NUM + 1];
+ struct sensor_device_attribute mlxreg_io_dev_attr[MLXREG_IO_ATT_NUM];
+ struct attribute_group group;
+ const struct attribute_group *groups[2];
+ int regsize;
+};
+
+static int
+mlxreg_io_get_reg(void *regmap, struct mlxreg_core_data *data, u32 in_val,
+ bool rw_flag, int regsize, u32 *regval)
+{
+ int i, val, ret;
+
+ ret = regmap_read(regmap, data->reg, regval);
+ if (ret)
+ goto access_error;
+
+ /*
+ * There are four kinds of attributes: single bit, full register's
+ * bits, bit sequence, bits in few registers For the first kind field
+ * mask indicates which bits are not related and field bit is set zero.
+ * For the second kind field mask is set to zero and field bit is set
+ * with all bits one. No special handling for such kind of attributes -
+ * pass value as is. For the third kind, the field mask indicates which
+ * bits are related and the field bit is set to the first bit number
+ * (from 1 to 32) is the bit sequence. For the fourth kind - the number
+ * of registers which should be read for getting an attribute are
+ * specified through 'data->regnum' field.
+ */
+ if (!data->bit) {
+ /* Single bit. */
+ if (rw_flag) {
+ /* For show: expose effective bit value as 0 or 1. */
+ *regval = !!(*regval & ~data->mask);
+ } else {
+ /* For store: set effective bit value. */
+ *regval &= data->mask;
+ if (in_val)
+ *regval |= ~data->mask;
+ }
+ } else if (data->mask) {
+ /* Bit sequence. */
+ if (rw_flag) {
+ /* For show: mask and shift right. */
+ *regval = ror32(*regval & data->mask, (data->bit - 1));
+ } else {
+ /* For store: shift to the position and mask. */
+ in_val = rol32(in_val, data->bit - 1) & data->mask;
+ /* Clear relevant bits and set them to new value. */
+ *regval = (*regval & ~data->mask) | in_val;
+ }
+ } else {
+ /*
+ * Some attributes could occupied few registers in case regmap
+ * bit size is 8 or 16. Compose such attributes from 'regnum'
+ * registers. Such attributes contain read-only data.
+ */
+ for (i = 1; i < data->regnum; i++) {
+ ret = regmap_read(regmap, data->reg + i, &val);
+ if (ret)
+ goto access_error;
+
+ *regval |= rol32(val, regsize * i * 8);
+ }
+ }
+
+access_error:
+ return ret;
+}
+
+static ssize_t
+mlxreg_io_attr_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct mlxreg_io_priv_data *priv = dev_get_drvdata(dev);
+ int index = to_sensor_dev_attr(attr)->index;
+ struct mlxreg_core_data *data = priv->pdata->data + index;
+ u32 regval = 0;
+ int ret;
+
+ ret = mlxreg_io_get_reg(priv->pdata->regmap, data, 0, true,
+ priv->regsize, &regval);
+ if (ret)
+ goto access_error;
+
+ return sprintf(buf, "%u\n", regval);
+
+access_error:
+ return ret;
+}
+
+static ssize_t
+mlxreg_io_attr_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct mlxreg_io_priv_data *priv = dev_get_drvdata(dev);
+ int index = to_sensor_dev_attr(attr)->index;
+ struct mlxreg_core_data *data = priv->pdata->data + index;
+ u32 input_val, regval;
+ int ret;
+
+ if (len > MLXREG_IO_ATT_SIZE)
+ return -EINVAL;
+
+ /* Convert buffer to input value. */
+ ret = kstrtou32(buf, 0, &input_val);
+ if (ret)
+ return ret;
+
+ ret = mlxreg_io_get_reg(priv->pdata->regmap, data, input_val, false,
+ priv->regsize, &regval);
+ if (ret)
+ goto access_error;
+
+ ret = regmap_write(priv->pdata->regmap, data->reg, regval);
+ if (ret)
+ goto access_error;
+
+ return len;
+
+access_error:
+ dev_err(&priv->pdev->dev, "Bus access error\n");
+ return ret;
+}
+
+static struct device_attribute mlxreg_io_devattr_rw = {
+ .show = mlxreg_io_attr_show,
+ .store = mlxreg_io_attr_store,
+};
+
+static int mlxreg_io_attr_init(struct mlxreg_io_priv_data *priv)
+{
+ int i;
+
+ priv->group.attrs = devm_kcalloc(&priv->pdev->dev,
+ priv->pdata->counter,
+ sizeof(struct attribute *),
+ GFP_KERNEL);
+ if (!priv->group.attrs)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->pdata->counter; i++) {
+ priv->mlxreg_io_attr[i] =
+ &priv->mlxreg_io_dev_attr[i].dev_attr.attr;
+ memcpy(&priv->mlxreg_io_dev_attr[i].dev_attr,
+ &mlxreg_io_devattr_rw, sizeof(struct device_attribute));
+
+ /* Set attribute name as a label. */
+ priv->mlxreg_io_attr[i]->name =
+ devm_kasprintf(&priv->pdev->dev, GFP_KERNEL,
+ priv->pdata->data[i].label);
+
+ if (!priv->mlxreg_io_attr[i]->name) {
+ dev_err(&priv->pdev->dev, "Memory allocation failed for sysfs attribute %d.\n",
+ i + 1);
+ return -ENOMEM;
+ }
+
+ priv->mlxreg_io_dev_attr[i].dev_attr.attr.mode =
+ priv->pdata->data[i].mode;
+ priv->mlxreg_io_dev_attr[i].dev_attr.attr.name =
+ priv->mlxreg_io_attr[i]->name;
+ priv->mlxreg_io_dev_attr[i].index = i;
+ sysfs_attr_init(&priv->mlxreg_io_dev_attr[i].dev_attr.attr);
+ }
+
+ priv->group.attrs = priv->mlxreg_io_attr;
+ priv->groups[0] = &priv->group;
+ priv->groups[1] = NULL;
+
+ return 0;
+}
+
+static int mlxreg_io_probe(struct platform_device *pdev)
+{
+ struct mlxreg_io_priv_data *priv;
+ int err;
+
+ priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->pdata = dev_get_platdata(&pdev->dev);
+ if (!priv->pdata) {
+ dev_err(&pdev->dev, "Failed to get platform data.\n");
+ return -EINVAL;
+ }
+
+ priv->pdev = pdev;
+ priv->regsize = regmap_get_val_bytes(priv->pdata->regmap);
+ if (priv->regsize < 0)
+ return priv->regsize;
+
+ err = mlxreg_io_attr_init(priv);
+ if (err) {
+ dev_err(&priv->pdev->dev, "Failed to allocate attributes: %d\n",
+ err);
+ return err;
+ }
+
+ priv->hwmon = devm_hwmon_device_register_with_groups(&pdev->dev,
+ "mlxreg_io",
+ priv,
+ priv->groups);
+ if (IS_ERR(priv->hwmon)) {
+ dev_err(&pdev->dev, "Failed to register hwmon device %ld\n",
+ PTR_ERR(priv->hwmon));
+ return PTR_ERR(priv->hwmon);
+ }
+
+ dev_set_drvdata(&pdev->dev, priv);
+
+ return 0;
+}
+
+static struct platform_driver mlxreg_io_driver = {
+ .driver = {
+ .name = "mlxreg-io",
+ },
+ .probe = mlxreg_io_probe,
+};
+
+module_platform_driver(mlxreg_io_driver);
+
+MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox regmap I/O access driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:mlxreg-io");