summary | refs | log | tree | commit | diff | stats
path: root/lib/dim
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- lib/dim/Makefile | 7
-rw-r--r-- lib/dim/dim.c | 84
-rw-r--r-- lib/dim/net_dim.c | 247
-rw-r--r-- lib/dim/rdma_dim.c | 109
4 files changed, 447 insertions, 0 deletions
diff --git a/lib/dim/Makefile b/lib/dim/Makefile
new file mode 100644
index 000000000..1d6858a10
--- /dev/null
+++ b/lib/dim/Makefile
@@ -0,0 +1,7 @@
+#
+# DIM (Dynamic Interrupt Moderation) library
+#
+# NOTE(review): composite object and its first member are both "dim.o" - confirm kbuild accepts this.
+obj-$(CONFIG_DIMLIB) += dim.o
+
+dim-y := dim.o net_dim.o rdma_dim.o
diff --git a/lib/dim/dim.c b/lib/dim/dim.c
new file mode 100644
index 000000000..e89aaf07b
--- /dev/null
+++ b/lib/dim/dim.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/dim.h>
+
+/* True while parked, or when exactly one step was taken in the current
+ * direction and more than one in the opposite direction. */
+bool dim_on_top(struct dim *dim)
+{
+	if (dim->tune_state == DIM_PARKING_ON_TOP ||
+	    dim->tune_state == DIM_PARKING_TIRED)
+		return true;
+	if (dim->tune_state == DIM_GOING_RIGHT)
+		return dim->steps_left > 1 && dim->steps_right == 1;
+	/* DIM_GOING_LEFT, or any state not named above */
+	return dim->steps_right > 1 && dim->steps_left == 1;
+}
+EXPORT_SYMBOL(dim_on_top);
+
+/*
+ * Reverse the tuning direction and restart the step count of the new
+ * direction.  The step count of the direction being left is not
+ * modified.
+ */
+void dim_turn(struct dim *dim)
+{
+	if (dim->tune_state == DIM_GOING_RIGHT) {
+		dim->tune_state = DIM_GOING_LEFT;
+		dim->steps_left = 0;
+	} else if (dim->tune_state == DIM_GOING_LEFT) {
+		dim->tune_state = DIM_GOING_RIGHT;
+		dim->steps_right = 0;
+	}
+	/* DIM_PARKING_ON_TOP and DIM_PARKING_TIRED have nothing to reverse. */
+}
+EXPORT_SYMBOL(dim_turn);
+/* Enter DIM_PARKING_ON_TOP with the step and tired counters cleared. */
+void dim_park_on_top(struct dim *dim)
+{
+	dim->tune_state = DIM_PARKING_ON_TOP;
+	dim->tired = 0;
+	dim->steps_left = 0;
+	dim->steps_right = 0;
+}
+EXPORT_SYMBOL(dim_park_on_top);
+/* Enter DIM_PARKING_TIRED; step counters are cleared, dim->tired is kept. */
+void dim_park_tired(struct dim *dim)
+{
+	dim->tune_state = DIM_PARKING_TIRED;
+	dim->steps_left = 0;
+	dim->steps_right = 0;
+}
+EXPORT_SYMBOL(dim_park_tired);
+/* Turn counter deltas between two samples into per-msec rates; false if 0 us elapsed. */
+bool dim_calc_stats(struct dim_sample *start, struct dim_sample *end,
+ struct dim_stats *curr_stats)
+{
+ /* u32 holds up to 71 minutes, should be enough */
+ u32 delta_us = ktime_us_delta(end->time, start->time);
+ u32 npkts = BIT_GAP(BITS_PER_TYPE(u32), end->pkt_ctr, start->pkt_ctr);
+ u32 nbytes = BIT_GAP(BITS_PER_TYPE(u32), end->byte_ctr,
+ start->byte_ctr);
+ u32 ncomps = BIT_GAP(BITS_PER_TYPE(u32), end->comp_ctr,
+ start->comp_ctr);
+
+ if (!delta_us) /* every rate below divides by delta_us */
+ return false;
+
+ curr_stats->ppms = DIV_ROUND_UP(npkts * USEC_PER_MSEC, delta_us);
+ curr_stats->bpms = DIV_ROUND_UP(nbytes * USEC_PER_MSEC, delta_us);
+ curr_stats->epms = DIV_ROUND_UP(DIM_NEVENTS * USEC_PER_MSEC,
+ delta_us); /* event count is taken as the constant DIM_NEVENTS */
+ curr_stats->cpms = DIV_ROUND_UP(ncomps * USEC_PER_MSEC, delta_us);
+ if (curr_stats->epms != 0) /* cpe_ratio: completions per event, times 100 */
+ curr_stats->cpe_ratio = DIV_ROUND_DOWN_ULL(
+ curr_stats->cpms * 100, curr_stats->epms);
+ else
+ curr_stats->cpe_ratio = 0;
+
+ return true;
+}
+EXPORT_SYMBOL(dim_calc_stats);
diff --git a/lib/dim/net_dim.c b/lib/dim/net_dim.c
new file mode 100644
index 000000000..4e32f7aaa
--- /dev/null
+++ b/lib/dim/net_dim.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/dim.h>
+
+/*
+ * Net DIM profiles:
+ * There is a separate set of profiles for each CQ period mode.
+ * There is a separate set of profiles for RX and for TX CQs.
+ * Each set must contain NET_DIM_PARAMS_NUM_PROFILES profiles.
+ */
+#define NET_DIM_PARAMS_NUM_PROFILES 5 /* entries per profile set */
+#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256 /* .pkts of every RX EQE profile */
+#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128 /* .pkts of every TX EQE profile */
+#define NET_DIM_DEF_PROFILE_CQE 1 /* default profile index in CQE mode */
+#define NET_DIM_DEF_PROFILE_EQE 1 /* default profile index otherwise */
+
+#define NET_DIM_RX_EQE_PROFILES { \
+ {.usec = 1, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 8, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 64, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 128, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 256, .pkts = NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE,} \
+}
+
+#define NET_DIM_RX_CQE_PROFILES { \
+ {.usec = 2, .pkts = 256,}, \
+ {.usec = 8, .pkts = 128,}, \
+ {.usec = 16, .pkts = 64,}, \
+ {.usec = 32, .pkts = 64,}, \
+ {.usec = 64, .pkts = 64,} \
+}
+
+#define NET_DIM_TX_EQE_PROFILES { \
+ {.usec = 1, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 8, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 32, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 64, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,}, \
+ {.usec = 128, .pkts = NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE,} \
+}
+
+#define NET_DIM_TX_CQE_PROFILES { \
+ {.usec = 5, .pkts = 128,}, \
+ {.usec = 8, .pkts = 64,}, \
+ {.usec = 16, .pkts = 32,}, \
+ {.usec = 32, .pkts = 32,}, \
+ {.usec = 64, .pkts = 32,} \
+}
+/* RX profiles indexed [cq_period_mode][profile]: EQE set first, CQE set second. */
+static const struct dim_cq_moder
+rx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+ NET_DIM_RX_EQE_PROFILES,
+ NET_DIM_RX_CQE_PROFILES,
+};
+/* TX profiles indexed [cq_period_mode][profile]: EQE set first, CQE set second. */
+static const struct dim_cq_moder
+tx_profile[DIM_CQ_PERIOD_NUM_MODES][NET_DIM_PARAMS_NUM_PROFILES] = {
+ NET_DIM_TX_EQE_PROFILES,
+ NET_DIM_TX_CQE_PROFILES,
+};
+/* Copy RX profile [cq_period_mode][ix] and stamp the mode; no bounds check. */
+struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix)
+{
+	struct dim_cq_moder moder;
+
+	moder = rx_profile[cq_period_mode][ix];
+	moder.cq_period_mode = cq_period_mode;
+	return moder;
+}
+EXPORT_SYMBOL(net_dim_get_rx_moderation);
+/* Return the default RX profile for @cq_period_mode. */
+struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode)
+{
+	u8 ix = NET_DIM_DEF_PROFILE_EQE;
+
+	if (cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE)
+		ix = NET_DIM_DEF_PROFILE_CQE;
+	return net_dim_get_rx_moderation(cq_period_mode, ix);
+}
+EXPORT_SYMBOL(net_dim_get_def_rx_moderation);
+/* Copy TX profile [cq_period_mode][ix] and stamp the mode; no bounds check. */
+struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix)
+{
+	struct dim_cq_moder moder;
+
+	moder = tx_profile[cq_period_mode][ix];
+	moder.cq_period_mode = cq_period_mode;
+	return moder;
+}
+EXPORT_SYMBOL(net_dim_get_tx_moderation);
+/* Return the default TX profile for @cq_period_mode. */
+struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode)
+{
+	u8 ix = NET_DIM_DEF_PROFILE_EQE;
+
+	if (cq_period_mode == DIM_CQ_PERIOD_MODE_START_FROM_CQE)
+		ix = NET_DIM_DEF_PROFILE_CQE;
+	return net_dim_get_tx_moderation(cq_period_mode, ix);
+}
+EXPORT_SYMBOL(net_dim_get_def_tx_moderation);
+
+/*
+ * Move profile_ix one profile in the current tuning direction.
+ *
+ * Returns DIM_TOO_TIRED once dim->tired equals 2 * NET_DIM_PARAMS_NUM_PROFILES,
+ * DIM_ON_EDGE at the first/last profile, otherwise DIM_STEPPED.
+ */
+static int net_dim_step(struct dim *dim)
+{
+	if (dim->tired == (NET_DIM_PARAMS_NUM_PROFILES * 2))
+		return DIM_TOO_TIRED;
+
+	if (dim->tune_state == DIM_GOING_RIGHT) {
+		if (dim->profile_ix == (NET_DIM_PARAMS_NUM_PROFILES - 1))
+			return DIM_ON_EDGE;
+		dim->profile_ix++;
+		dim->steps_right++;
+	} else if (dim->tune_state == DIM_GOING_LEFT) {
+		if (dim->profile_ix == 0)
+			return DIM_ON_EDGE;
+		dim->profile_ix--;
+		dim->steps_left++;
+	}
+
+	dim->tired++;
+	return DIM_STEPPED;
+}
+/* Leave a parking state: head right from profile 0, left otherwise, and step. */
+static void net_dim_exit_parking(struct dim *dim)
+{
+	dim->tune_state = !dim->profile_ix ? DIM_GOING_RIGHT : DIM_GOING_LEFT;
+	net_dim_step(dim);
+}
+/* Rank curr against prev: bytes/ms first, then packets/ms, then events/ms. */
+static int net_dim_stats_compare(struct dim_stats *curr,
+ struct dim_stats *prev)
+{
+ if (!prev->bpms) /* no previous byte rate to compare against */
+ return curr->bpms ? DIM_STATS_BETTER : DIM_STATS_SAME;
+
+ if (IS_SIGNIFICANT_DIFF(curr->bpms, prev->bpms))
+ return (curr->bpms > prev->bpms) ? DIM_STATS_BETTER :
+ DIM_STATS_WORSE;
+
+ if (!prev->ppms) /* no previous packet rate to compare against */
+ return curr->ppms ? DIM_STATS_BETTER :
+ DIM_STATS_SAME;
+
+ if (IS_SIGNIFICANT_DIFF(curr->ppms, prev->ppms))
+ return (curr->ppms > prev->ppms) ? DIM_STATS_BETTER :
+ DIM_STATS_WORSE;
+
+ if (!prev->epms)
+ return DIM_STATS_SAME;
+
+ if (IS_SIGNIFICANT_DIFF(curr->epms, prev->epms)) /* a lower event rate is better */
+ return (curr->epms < prev->epms) ? DIM_STATS_BETTER :
+ DIM_STATS_WORSE;
+
+ return DIM_STATS_SAME;
+}
+/* Run one tuning decision on fresh stats; returns true when profile_ix changed. */
+static bool net_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
+{
+ int prev_state = dim->tune_state;
+ int prev_ix = dim->profile_ix;
+ int stats_res;
+ int step_res;
+
+ switch (dim->tune_state) {
+ case DIM_PARKING_ON_TOP:
+ stats_res = net_dim_stats_compare(curr_stats,
+ &dim->prev_stats);
+ if (stats_res != DIM_STATS_SAME) /* stats moved: resume tuning */
+ net_dim_exit_parking(dim);
+ break;
+
+ case DIM_PARKING_TIRED:
+ dim->tired--;
+ if (!dim->tired) /* resume once tired has counted down to zero */
+ net_dim_exit_parking(dim);
+ break;
+
+ case DIM_GOING_RIGHT:
+ case DIM_GOING_LEFT:
+ stats_res = net_dim_stats_compare(curr_stats,
+ &dim->prev_stats);
+ if (stats_res != DIM_STATS_BETTER) /* same or worse: reverse direction */
+ dim_turn(dim);
+
+ if (dim_on_top(dim)) {
+ dim_park_on_top(dim);
+ break;
+ }
+
+ step_res = net_dim_step(dim);
+ switch (step_res) {
+ case DIM_ON_EDGE:
+ dim_park_on_top(dim);
+ break;
+ case DIM_TOO_TIRED:
+ dim_park_tired(dim);
+ break;
+ }
+
+ break;
+ }
+
+ if (prev_state != DIM_PARKING_ON_TOP || /* keep the old baseline only while staying parked on top */
+ dim->tune_state != DIM_PARKING_ON_TOP)
+ dim->prev_stats = *curr_stats;
+
+ return dim->profile_ix != prev_ix;
+}
+/* Feed one sample into the net DIM state machine; may schedule dim->work. */
+void net_dim(struct dim *dim, struct dim_sample end_sample)
+{
+ struct dim_stats curr_stats;
+ u16 nevents;
+
+ switch (dim->state) {
+ case DIM_MEASURE_IN_PROGRESS:
+ nevents = BIT_GAP(BITS_PER_TYPE(u16),
+ end_sample.event_ctr,
+ dim->start_sample.event_ctr);
+ if (nevents < DIM_NEVENTS) /* not enough events yet: keep measuring */
+ break;
+ if (!dim_calc_stats(&dim->start_sample, &end_sample, &curr_stats))
+ break;
+ if (net_dim_decision(&curr_stats, dim)) { /* profile index changed */
+ dim->state = DIM_APPLY_NEW_PROFILE;
+ schedule_work(&dim->work);
+ break;
+ }
+ fallthrough; /* profile unchanged: start a new measurement */
+ case DIM_START_MEASURE:
+ dim_update_sample(end_sample.event_ctr, end_sample.pkt_ctr,
+ end_sample.byte_ctr, &dim->start_sample);
+ dim->state = DIM_MEASURE_IN_PROGRESS;
+ break;
+ case DIM_APPLY_NEW_PROFILE: /* samples are ignored in this state */
+ break;
+ }
+}
+EXPORT_SYMBOL(net_dim);
diff --git a/lib/dim/rdma_dim.c b/lib/dim/rdma_dim.c
new file mode 100644
index 000000000..88f779486
--- /dev/null
+++ b/lib/dim/rdma_dim.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2019, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/dim.h>
+/* Move profile_ix one profile in the current tuning direction.
+ * Returns DIM_ON_EDGE at the first/last profile, otherwise DIM_STEPPED. */
+static int rdma_dim_step(struct dim *dim)
+{
+	if (dim->tune_state == DIM_GOING_RIGHT) {
+		if (dim->profile_ix == (RDMA_DIM_PARAMS_NUM_PROFILES - 1))
+			return DIM_ON_EDGE;
+		dim->profile_ix++;
+		dim->steps_right++;
+	} else if (dim->tune_state == DIM_GOING_LEFT) {
+		if (dim->profile_ix == 0)
+			return DIM_ON_EDGE;
+		dim->profile_ix--;
+		dim->steps_left++;
+	}
+
+	return DIM_STEPPED;
+}
+/* Rank curr against prev: completions/ms first, then cpe_ratio; higher is better. */
+static int rdma_dim_stats_compare(struct dim_stats *curr,
+ struct dim_stats *prev)
+{
+ /* first stat */
+ if (!prev->cpms)
+ return DIM_STATS_SAME;
+
+ if (IS_SIGNIFICANT_DIFF(curr->cpms, prev->cpms))
+ return (curr->cpms > prev->cpms) ? DIM_STATS_BETTER :
+ DIM_STATS_WORSE;
+
+ if (IS_SIGNIFICANT_DIFF(curr->cpe_ratio, prev->cpe_ratio))
+ return (curr->cpe_ratio > prev->cpe_ratio) ? DIM_STATS_BETTER :
+ DIM_STATS_WORSE;
+
+ return DIM_STATS_SAME;
+}
+/* Run one RDMA tuning decision on fresh stats; returns true when profile_ix changed. */
+static bool rdma_dim_decision(struct dim_stats *curr_stats, struct dim *dim)
+{
+ int prev_ix = dim->profile_ix;
+ u8 state = dim->tune_state;
+ int stats_res;
+ int step_res;
+
+ if (state != DIM_PARKING_ON_TOP && state != DIM_PARKING_TIRED) { /* parked states skip tuning */
+ stats_res = rdma_dim_stats_compare(curr_stats,
+ &dim->prev_stats);
+
+ switch (stats_res) {
+ case DIM_STATS_SAME:
+ if (curr_stats->cpe_ratio <= 50 * prev_ix) /* NOTE(review): rationale for 50 * index not visible here */
+ dim->profile_ix = 0;
+ break;
+ case DIM_STATS_WORSE:
+ dim_turn(dim);
+ fallthrough; /* after turning, step in the new direction */
+ case DIM_STATS_BETTER:
+ step_res = rdma_dim_step(dim);
+ if (step_res == DIM_ON_EDGE) /* at the first/last profile: head back */
+ dim_turn(dim);
+ break;
+ }
+ }
+
+ dim->prev_stats = *curr_stats;
+
+ return dim->profile_ix != prev_ix;
+}
+/* Count one event with @completions; re-evaluate after DIM_NEVENTS events. */
+void rdma_dim(struct dim *dim, u64 completions)
+{
+	struct dim_sample *curr_sample = &dim->measuring_sample;
+	struct dim_stats curr_stats;
+	u32 nevents;
+
+	dim_update_sample_with_comps(curr_sample->event_ctr + 1, 0, 0,
+				     curr_sample->comp_ctr + completions,
+				     &dim->measuring_sample);
+	switch (dim->state) {
+	case DIM_MEASURE_IN_PROGRESS: /* gap must survive event_ctr wrap */
+		nevents = BIT_GAP(BITS_PER_TYPE(u16), curr_sample->event_ctr,
+				  dim->start_sample.event_ctr);
+		if (nevents < DIM_NEVENTS)
+			break;
+		if (!dim_calc_stats(&dim->start_sample, curr_sample, &curr_stats))
+			break;
+		if (rdma_dim_decision(&curr_stats, dim)) {
+			dim->state = DIM_APPLY_NEW_PROFILE;
+			schedule_work(&dim->work);
+			break;
+		}
+		fallthrough; /* profile unchanged: start a new measurement */
+	case DIM_START_MEASURE:
+		dim->state = DIM_MEASURE_IN_PROGRESS;
+		dim_update_sample_with_comps(curr_sample->event_ctr, 0, 0,
+					     curr_sample->comp_ctr,
+					     &dim->start_sample);
+		break;
+	case DIM_APPLY_NEW_PROFILE: /* events are only counted in this state */
+		break;
+	}
+}
+EXPORT_SYMBOL(rdma_dim);