Diffstat (limited to 'monitoring/ceph-mixin/tests_dashboards/features')
11 files changed, 922 insertions, 0 deletions
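A note on reading the test data in the scenarios below: the `values` column uses promtool's expanding series notation, where `start+stepxN` adds N further samples after `start`, so `10+60x1` stands for the samples 10 and 70. With one sample per minute (the apparent default when a scenario sets no explicit interval), a rate over such a counter works out to 1 per second, which is where expected values like `1` in the IOPS panels come from. A rough sketch of that arithmetic, under those assumptions:

# Rough illustration (not part of the diff): how "10+60x1" expands and what
# a rate over it yields, assuming one sample per minute.
def expand(start, step, n):
    # promtool series notation "start+stepxN" -> N additional samples
    return [start + step * i for i in range(n + 1)]

def per_second_rate(samples, step_seconds=60):
    # increase over the full range divided by the elapsed time
    return (samples[-1] - samples[0]) / (step_seconds * (len(samples) - 1))

samples = expand(10, 60, 1)        # [10, 70]
print(per_second_rate(samples))    # 1.0, matching the expected panel values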
diff --git a/monitoring/ceph-mixin/tests_dashboards/features/__init__.py b/monitoring/ceph-mixin/tests_dashboards/features/__init__.py new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/monitoring/ceph-mixin/tests_dashboards/features/__init__.py diff --git a/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature b/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature new file mode 100644 index 000000000..1a446cd2c --- /dev/null +++ b/monitoring/ceph-mixin/tests_dashboards/features/ceph-cluster.feature @@ -0,0 +1,54 @@ +Feature: Ceph Cluster Dashboard + +Scenario: "Test total PG States" + Given the following series: + | metrics | values | + | ceph_pg_total{foo="var"} | 10 100 | + | ceph_pg_total{foo="bar"} | 20 200 | + Then Grafana panel `PG States` with legend `Total` shows: + | metrics | values | + | {} | 300 | + +Scenario: "Test OSDs in" + Given the following series: + | metrics | values | + | ceph_osd_in{ceph_daemon="osd.0"} | 1.0 | + | ceph_osd_in{ceph_daemon="osd.1"} | 0.0 | + | ceph_osd_in{ceph_daemon="osd.2"} | 1.0 | + When variable `instance` is `.*` + Then Grafana panel `OSDs` with legend `In` shows: + | metrics | values | + | {} | 2 | + +Scenario: "Test OSDs down" + Given the following series: + | metrics | values | + | ceph_osd_up{ceph_daemon="osd.0", instance="127.0.0.1"} | 0.0 | + | ceph_osd_up{ceph_daemon="osd.1", instance="127.0.0.1"} | 0.0 | + | ceph_osd_up{ceph_daemon="osd.2", instance="127.0.0.1"} | 0.0 | + When variable `instance` is `127.0.0.1` + Then Grafana panel `OSDs` with legend `Down` shows: + | metrics | values | + | {} | 3 | + +Scenario: "Test OSDs out" + Given the following series: + | metrics | values | + | ceph_osd_in{ceph_daemon="osd.0", instance="127.0.0.1"} | 0.0 | + | ceph_osd_in{ceph_daemon="osd.1", instance="127.0.0.1"} | 1.0 | + | ceph_osd_in{ceph_daemon="osd.2", instance="127.0.0.1"} | 0.0 | + When variable `instance` is `127.0.0.1` + Then Grafana panel `OSDs` with legend `Out` shows: + | metrics | values | + | {} | 2 | + +Scenario: "Test OSDs all" + Given the following series: + | metrics | values | + | ceph_osd_metadata{ceph_daemon="osd.0", instance="127.0.0.1"} | 1.0 | + | ceph_osd_metadata{ceph_daemon="osd.1", instance="127.0.0.1"} | 1.0 | + | ceph_osd_metadata{ceph_daemon="osd.2", instance="127.0.0.1"} | 1.0 | + When variable `instance` is `127.0.0.1` + Then Grafana panel `OSDs` with legend `All` shows: + | metrics | values | + | {} | 3 | diff --git a/monitoring/ceph-mixin/tests_dashboards/features/environment.py b/monitoring/ceph-mixin/tests_dashboards/features/environment.py new file mode 100644 index 000000000..5dc76a09e --- /dev/null +++ b/monitoring/ceph-mixin/tests_dashboards/features/environment.py @@ -0,0 +1,135 @@ +# type: ignore[no-redef] +# pylint: disable=E0611,W0613,E0102 +import copy + +from behave import given, then, when +from prettytable import PrettyTable + +from tests_dashboards import PromqlTest +from tests_dashboards.util import get_dashboards_data, resolve_time_and_unit + + +class GlobalContext: + def __init__(self): + self.tested_queries_count = 0 + self.promql_expr_test = None + self.data = get_dashboards_data() + self.query_map = self.data['queries'] + + def reset_promql_test(self): + self.promql_expr_test = PromqlTest() + self.promql_expr_test.variables = copy.copy(self.data['variables']) + + def print_query_stats(self): + total = len(self.query_map) + table = PrettyTable() + table.field_names = ['Name', 'Queries', 'Tested', 'Cover'] + + def percent(tested, total): + return 
str(round((tested / total) * 100, 2)) + '%' + + def file_name(path): + return path.split('/')[-1] + + total = 0 + tested = 0 + for path, stat in self.data['stats'].items(): + assert stat['total'] + table.add_row([file_name(path), stat['total'], stat['tested'], + percent(stat['tested'], stat['total'])]) + total += stat['total'] + tested += stat['tested'] + + assert total + table.add_row(['Total', total, tested, percent(tested, total)]) + print(table) + + +global_context = GlobalContext() + +# Behave environment hooks +# =========================== + + +def before_scenario(context, scenario): + global_context.reset_promql_test() + + +def after_scenario(context, scenario): + assert global_context.promql_expr_test.run_promtool() + + +def after_all(context): + global_context.print_query_stats() + + +@given("the following series") +def step_impl(context): + for row in context.table: + metric = row['metrics'] + value = row['values'] + global_context.promql_expr_test.add_series(metric, value) + + +@when('evaluation interval is `{interval}`') +def step_impl(context, interval): + interval_without_unit, unit = resolve_time_and_unit(interval) + if interval_without_unit is None: + raise ValueError(f'Invalid interval time: {interval}. ' + + 'A valid time looks like "1m" where you have a number plus a unit') + global_context.promql_expr_test.set_evaluation_interval(interval_without_unit, unit) + + +@when('interval is `{interval}`') +def step_impl(context, interval): + interval_without_unit, unit = resolve_time_and_unit(interval) + if interval_without_unit is None: + raise ValueError(f'Invalid interval time: {interval}. ' + + 'A valid time looks like "1m" where you have a number plus a unit') + global_context.promql_expr_test.set_interval(interval_without_unit, unit) + + +@when('evaluation time is `{eval_time}`') +def step_impl(context, eval_time): + eval_time_without_unit, unit = resolve_time_and_unit(eval_time) + if eval_time_without_unit is None: + raise ValueError(f'Invalid evaluation time: {eval_time}. ' + + 'A valid time looks like "1m" where you have a number plus a unit') + global_context.promql_expr_test.set_eval_time(eval_time_without_unit, unit) + + +@when('variable `{variable}` is `{value}`') +def step_impl(context, variable, value): + global_context.promql_expr_test.set_variable(variable, value) + + +@then('Grafana panel `{panel_name}` with legend `{legend}` shows') +def step_impl(context, panel_name, legend): + """ + This step can have an empty legend. Since 'behave' provides no way + to pass an empty string, the literal EMPTY is used to mark it as empty.
+ """ + if legend == "EMPTY": + legend = '' + query_id = panel_name + '-' + legend + if query_id not in global_context.query_map: + raise KeyError((f'Query with legend {legend} in panel "{panel_name}"' + 'couldn\'t be found')) + + expr = global_context.query_map[query_id]['query'] + global_context.promql_expr_test.set_expression(expr) + for row in context.table: + metric = row['metrics'] + value = row['values'] + global_context.promql_expr_test.add_exp_samples(metric, float(value)) + path = global_context.query_map[query_id]['path'] + global_context.data['stats'][path]['tested'] += 1 + + +@then('query `{query}` produces') +def step_impl(context, query): + global_context.promql_expr_test.set_expression(query) + for row in context.table: + metric = row['metrics'] + value = row['values'] + global_context.promql_expr_test.add_exp_samples(metric, float(value)) diff --git a/monitoring/ceph-mixin/tests_dashboards/features/host-details.feature b/monitoring/ceph-mixin/tests_dashboards/features/host-details.feature new file mode 100644 index 000000000..51e3c5819 --- /dev/null +++ b/monitoring/ceph-mixin/tests_dashboards/features/host-details.feature @@ -0,0 +1,131 @@ +Feature: Host Details Dashboard + +Scenario: "Test OSD" + Given the following series: + | metrics | values | + | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.0",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 | + | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.1",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 | + | ceph_osd_metadata{job="ceph",back_iface="",ceph_daemon="osd.2",cluster_addr="192.168.1.12",device_class="hdd",front_iface="",hostname="127.0.0.1",objectstore="bluestore",public_addr="192.168.1.12",ceph_version="ceph version 17.0.0-8967-g6932a4f702a (6932a4f702a0d557fc36df3ca7a3bca70de42667) quincy (dev)"} | 1.0 | + When variable `ceph_hosts` is `127.0.0.1` + Then Grafana panel `OSDs` with legend `EMPTY` shows: + | metrics | values | + | {} | 3 | + +# IOPS Panel - begin + +Scenario: "Test Disk IOPS - Writes - Several OSDs per device" + Given the following series: + | metrics | values | + | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `ceph_hosts` is `localhost` + Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) writes` shows: + | metrics | values | + | {job="ceph",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 | + | {job="ceph",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 | + +Scenario: "Test Disk IOPS - Writes - Single OSD per device" + Given the following series: + | metrics | values | + | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | + | 
node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `ceph_hosts` is `localhost` + Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) writes` shows: + | metrics | values | + | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | + | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 | + +Scenario: "Test Disk IOPS - Reads - Several OSDs per device" + Given the following series: + | metrics | values | + | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0 osd.1 osd.2",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.3 osd.4 osd.5",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `ceph_hosts` is `localhost` + Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) reads` shows: + | metrics | values | + | {job="ceph",ceph_daemon="osd.0 osd.1 osd.2", device="sda", instance="localhost"} | 1 | + | {job="ceph",ceph_daemon="osd.3 osd.4 osd.5", device="sdb", instance="localhost"} | 1 | + +Scenario: "Test Disk IOPS - Reads - Single OSD per device" + Given the following series: + | metrics | values | + | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `ceph_hosts` is `localhost` + Then Grafana panel `$ceph_hosts Disk IOPS` with legend `{{device}}({{ceph_daemon}}) reads` shows: + | metrics | values | + | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | + | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 | + +# IOPS Panel - end + +# Node disk bytes written/read panel - begin + +Scenario: "Test disk throughput - read" + Given the following series: + | metrics | values | + | node_disk_read_bytes_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_read_bytes_total{job="ceph",device="sdb",instance="localhost:9100"} | 100+600x1 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `ceph_hosts` is `localhost` + Then Grafana panel `$ceph_hosts Throughput by Disk` with legend `{{device}}({{ceph_daemon}}) read` shows: + | metrics | values | + | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | + | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 | + +Scenario: "Test disk throughput - write" + Given the following series: + | metrics | values | + | node_disk_written_bytes_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | + 
| node_disk_written_bytes_total{job="ceph",device="sdb",instance="localhost:9100"} | 100+600x1 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `ceph_hosts` is `localhost` + Then Grafana panel `$ceph_hosts Throughput by Disk` with legend `{{device}}({{ceph_daemon}}) write` shows: + | metrics | values | + | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | + | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 10 | + +# Node disk bytes written/read panel - end + +Scenario: "Test $ceph_hosts Disk Latency panel" + Given the following series: + | metrics | values | + | node_disk_write_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_write_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | + | node_disk_writes_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_writes_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | + | node_disk_read_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_read_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | + | node_disk_reads_completed_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_reads_completed_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `ceph_hosts` is `localhost` + Then Grafana panel `$ceph_hosts Disk Latency` with legend `{{device}}({{ceph_daemon}})` shows: + | metrics | values | + | {ceph_daemon="osd.0", device="sda", instance="localhost"} | 1 | + | {ceph_daemon="osd.1", device="sdb", instance="localhost"} | 1 | + +Scenario: "Test $ceph_hosts Disk utilization" + Given the following series: + | metrics | values | + | node_disk_io_time_seconds_total{job="ceph",device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_io_time_seconds_total{job="ceph",device="sdb",instance="localhost:9100"} | 10+60x1 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `ceph_hosts` is `localhost` + Then Grafana panel `$ceph_hosts Disk utilization` with legend `{{device}}({{ceph_daemon}})` shows: + | metrics | values | + | {job="ceph",ceph_daemon="osd.0", device="sda", instance="localhost"} | 100 | + | {job="ceph",ceph_daemon="osd.1", device="sdb", instance="localhost"} | 100 | + diff --git a/monitoring/ceph-mixin/tests_dashboards/features/hosts_overview.feature b/monitoring/ceph-mixin/tests_dashboards/features/hosts_overview.feature new file mode 100644 index 000000000..6c5eceaed --- /dev/null +++ b/monitoring/ceph-mixin/tests_dashboards/features/hosts_overview.feature @@ -0,0 +1,41 @@ +Feature: Hosts Overview Dashboard + +Scenario: "Test network load succeeds" + Given the following series: + | metrics | values | + | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 | + | 
node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | + When variable `osd_hosts` is `127.0.0.1` + Then Grafana panel `Network Load` with legend `EMPTY` shows: + | metrics | values | + | {} | 6 | + +Scenario: "Test network load with bonding succeeds" + Given the following series: + | metrics | values | + | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 | + | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 | + | node_network_transmit_bytes{instance="127.0.0.1", device="bond0"} | 20 200 300 | + | node_network_transmit_bytes{instance="127.0.0.1", device="bond0"} | 20 200 300 | + | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | + When variable `osd_hosts` is `127.0.0.1` + Then Grafana panel `Network Load` with legend `EMPTY` shows: + | metrics | values | + | {} | 6 | + +Scenario: "Test AVG Disk Utilization" + Given the following series: + | metrics | values | + | node_disk_io_time_seconds_total{device="sda",instance="localhost:9100"} | 10+60x1 | + | node_disk_io_time_seconds_total{device="sdb",instance="localhost:9100"} | 10+60x1 | + | node_disk_io_time_seconds_total{device="sdc",instance="localhost:9100"} | 10 2000 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `osd_hosts` is `localhost` + Then Grafana panel `AVG Disk Utilization` with legend `EMPTY` shows: + | metrics | values | + | {} | 100 | diff --git a/monitoring/ceph-mixin/tests_dashboards/features/osd-device-details.feature b/monitoring/ceph-mixin/tests_dashboards/features/osd-device-details.feature new file mode 100644 index 000000000..0d6ca8b17 --- /dev/null +++ b/monitoring/ceph-mixin/tests_dashboards/features/osd-device-details.feature @@ -0,0 +1,88 @@ +Feature: OSD device details + +Scenario: "Test Physical Device Latency for $osd - Reads" + Given the following series: + | metrics | values | + | node_disk_reads_completed_total{device="sda",instance="localhost"} | 10 60 | + | node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 60 | + | node_disk_read_time_seconds_total{device="sda",instance="localhost"} | 100 600 | + | node_disk_read_time_seconds_total{device="sdb",instance="localhost"} | 100 600 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `osd` is `osd.0` + Then Grafana panel `Physical Device Latency for $osd` with legend `{{instance}}/{{device}} Reads` shows: + | metrics | values | + | {device="sda",instance="localhost"} | 10 | + +Scenario: "Test Physical Device Latency for $osd - Writes" + Given the following series: + | metrics | values | + | node_disk_writes_completed_total{device="sda",instance="localhost"} | 10 60 | + | node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 60 | + | node_disk_write_time_seconds_total{device="sda",instance="localhost"} | 100 600 | + | 
node_disk_write_time_seconds_total{device="sdb",instance="localhost"} | 100 600 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `osd` is `osd.0` + Then Grafana panel `Physical Device Latency for $osd` with legend `{{instance}}/{{device}} Writes` shows: + | metrics | values | + | {device="sda",instance="localhost"} | 10 | + +Scenario: "Test Physical Device R/W IOPS for $osd - Writes" + Given the following series: + | metrics | values | + | node_disk_writes_completed_total{device="sda",instance="localhost"} | 10 100 | + | node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 100 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `osd` is `osd.0` + Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Writes` shows: + | metrics | values | + | {device="sda",instance="localhost"} | 1.5 | + +Scenario: "Test Physical Device R/W IOPS for $osd - Reads" + Given the following series: + | metrics | values | + | node_disk_reads_completed_total{device="sda",instance="localhost"} | 10 100 | + | node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 100 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `osd` is `osd.0` + Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Reads` shows: + | metrics | values | + | {device="sda",instance="localhost"} | 1.5 | + +Scenario: "Test Physical Device R/W Bytes for $osd - Reads" + Given the following series: + | metrics | values | + | node_disk_reads_completed_total{device="sda",instance="localhost"} | 10 100 | + | node_disk_reads_completed_total{device="sdb",instance="localhost"} | 10 100 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `osd` is `osd.0` + Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Reads` shows: + | metrics | values | + | {device="sda",instance="localhost"} | 1.5 | + +Scenario: "Test Physical Device R/W Bytes for $osd - Writes" + Given the following series: + | metrics | values | + | node_disk_writes_completed_total{device="sda",instance="localhost"} | 10 100 | + | node_disk_writes_completed_total{device="sdb",instance="localhost"} | 10 100 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `osd` is `osd.0` + Then Grafana panel `Physical Device R/W IOPS for $osd` with legend `{{device}} on {{instance}} Writes` shows: + | metrics | values | + | {device="sda",instance="localhost"} | 1.5 | + +Scenario: "Test Physical Device Util% for $osd" + Given the following series: + | metrics | values | + | 
node_disk_io_time_seconds_total{device="sda",instance="localhost:9100"} | 10 100 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.0",device="/dev/sda",instance="localhost:9283"} | 1.0 | + | ceph_disk_occupation_human{job="ceph",ceph_daemon="osd.1",device="/dev/sdb",instance="localhost:9283"} | 1.0 | + When variable `osd` is `osd.0` + Then Grafana panel `Physical Device Util% for $osd` with legend `{{device}} on {{instance}}` shows: + | metrics | values | + | {device="sda",instance="localhost"} | 1.5 | diff --git a/monitoring/ceph-mixin/tests_dashboards/features/osds-overview.feature b/monitoring/ceph-mixin/tests_dashboards/features/osds-overview.feature new file mode 100644 index 000000000..78d306419 --- /dev/null +++ b/monitoring/ceph-mixin/tests_dashboards/features/osds-overview.feature @@ -0,0 +1,15 @@ +Feature: OSD Overview + +Scenario: "Test OSD onode Hits Ratio" + Given the following series: + | metrics | values | + | ceph_bluestore_onode_hits{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"} | 5255 | + | ceph_bluestore_onode_hits{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"} | 5419 | + | ceph_bluestore_onode_hits{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"} | 5242 | + | ceph_bluestore_onode_misses{ceph_daemon="osd.0",instance="ceph:9283",job="ceph"} | 202 | + | ceph_bluestore_onode_misses{ceph_daemon="osd.1",instance="ceph:9283",job="ceph"} | 247 | + | ceph_bluestore_onode_misses{ceph_daemon="osd.2",instance="ceph:9283",job="ceph"} | 234 | + Then Grafana panel `OSD onode Hits Ratio` with legend `EMPTY` shows: + | metrics | values | + | {} | 9.588529429483704E-01 | + diff --git a/monitoring/ceph-mixin/tests_dashboards/features/radosgw-detail.feature b/monitoring/ceph-mixin/tests_dashboards/features/radosgw-detail.feature new file mode 100644 index 000000000..e0016c507 --- /dev/null +++ b/monitoring/ceph-mixin/tests_dashboards/features/radosgw-detail.feature @@ -0,0 +1,139 @@ +Feature: RGW Host Detail Dashboard + +Scenario: "Test $rgw_servers GET/PUT Latencies - GET" + Given the following series: + | metrics | values | + | ceph_rgw_get_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 | + | ceph_rgw_get_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.foo` + Then Grafana panel `$rgw_servers GET/PUT Latencies` with legend `GET {{ceph_daemon}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance_id="58892247"} | 1.5 | + +Scenario: "Test $rgw_servers GET/PUT Latencies - PUT" + Given the following series: + | metrics | values | + | ceph_rgw_put_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 15 35 55 | + | ceph_rgw_put_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.foo` + Then Grafana panel `$rgw_servers GET/PUT Latencies` with legend `PUT {{ceph_daemon}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance_id="58892247"} | 1 | + +Scenario: "Test Bandwidth by HTTP Operation - GET" + Given the following series: + | metrics | values | + | ceph_rgw_get_b{instance="127.0.0.1", instance_id="92806566", 
job="ceph"} | 10 50 100 | + | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.1` + Then Grafana panel `Bandwidth by HTTP Operation` with legend `GETs {{ceph_daemon}}` shows: + | metrics | values | + | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1.5 | + +Scenario: "Test Bandwidth by HTTP Operation - PUT" + Given the following series: + | metrics | values | + | ceph_rgw_put_b{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 | + | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.1` + Then Grafana panel `Bandwidth by HTTP Operation` with legend `PUTs {{ceph_daemon}}` shows: + | metrics | values | + | {ceph_daemon="rgw.1", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 7.5E-01 | + +Scenario: "Test HTTP Request Breakdown - Requests Failed" + Given the following series: + | metrics | values | + | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 5 7 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.foo` + Then Grafana panel `HTTP Request Breakdown` with legend `Requests Failed {{ceph_daemon}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1E-01 | + +Scenario: "Test HTTP Request Breakdown - GET" + Given the following series: + | metrics | values | + | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.foo` + Then Grafana panel `HTTP Request Breakdown` with legend `GETs {{ceph_daemon}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.1666666666666667 | + +Scenario: "Test HTTP Request Breakdown - PUT" + Given the following series: + | metrics | values | + | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.foo` + Then Grafana panel `HTTP Request Breakdown` with legend `PUTs {{ceph_daemon}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.5 | + +Scenario: "Test HTTP Request Breakdown - Other" + Given the following series: + | metrics | values | + | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 175 250 345 | + | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 | + | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.foo` + Then Grafana panel `HTTP Request Breakdown` with legend `Other {{ceph_daemon}}` shows: + | metrics 
| values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | .16666666666666652 | + +Scenario: "Test Workload Breakdown - Failures" + Given the following series: + | metrics | values | + | ceph_rgw_failed_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 5 7 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.foo` + Then Grafana panel `Workload Breakdown` with legend `Failures {{ceph_daemon}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1E-01 | + +Scenario: "Test Workload Breakdown - GETs" + Given the following series: + | metrics | values | + | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.foo` + Then Grafana panel `Workload Breakdown` with legend `GETs {{ceph_daemon}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.1666666666666667 | + +Scenario: "Test Workload Breakdown - PUTs" + Given the following series: + | metrics | values | + | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.foo` + Then Grafana panel `Workload Breakdown` with legend `PUTs {{ceph_daemon}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1.5 | + +Scenario: "Test Workload Breakdown - Other" + Given the following series: + | metrics | values | + | ceph_rgw_req{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 175 250 345 | + | ceph_rgw_get{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 100 150 170 | + | ceph_rgw_put{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 70 90 160 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + And variable `rgw_servers` is `rgw.foo` + Then Grafana panel `Workload Breakdown` with legend `Other (DELETE,LIST) {{ceph_daemon}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph"} | .16666666666666652 | diff --git a/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature b/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature new file mode 100644 index 000000000..642e43978 --- /dev/null +++ b/monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature @@ -0,0 +1,250 @@ +Feature: RGW Overview Dashboard + +Scenario: "Test Average GET Latencies" + Given the following series: + | metrics | values | + | ceph_rgw_get_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 | + | ceph_rgw_get_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + Then 
Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `GET {{rgw_host}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1.5 | + +Scenario: "Test Average PUT Latencies" + Given the following series: + | metrics | values | + | ceph_rgw_put_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 15 35 55 | + | ceph_rgw_put_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `PUT {{rgw_host}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1 | + +Scenario: "Test Total Requests/sec by RGW Instance" + Given the following series: + | metrics | values | + | ceph_rgw_req{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 | + | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 | + When interval is `30s` + Then Grafana panel `Total Requests/sec by RGW Instance` with legend `{{rgw_host}}` shows: + | metrics | values | + | {rgw_host="1"} | 1.5 | + +Scenario: "Test GET Latencies by RGW Instance" + Given the following series: + | metrics | values | + | ceph_rgw_get_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 50 100 | + | ceph_rgw_get_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 20 60 80 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When interval is `30s` + Then Grafana panel `GET Latencies by RGW Instance` with legend `{{rgw_host}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1.5 | + +Scenario: "Test Bandwidth Consumed by Type- GET" + Given the following series: + | metrics | values | + | ceph_rgw_get_b{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 | + When evaluation time is `1m` + And interval is `30s` + Then Grafana panel `Bandwidth Consumed by Type` with legend `GETs` shows: + | metrics | values | + | {} | 1.5 | + +Scenario: "Test Bandwidth Consumed by Type- PUT" + Given the following series: + | metrics | values | + | ceph_rgw_put_b{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 | + When evaluation time is `1m` + And interval is `30s` + Then Grafana panel `Bandwidth Consumed by Type` with legend `PUTs` shows: + | metrics | values | + | {} | 7.5E-01 | + +Scenario: "Test Bandwidth by RGW Instance" + Given the following series: + | metrics | values | + | ceph_rgw_get_b{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 10 50 100 | + | ceph_rgw_put_b{instance="127.0.0.1", instance_id="92806566", job="ceph"} | 5 20 50 | + | ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph"} | 1 1 1 | + When evaluation time is `1m` + And interval is `30s` + Then Grafana panel `Bandwidth by RGW Instance` with legend `{{rgw_host}}` shows: + | metrics | values | + | {ceph_daemon="rgw.1", instance_id="92806566", rgw_host="1"} | 2.25 | + +Scenario: "Test PUT Latencies by RGW Instance" + Given 
the following series: + | metrics | values | + | ceph_rgw_put_initial_lat_sum{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 15 35 55 | + | ceph_rgw_put_initial_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph"} | 10 30 50 | + | ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph"} | 1 1 1 | + When evaluation time is `1m` + And interval is `30s` + Then Grafana panel `PUT Latencies by RGW Instance` with legend `{{rgw_host}}` shows: + | metrics | values | + | {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo"} | 1 | + +Scenario: "Test Total backend responses by HTTP code" + Given the following series: + | metrics | values | + | haproxy_backend_http_responses_total{job="haproxy",code="200",instance="ingress.rgw.1",proxy="backend"} | 10 100 | + | haproxy_backend_http_responses_total{job="haproxy",code="404",instance="ingress.rgw.1",proxy="backend"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + When variable `code` is `200` + Then Grafana panel `Total responses by HTTP code` with legend `Backend {{ code }}` shows: + | metrics | values | + | {code="200"} | 1.5 | + +Scenario: "Test Total frontend responses by HTTP code" + Given the following series: + | metrics | values | + | haproxy_frontend_http_responses_total{job="haproxy",code="200",instance="ingress.rgw.1",proxy="frontend"} | 10 100 | + | haproxy_frontend_http_responses_total{job="haproxy",code="404",instance="ingress.rgw.1",proxy="frontend"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + When variable `code` is `200` + Then Grafana panel `Total responses by HTTP code` with legend `Frontend {{ code }}` shows: + | metrics | values | + | {code="200"} | 1.5 | + +Scenario: "Test Total http frontend requests by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_http_requests_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_http_requests_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Requests` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend response errors by instance" + Given the following series: + | metrics | values | + | haproxy_backend_response_errors_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_response_errors_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Response errors` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total frontend requests errors by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_request_errors_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_request_errors_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Requests errors` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend redispatch warnings by instance" + Given the following series: + | metrics | values | + | 
haproxy_backend_redispatch_warnings_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_redispatch_warnings_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Backend redispatch` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend retry warnings by instance" + Given the following series: + | metrics | values | + | haproxy_backend_retry_warnings_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_retry_warnings_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Backend retry` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total frontend requests denied by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_requests_denied_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_requests_denied_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Request denied` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend current queue by instance" + Given the following series: + | metrics | values | + | haproxy_backend_current_queue{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_current_queue{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total requests / responses` with legend `Backend Queued` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 200 | + +Scenario: "Test Total frontend connections by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_connections_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_connections_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total number of connections` with legend `Front` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend connections attempts by instance" + Given the following series: + | metrics | values | + | haproxy_backend_connection_attempts_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_connection_attempts_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total number of connections` with legend `Back` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total backend connections error by instance" + Given the following series: + | metrics | values | + | haproxy_backend_connection_errors_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_connection_errors_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Total number of connections` with legend `Back errors` shows: + | metrics | 
values | + | {instance="ingress.rgw.1"} | 3 | + +Scenario: "Test Total frontend bytes incoming by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_bytes_in_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_bytes_in_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Current total of incoming / outgoing bytes` with legend `IN Front` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 24 | + +Scenario: "Test Total frontend bytes outgoing by instance" + Given the following series: + | metrics | values | + | haproxy_frontend_bytes_out_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_frontend_bytes_out_total{job="haproxy",proxy="frontend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Current total of incoming / outgoing bytes` with legend `OUT Front` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 24 | + +Scenario: "Test Total backend bytes incoming by instance" + Given the following series: + | metrics | values | + | haproxy_backend_bytes_in_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_bytes_in_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Current total of incoming / outgoing bytes` with legend `IN Back` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 24 | + +Scenario: "Test Total backend bytes outgoing by instance" + Given the following series: + | metrics | values | + | haproxy_backend_bytes_out_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 10 100 | + | haproxy_backend_bytes_out_total{job="haproxy",proxy="backend",instance="ingress.rgw.1"} | 20 200 | + When variable `ingress_service` is `ingress.rgw.1` + Then Grafana panel `Current total of incoming / outgoing bytes` with legend `OUT Back` shows: + | metrics | values | + | {instance="ingress.rgw.1"} | 24 | diff --git a/monitoring/ceph-mixin/tests_dashboards/features/self.feature b/monitoring/ceph-mixin/tests_dashboards/features/self.feature new file mode 100644 index 000000000..2b44ce0dc --- /dev/null +++ b/monitoring/ceph-mixin/tests_dashboards/features/self.feature @@ -0,0 +1,68 @@ +Feature: Test tester + +Scenario: "Simple query works" + Given the following series: + | metrics | values | + | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 | + | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | + | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 | + | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 | + | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | + Then query `node_network_transmit_bytes{instance="127.0.0.1"} > 0` produces: + | metrics | values | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 100 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 100 | + +Scenario: "Query with evaluation time" + Given the following series: + | metrics | values | + | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 | + | 
node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | + | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 | + | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 | + | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | + When evaluation time is `0m` + Then query `node_network_transmit_bytes{instance="127.0.0.1"} > 0` produces: + | metrics | values | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 | + +Scenario: "Query with evaluation time and variable value" + Given the following series: + | metrics | values | + | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 | + | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 | + | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 | + | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 | + | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | + When evaluation time is `0m` + And variable `osd_hosts` is `127.0.0.1` + Then query `node_network_transmit_bytes{instance="$osd_hosts"} > 0` produces: + | metrics | values | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 | + +Scenario: "Query with interval time" + Given the following series: + | metrics | values | + | node_network_receive_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 | + | node_network_receive_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 10 100 200 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 10 100 200 | + | node_network_transmit_bytes{instance="192.168.100.2", device="bond0"} | 20 200 300 | + | node_network_transmit_bytes{instance="192.168.100.1", device="bond0"} | 20 200 300 | + | bonding_slaves{instance="127.0.0.1", master="bond0"} | 2 | + When evaluation time is `2h` + And evaluation interval is `1h` + And interval is `1h` + And variable `osd_hosts` is `127.0.0.1` + Then query `node_network_transmit_bytes{instance="$osd_hosts"} > 0` produces: + | metrics | values | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth1"} | 200 | + | node_network_transmit_bytes{instance="127.0.0.1", device="eth2"} | 200 |
\ No newline at end of file diff --git a/monitoring/ceph-mixin/tests_dashboards/features/steps/__init__.py b/monitoring/ceph-mixin/tests_dashboards/features/steps/__init__.py new file mode 100644 index 000000000..0b90f46f2 --- /dev/null +++ b/monitoring/ceph-mixin/tests_dashboards/features/steps/__init__.py @@ -0,0 +1 @@ +# This file and the steps files are needed, even when empty, because 'behave' requires them :(
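For reference, a minimal sketch of what one of the scenarios above amounts to when driven through the PromqlTest helper that environment.py uses. The calls mirror the step definitions; the PromQL expression is only an illustrative stand-in, since the real query is looked up from the dashboard JSON via get_dashboards_data():

# Minimal sketch (illustration only): one "OSDs down"-style scenario expressed
# directly against the PromqlTest helper, using the same calls the steps make.
from tests_dashboards import PromqlTest

t = PromqlTest()
# Given the following series:
t.add_series('ceph_osd_up{ceph_daemon="osd.0", instance="127.0.0.1"}', '0.0')
t.add_series('ceph_osd_up{ceph_daemon="osd.1", instance="127.0.0.1"}', '0.0')
# When variable `instance` is `127.0.0.1`
t.set_variable('instance', '127.0.0.1')
# Then the panel query shows 2 OSDs down (stand-in expression, not the real panel query)
t.set_expression('count(ceph_osd_up{instance=~"$instance"} == 0)')
t.add_exp_samples('{}', 2.0)
# after_scenario() then renders a promtool test file and asserts that it passes
assert t.run_promtool()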