diff options
Diffstat (limited to 'monitoring/ceph-mixin/dashboards_out')
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/.lint | 5 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/ceph-cluster.json | 1244 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/cephfs-overview.json | 362 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/host-details.json | 1243 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/hosts-overview.json | 894 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/osd-device-details.json | 871 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/osds-overview.json | 963 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/pool-detail.json | 708 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/pool-overview.json | 1542 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/radosgw-detail.json | 542 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/radosgw-overview.json | 1266 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json | 504 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/rbd-details.json | 458 | ||||
-rw-r--r-- | monitoring/ceph-mixin/dashboards_out/rbd-overview.json | 737 |
14 files changed, 11339 insertions, 0 deletions
diff --git a/monitoring/ceph-mixin/dashboards_out/.lint b/monitoring/ceph-mixin/dashboards_out/.lint new file mode 100644 index 000000000..6352e858f --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/.lint @@ -0,0 +1,5 @@ +exclusions: + template-instance-rule: + reason: "Instance template not needed because of ceph-mgr leader election." + target-instance-rule: + reason: "Instance matcher not needed because of ceph-mgr leader election." diff --git a/monitoring/ceph-mixin/dashboards_out/ceph-cluster.json b/monitoring/ceph-mixin/dashboards_out/ceph-cluster.json new file mode 100644 index 000000000..6988a6299 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/ceph-cluster.json @@ -0,0 +1,1244 @@ +{ + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "heatmap", + "name": "Heatmap", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "vonage-status-panel", + "name": "Status Panel", + "version": "1.0.8" + } + ], + "annotations": { + "list": [] + }, + "description": "Ceph cluster overview", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1525415495309, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "rgba(50, 128, 45, 0.9)", + "rgba(237, 129, 40, 0.9)", + "rgb(255, 0, 0)" + ], + "datasource": "$datasource", + "editable": false, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 2, + "x": 0, + "y": 0 + }, + "hideTimeOverride": true, + "id": 21, + "interval": "1m", + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "ceph_health_status", + "format": "time_series", + "instant": true, + "interval": "$interval", + "intervalFactor": 1, + "refId": "A", + "step": 60 + } + ], + "thresholds": "1,2", + "timeFrom": null, + "title": "Health Status", + "transparent": false, + "type": "singlestat", + "valueFontSize": "50%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "0" + }, + { + "op": "=", + "text": "WARN", + "value": "1" + }, + { + "op": "=", + "text": "ERR", + "value": "2" + } + ], + "valueName": "current" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgb(255, 0, 0)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": "$datasource", + "displayName": "", + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 2, + "x": 2, + "y": 0 + }, + "id": 43, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "targets": [ + { + "aggregation": "Last", + "alias": "All", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_osd_metadata)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "All", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "In", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_in)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "In", + "refId": "B", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Out", + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_in == bool 0)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "Out", + "refId": "C", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + }, + { + "aggregation": "Last", + "alias": "Up", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_up)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Up", + "refId": "D", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Down", + "crit": 2, + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_osd_up == bool 0)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Down", + "refId": "E", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1 + } + ], + "title": "OSDs", + "type": "vonage-status-panel" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "decimals": 2, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 47, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes_used)/sum(ceph_osd_stat_bytes)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Used", + "refId": "A" + } + ], + "thresholds": "0.7,0.8", + "title": "Capacity used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 53, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Active", + "color": "#508642", + "fill": 1, + "stack": "A" + }, + { + "alias": "Total", + "color": "#f9e2d2" + }, + { + "alias": "Degraded", + "color": "#eab839" + }, + { + "alias": "Undersized", + "color": "#f9934e" + }, + { + "alias": "Inconsistent", + "color": "#e24d42" + }, + { + "alias": "Down", + "color": "#bf1b00" + }, + { + "alias": "Inactive", + "color": "#bf1b00", + "fill": 4, + "linewidth": 0, + "stack": "A" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(ceph_pg_total)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "A" + }, + { + "expr": "sum(ceph_pg_active)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Active", + "refId": "B" + }, + { + "expr": "sum(ceph_pg_total - ceph_pg_active)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Inactive", + "refId": "G" + }, + { + "expr": "sum(ceph_pg_undersized)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Undersized", + "refId": "F" + }, + { + "expr": "sum(ceph_pg_degraded)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Degraded", + "refId": "C" + }, + { + "expr": "sum(ceph_pg_inconsistent)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Inconsistent", + "refId": "D" + }, + { + "expr": "sum(ceph_pg_down)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Down", + "refId": "E" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "PG States", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 66, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Avg Apply Latency", + "color": "#7eb26d" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "quantile(0.95, ceph_osd_apply_latency_ms)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Apply Latency P_95", + "refId": "A" + }, + { + "expr": "quantile(0.95, ceph_osd_commit_latency_ms)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Commit Latency P_95", + "refId": "B" + }, + { + "expr": "avg(ceph_osd_apply_latency_ms)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Avg Apply Latency", + "refId": "C" + }, + { + "expr": "avg(ceph_osd_commit_latency_ms)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Avg Commit Latency", + "refId": "D" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "clusterName": "", + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 1, + "datasource": "$datasource", + "displayName": "", + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 2, + "x": 0, + "y": 3 + }, + "id": 41, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "targets": [ + { + "aggregation": "Last", + "alias": "In Quorum", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "sum(ceph_mon_quorum_status)", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "In Quorum", + "refId": "A", + "units": "none", + "valueHandler": "Text Only" + }, + { + "aggregation": "Last", + "alias": "Total", + "crit": 1, + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mon_quorum_status)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Total", + "refId": "B", + "units": "none", + "valueHandler": "Text Only", + "warn": 2 + }, + { + "aggregation": "Last", + "alias": "MONs out of Quorum", + "crit": 1.6, + "decimals": 2, + "displayAliasType": "Warning / Critical", + "displayType": "Annotation", + "displayValueWithAlias": "Never", + "expr": "count(ceph_mon_quorum_status) / sum(ceph_mon_quorum_status)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MONs out of Quorum", + "refId": "C", + "units": "none", + "valueHandler": "Number Threshold", + "warn": 1.1 + } + ], + "title": "Monitors", + "type": "vonage-status-panel" + }, + { + "colorMode": "Panel", + "colors": { + "crit": "rgba(245, 54, 54, 0.9)", + "disable": "rgba(128, 128, 128, 0.9)", + "ok": "rgba(50, 128, 45, 0.9)", + "warn": "rgba(237, 129, 40, 0.9)" + }, + "cornerRadius": 0, + "datasource": "$datasource", + "displayName": "", + "flipCard": false, + "flipTime": 5, + "fontFormat": "Regular", + "gridPos": { + "h": 3, + "w": 2, + "x": 2, + "y": 3 + }, + "id": 68, + "isAutoScrollOnOverflow": false, + "isGrayOnNoData": false, + "isHideAlertsOnDisable": false, + "isIgnoreOKColors": false, + "links": [], + "targets": [ + { + "aggregation": "Last", + "alias": "Active", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mgr_status == 1) or vector(0)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Active", + "refId": "A", + "units": "none", + "valueHandler": "Number Threshold" + }, + { + "aggregation": "Last", + "alias": "Standby", + "decimals": 2, + "displayAliasType": "Always", + "displayType": "Regular", + "displayValueWithAlias": "When Alias Displayed", + "expr": "count(ceph_mgr_status == 0) or vector(0)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Standby", + "refId": "B", + "units": "none", + "valueHandler": "Number Threshold" + } + ], + "title": "MGRs", + "type": "vonage-status-panel" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 6 + }, + "id": 45, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 0.5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(ceph_osd_op_w_in_bytes[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "A" + }, + { + "expr": "sum(irate(ceph_osd_op_r_out_bytes[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 6 + }, + "id": 62, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(deriv(ceph_pool_stored[1m]))", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "In-/Egress", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": " Egress (-) / Ingress (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "cards": { + "cardPadding": null, + "cardRound": 1 + }, + "color": { + "cardColor": "rgb(0, 254, 255)", + "colorScale": "sqrt", + "colorScheme": "interpolateBlues", + "exponent": 0.5, + "min": null, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": "$datasource", + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 15 + }, + "heatmap": {}, + "highlightCards": true, + "id": 55, + "legend": { + "show": true + }, + "links": [], + "span": 12, + "targets": [ + { + "expr": "ceph_osd_stat_bytes_used / ceph_osd_stat_bytes", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "Util (%)", + "refId": "A", + "step": 60 + } + ], + "timeFrom": null, + "title": "OSD Capacity Utilization", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": 2, + "format": "percentunit", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketNumber": null, + "yBucketSize": null + }, + { + "cards": { + "cardPadding": null, + "cardRound": 1 + }, + "color": { + "cardColor": "#b4ff00", + "colorScale": "sqrt", + "colorScheme": "interpolateBlues", + "exponent": 0.5, + "mode": "spectrum" + }, + "dataFormat": "timeseries", + "datasource": "$datasource", + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 15 + }, + "heatmap": {}, + "highlightCards": true, + "id": 59, + "legend": { + "show": true + }, + "links": [], + "targets": [ + { + "expr": "ceph_osd_numpg", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "#PGs", + "refId": "A" + } + ], + "title": "PGs per OSD", + "tooltip": { + "show": true, + "showHistogram": false + }, + "type": "heatmap", + "xAxis": { + "show": true + }, + "xBucketNumber": null, + "xBucketSize": "", + "yAxis": { + "decimals": null, + "format": "none", + "logBase": 1, + "max": null, + "min": null, + "show": true, + "splitFactor": null + }, + "yBucketNumber": null, + "yBucketSize": null + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 64, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(irate(ceph_osd_recovery_ops[1m]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Op/s", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Recovery Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": "Recovery Ops/s", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "30s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph", + "cluster" + ], + "templating": { + "list": [ + { + "hide": 0, + "label": null, + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "auto": true, + "auto_count": 10, + "auto_min": "1m", + "current": { + "text": "auto", + "value": "$__auto_interval_interval" + }, + "datasource": null, + "hide": 0, + "includeAll": false, + "label": "Interval", + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph - Cluster", + "version": 13 + } diff --git a/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json b/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json new file mode 100644 index 000000000..3e7aeef45 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/cephfs-overview.json @@ -0,0 +1,362 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "MDS Performance", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(ceph_objecter_op_r{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read Ops", + "refId": "A" + }, + { + "expr": "sum(rate(ceph_objecter_op_w{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write Ops", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "MDS Workload - $mds_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "none", + "label": "Reads(-) / Writes (+)", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_mds_server_handle_client_request{job=~\"$job\", ceph_daemon=~\"($mds_servers).*\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ceph_daemon}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Client Request Load - $mds_servers", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "none", + "label": "Client Requests", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "MDS Server", + "multi": false, + "name": "mds_servers", + "options": [ ], + "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "MDS Performance", + "uid": "tbO9LAiZz", + "version": 0 +} diff --git a/monitoring/ceph-mixin/dashboards_out/host-details.json b/monitoring/ceph-mixin/dashboards_out/host-details.json new file mode 100644 index 000000000..93c51f009 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/host-details.json @@ -0,0 +1,1243 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "$ceph_hosts System Overview", + "titleSize": "h6", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 1 + }, + "id": 3, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(sum by (ceph_daemon) (ceph_osd_metadata{job=~\"$job\", hostname='$ceph_hosts'}))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "OSDs", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "interrupt": "#447EBC", + "steal": "#6D1F62", + "system": "#890F02", + "user": "#3F6833", + "wait": "#C15C17" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Shows the CPU breakdown. When multiple servers are selected, only the first host's cpu data is shown", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 6, + "x": 3, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode) (\n rate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[$__rate_interval]) or\n rate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\", mode=~\"(irq|nice|softirq|steal|system|user|iowait)\"}[$__rate_interval])\n) / (\n scalar(\n sum(rate(node_cpu{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_cpu_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]))\n ) * 100\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{mode}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percent", + "label": "% Utilization", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "Available": "#508642", + "Free": "#508642", + "Total": "#bf1b00", + "Used": "#bf1b00", + "total": "#bf1b00", + "used": "#0a50a1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 6, + "x": 9, + "y": 1 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "total", + "color": "#bf1b00", + "fill": 0, + "linewidth": 2, + "stack": false + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemFree{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Free", + "refId": "A" + }, + { + "expr": "node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "total", + "refId": "B" + }, + { + "expr": "(\n node_memory_Cached{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) + (\n node_memory_Buffers{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) + (\n node_memory_Slab{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "buffers/cache", + "refId": "C" + }, + { + "expr": "(\n node_memory_MemTotal{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemTotal_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n) - (\n (\n node_memory_MemFree{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_MemFree_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) + (\n node_memory_Cached{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Cached_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) + (\n node_memory_Buffers{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Buffers_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n ) +\n (\n node_memory_Slab{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"} or\n node_memory_Slab_bytes{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "used", + "refId": "D" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "RAM Usage", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": "RAM used", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show the network load (rx,tx) across all interfaces (excluding loopback 'lo')", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 10, + "w": 6, + "x": 15, + "y": 1 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (device) (\n rate(\n node_network_receive_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A" + }, + { + "expr": "sum by (device) (\n rate(node_network_transmit_bytes{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\",device!=\"lo\"}[$__rate_interval])\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network Load", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "decbytes", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 1 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_receive_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A" + }, + { + "expr": "rate(node_network_transmit_drop{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_transmit_drop_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network drop rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "pps", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Each OSD consists of a Journal/WAL partition and a data partition. The RAW Capacity shown is the sum of the data partitions across all OSDs on the selected OSD hosts.", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 3, + "x": 0, + "y": 6 + }, + "id": 8, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(\n ceph_osd_stat_bytes{job=~\"$job\"} and\n on (ceph_daemon) ceph_disk_occupation{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Raw Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 5, + "w": 3, + "x": 21, + "y": 6 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*tx/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(node_network_receive_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_receive_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.rx", + "refId": "A" + }, + { + "expr": "rate(node_network_transmit_errs{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_network_transmit_errs_total{instance=~\"$ceph_hosts([\\\\\\\\.:].*)?\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}.tx", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network error rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "pps", + "label": "Send (-) / Receive (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 10, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Disk Performance Statistics", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For any OSD devices on the host, this chart shows the iops per physical device. Each device is shown by it's name and corresponding OSD id value", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 11, + "x": 0, + "y": 12 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n (\n rate(node_disk_writes_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) writes", + "refId": "A" + }, + { + "expr": "label_replace(\n (\n rate(node_disk_reads_completed{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\"},\"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) reads", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For OSD hosts, this chart shows the disk bandwidth (read bytes/sec + write bytes/sec) of the physical OSD device. Each device is shown by device name, and corresponding OSD id", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 11, + "x": 12, + "y": 12 + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*read/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n (\n rate(node_disk_bytes_written{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_written_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) write", + "refId": "A" + }, + { + "expr": "label_replace(\n (\n rate(node_disk_bytes_read{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) or\n rate(node_disk_read_bytes_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])\n ),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") * on(instance, device)\n group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}}) read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Throughput by Disk", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "For OSD hosts, this chart shows the latency at the physical drive. Each drive is shown by device name, with it's corresponding OSD id", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 11, + "x": 0, + "y": 21 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(instance, device) (label_replace(\n (rate(node_disk_write_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])) /\n clamp_min(rate(node_disk_writes_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]), 0.001) or\n (rate(node_disk_read_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval])) /\n clamp_min(rate(node_disk_reads_completed_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]), 0.001),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}})", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show disk utilization % (util) of any OSD devices on the host by the physical device name and associated OSD id.", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 11, + "x": 12, + "y": 21 + }, + "id": 14, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n (\n (rate(node_disk_io_time_ms{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) / 10) or\n rate(node_disk_io_time_seconds_total{instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"}[$__rate_interval]) * 100\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(ceph_disk_occupation_human{job=~\"$job\", instance=~\"($ceph_hosts)([\\\\\\\\.:].*)?\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}}({{ceph_daemon}})", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$ceph_hosts Disk utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percent", + "label": "%Util", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin", + "overview" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Hostname", + "multi": false, + "name": "ceph_hosts", + "options": [ ], + "query": "label_values({}, instance)", + "refresh": 1, + "regex": "([^.:]*).*", + "sort": 3, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Host Details", + "uid": "rtOg0AiWz", + "version": 0 +} diff --git a/monitoring/ceph-mixin/dashboards_out/hosts-overview.json b/monitoring/ceph-mixin/dashboards_out/hosts-overview.json new file mode 100644 index 000000000..f1cd4c499 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/hosts-overview.json @@ -0,0 +1,894 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(sum by (hostname) (ceph_osd_metadata{job=~\"$job\"}))", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "OSD Hosts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 3, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg(1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n))\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG CPU Busy", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)", + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 4, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg ((\n (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) - ((\n node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (\n node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n )\n )\n) / (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"}\n))\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG RAM Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "IOPS Load at the device as reported by the OS on all OSD hosts", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 5, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum ((\n rate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n) + (\n rate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n))\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Physical IOPS", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 6, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "AVG Disk Utilization", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Total send/receive network load across all hosts in the ceph cluster", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 7, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Network Load", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show the top 10 busiest hosts by cpu", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 5 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10,\n 100 * (\n 1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n )\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Busy - Top 10 Hosts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Top 10 hosts by network load", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 5 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Network Load - Top 10 Hosts", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "osd_hosts", + "options": [ ], + "query": "label_values(ceph_disk_occupation{job=~\"$job\"}, exported_instance)", + "refresh": 1, + "regex": "([^.]*).*", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "mon_hosts", + "options": [ ], + "query": "label_values(ceph_mon_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "mon.(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "mds_hosts", + "options": [ ], + "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "mds.(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "rgw_hosts", + "options": [ ], + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "rgw.(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Host Overview", + "uid": "y0KGL0iZz", + "version": 0 +} diff --git a/monitoring/ceph-mixin/dashboards_out/osd-device-details.json b/monitoring/ceph-mixin/dashboards_out/osd-device-details.json new file mode 100644 index 000000000..384516fb0 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/osd-device-details.json @@ -0,0 +1,871 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "OSD Performance", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "read", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_osd_op_r_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "read", + "refId": "A" + }, + { + "expr": "rate(ceph_osd_op_w_latency_sum{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "write", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$osd Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "Reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_osd_op_r{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "A" + }, + { + "expr": "rate(ceph_osd_op_w{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$osd R/W IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 1 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "Read Bytes", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_osd_op_r_out_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read Bytes", + "refId": "A" + }, + { + "expr": "rate(ceph_osd_op_w_in_bytes{job=~\"$job\", ceph_daemon=~\"$osd\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write Bytes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$osd R/W Bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 6, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Physical Device Performance", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 11 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(\n label_replace(\n rate(node_disk_read_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n ) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}/{{device}} Reads", + "refId": "A" + }, + { + "expr": "(\n label_replace(\n rate(node_disk_write_time_seconds_total{}[$__rate_interval]) /\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\") and on (instance, device)\n label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n )\n )\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}}/{{device}} Writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Physical Device Latency for $osd", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 11 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(node_disk_writes_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} on {{instance}} Writes", + "refId": "A" + }, + { + "expr": "label_replace(\n rate(node_disk_reads_completed_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} on {{instance}} Reads", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Physical Device R/W IOPS for $osd", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 12, + "y": 11 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "/.*Reads/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(node_disk_read_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}} {{device}} Reads", + "refId": "A" + }, + { + "expr": "label_replace(\n rate(node_disk_written_bytes_total{}[$__rate_interval]), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{instance}} {{device}} Writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Physical Device R/W Bytes for $osd", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 6, + "x": 18, + "y": 11 + }, + "id": 10, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(node_disk_io_time_seconds_total{}[$__rate_interval]),\n \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n) and on (instance, device) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", ceph_daemon=~\"$osd\"}, \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^:.]*).*\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device}} on {{instance}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Physical Device Util% for $osd", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "OSD", + "multi": false, + "name": "osd", + "options": [ ], + "query": "label_values(ceph_osd_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-3h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "OSD device details", + "uid": "CrAHE0iZz", + "version": 0 +} diff --git a/monitoring/ceph-mixin/dashboards_out/osds-overview.json b/monitoring/ceph-mixin/dashboards_out/osds-overview.json new file mode 100644 index 000000000..5ea8955b2 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/osds-overview.json @@ -0,0 +1,963 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.0.0" + }, + { + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "type": "panel", + "version": "1.3.3" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "table", + "name": "Table", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "aliasColors": { + "@95%ile": "#e0752d" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "AVG read", + "refId": "A" + }, + { + "expr": "max(\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) * 1000\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MAX read", + "refId": "B" + }, + { + "expr": "quantile(0.95,\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "@95%ile", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "OSD Read Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ] + }, + { + "columns": [ ], + "datasource": "$datasource", + "description": "This table shows the osd's that are delivering the 10 highest read latencies within the cluster", + "gridPos": { + "h": 8, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 3, + "links": [ ], + "sort": { + "col": 2, + "desc": true + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "ceph_daemon", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Latency (ms)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "none", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n (sort(\n (\n rate(ceph_osd_op_r_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_r_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n )\n ))\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Highest READ Latencies", + "transform": "table", + "type": "table" + }, + { + "aliasColors": { + "@95%ile write": "#e0752d" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval])\n * 1000\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "AVG write", + "refId": "A" + }, + { + "expr": "max(\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MAX write", + "refId": "B" + }, + { + "expr": "quantile(0.95, (\n rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000\n))\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "@95%ile write", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "OSD Write Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ] + }, + { + "columns": [ ], + "datasource": "$datasource", + "description": "This table shows the osd's that are delivering the 10 highest write latencies within the cluster", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 5, + "links": [ ], + "sort": { + "col": 2, + "desc": true + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "ceph_daemon", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Latency (ms)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "none", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n (sort(\n (rate(ceph_osd_op_w_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n on (ceph_daemon) rate(ceph_osd_op_w_latency_count{job=~\"$job\"}[$__rate_interval]) *\n 1000)\n ))\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Highest WRITE Latencies", + "transform": "table", + "type": "table" + }, + { + "aliasColors": { }, + "datasource": "$datasource", + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 8 + }, + "id": 6, + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Under graph", + "pieType": "pie", + "targets": [ + { + "expr": "count by (device_class) (ceph_osd_metadata{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device_class}}", + "refId": "A" + } + ], + "title": "OSD Types Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + "Non-Encrypted": "#E5AC0E" + }, + "datasource": "$datasource", + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 4, + "y": 8 + }, + "id": 7, + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Under graph", + "pieType": "pie", + "targets": [ + { + "expr": "count(ceph_bluefs_wal_total_bytes{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "bluestore", + "refId": "A" + }, + { + "expr": "absent(ceph_bluefs_wal_total_bytes{job=~\"$job\"}) * count(ceph_osd_metadata{job=~\"$job\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "filestore", + "refId": "B" + } + ], + "title": "OSD Objectstore Types", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { }, + "datasource": "$datasource", + "description": "The pie chart shows the various OSD sizes used within the cluster", + "gridPos": { + "h": 8, + "w": 4, + "x": 8, + "y": 8 + }, + "id": 8, + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Under graph", + "pieType": "pie", + "targets": [ + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} < 1099511627776)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<1TB", + "refId": "A" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 1099511627776 < 2199023255552)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<2TB", + "refId": "B" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 2199023255552 < 3298534883328)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<3TB", + "refId": "C" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 3298534883328 < 4398046511104)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<4TB", + "refId": "D" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 4398046511104 < 6597069766656)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<6TB", + "refId": "E" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 6597069766656 < 8796093022208)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<8TB", + "refId": "F" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 8796093022208 < 10995116277760)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<10TB", + "refId": "G" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 10995116277760 < 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<12TB", + "refId": "H" + }, + { + "expr": "count(ceph_osd_stat_bytes{job=~\"$job\"} >= 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<12TB+", + "refId": "I" + } + ], + "title": "OSD Size Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 8 + }, + "id": 9, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_osd_numpg{job=~\"$job\"}", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "PGs per OSD", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Distribution of PGs per OSD", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": 20, + "mode": "histogram", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "# of OSDs", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "This gauge panel shows onode Hits ratio to help determine if increasing RAM per OSD could help improve the performance of the cluster", + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 8 + }, + "id": 10, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_bluestore_onode_hits{job=~\"$job\"}) / (\n sum(ceph_bluestore_onode_hits{job=~\"$job\"}) +\n sum(ceph_bluestore_onode_misses{job=~\"$job\"})\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": ".75", + "title": "OSD onode Hits Ratio", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 11, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "R/W Profile", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show the read/write workload profile overtime", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "A" + }, + { + "expr": "round(sum(rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Read/Write Profile", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "OSD Overview", + "uid": "lo02I1Aiz", + "version": 0 +} diff --git a/monitoring/ceph-mixin/dashboards_out/pool-detail.json b/monitoring/ceph-mixin/dashboards_out/pool-detail.json new file mode 100644 index 000000000..dc8b4152a --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/pool-detail.json @@ -0,0 +1,708 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "singlestat", + "name": "Singlestat", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 7, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "(ceph_pool_stored{job=~\"$job\"} / (ceph_pool_stored{job=~\"$job\"} + ceph_pool_max_avail{job=~\"$job\"})) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": ".7,.8", + "title": "Capacity used", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": 100, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Time till pool is full assuming the average fill rate of the last 6 hours", + "format": "s", + "gauge": { + "maxValue": false, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 7, + "y": 0 + }, + "id": 3, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": "" + }, + "tableColumn": "", + "targets": [ + { + "expr": "(ceph_pool_max_avail{job=~\"$job\"} / deriv(ceph_pool_stored{job=~\"$job\"}[6h])) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"} > 0\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "current", + "title": "Time till full", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": false + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "deriv(ceph_pool_objects{job=~\"$job\"}[1m]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Objects per second", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Object Ingress/Egress", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ops", + "label": "Objects out(-) / in(+) ", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "reads", + "refId": "A" + }, + { + "expr": "rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) *\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Client IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "iops", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + { + "alias": "reads", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "reads", + "refId": "A" + }, + { + "expr": "rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "writes", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Client Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Read (-) / Write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": { + "read_op_per_sec": "#3F6833", + "write_op_per_sec": "#E5AC0E" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_pool_objects{job=~\"$job\"} *\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\", name=~\"$pool_name\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Number of Objects", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$pool_name Objects", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "Objects", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 22, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "Pool Name", + "multi": false, + "name": "pool_name", + "options": [ ], + "query": "label_values(ceph_pool_metadata{job=~\"$job\"}, name)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph Pool Details", + "uid": "-xyV8KCiz", + "version": 0 +} diff --git a/monitoring/ceph-mixin/dashboards_out/pool-overview.json b/monitoring/ceph-mixin/dashboards_out/pool-overview.json new file mode 100644 index 000000000..7f042aa5b --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/pool-overview.json @@ -0,0 +1,1542 @@ +{ + "__inputs": [ ], + "__requires": [ ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_pool_metadata{job=~\"$job\"})", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Pools", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Count of the pools that have compression enabled", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 3, + "y": 0 + }, + "id": 3, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"})", + "format": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Pools with Compression", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Total raw capacity available to the cluster", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 6, + "y": 0 + }, + "id": 4, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_osd_stat_bytes{job=~\"$job\"})", + "format": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Total Raw Capacity", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Total raw capacity consumed by user data and associated overheads (metadata + redundancy)", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 9, + "y": 0 + }, + "id": 5, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_pool_bytes_used{job=~\"$job\"})", + "format": "", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Raw Capacity Consumed", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Total of client data stored in the cluster", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 12, + "y": 0 + }, + "id": 6, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ceph_pool_stored{job=~\"$job\"})", + "format": "", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Logical Stored ", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "A compression saving is determined as the data eligible to be compressed minus the capacity used to store the data after compression", + "format": "bytes", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 15, + "y": 0 + }, + "id": 7, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"}\n)\n", + "format": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Compression Savings", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "Indicates how suitable the data is within the pools that are/have been enabled for compression - averaged across all pools holding compressed data", + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 18, + "y": 0 + }, + "id": 8, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(\n sum(ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n sum(ceph_pool_stored_raw{job=~\"$job\"} and ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n) * 100\n", + "format": "table", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Compression Eligibility", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "$datasource", + "description": "This factor describes the average ratio of data eligible to be compressed divided by the data actually stored. It does not account for data written that was ineligible for compression (too small, or compression yield too low)", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 3, + "w": 3, + "x": 21, + "y": 0 + }, + "id": 9, + "interval": null, + "links": [ ], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(\n ceph_pool_compress_under_bytes{job=~\"$job\"} > 0)\n / sum(ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n)\n", + "format": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "thresholds": "", + "title": "Compression Factor", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "columns": [ ], + "datasource": "$datasource", + "description": "", + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 3 + }, + "id": 10, + "links": [ ], + "sort": { + "col": 5, + "desc": true + }, + "styles": [ + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Time", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "instance", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "job", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Pool Name", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "name", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Pool ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pool_id", + "thresholds": [ ], + "type": "hidden", + "unit": "none", + "valueMaps": [ ] + }, + { + "alias": "Compression Factor", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #A", + "thresholds": [ ], + "type": "number", + "unit": "none", + "valueMaps": [ ] + }, + { + "alias": "% Used", + "colorMode": "value", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #D", + "thresholds": [ + "70", + "85" + ], + "type": "number", + "unit": "percentunit", + "valueMaps": [ ] + }, + { + "alias": "Usable Free", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #B", + "thresholds": [ ], + "type": "number", + "unit": "bytes", + "valueMaps": [ ] + }, + { + "alias": "Compression Eligibility", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #C", + "thresholds": [ ], + "type": "number", + "unit": "percent", + "valueMaps": [ ] + }, + { + "alias": "Compression Savings", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #E", + "thresholds": [ ], + "type": "number", + "unit": "bytes", + "valueMaps": [ ] + }, + { + "alias": "Growth (5d)", + "colorMode": "value", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #F", + "thresholds": [ + "0", + "0" + ], + "type": "number", + "unit": "bytes", + "valueMaps": [ ] + }, + { + "alias": "IOPS", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #G", + "thresholds": [ ], + "type": "number", + "unit": "none", + "valueMaps": [ ] + }, + { + "alias": "Bandwidth", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #H", + "thresholds": [ ], + "type": "number", + "unit": "Bps", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "__name__", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "type", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "compression_mode", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Type", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "description", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Stored", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #J", + "thresholds": [ ], + "type": "number", + "unit": "bytes", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #I", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Compression", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #K", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ + { + "text": "ON", + "value": "1" + } + ] + } + ], + "targets": [ + { + "expr": "(\n ceph_pool_compress_under_bytes{job=~\"$job\"} /\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n) and on(pool_id) (\n (\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n ) * 100 > 0.5\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "A", + "refId": "A" + }, + { + "expr": "ceph_pool_max_avail{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "B", + "refId": "B" + }, + { + "expr": "(\n (ceph_pool_compress_under_bytes{job=~\"$job\"} > 0) /\n ceph_pool_stored_raw{job=~\"$job\"}\n) * 100\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "C", + "refId": "C" + }, + { + "expr": "ceph_pool_percent_used{job=~\"$job\"} *\n on(pool_id) group_left(name) ceph_pool_metadata{job=~\"$job\"}\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "D", + "refId": "D" + }, + { + "expr": "ceph_pool_compress_under_bytes{job=~\"$job\"} -\n ceph_pool_compress_bytes_used{job=~\"$job\"} > 0\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "E", + "refId": "E" + }, + { + "expr": "delta(ceph_pool_stored{job=~\"$job\"}[5d])", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "F", + "refId": "F" + }, + { + "expr": "rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval])\n + rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "G", + "refId": "G" + }, + { + "expr": "rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "H", + "refId": "H" + }, + { + "expr": "ceph_pool_metadata{job=~\"$job\"}", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "I", + "refId": "I" + }, + { + "expr": "ceph_pool_stored{job=~\"$job\"} * on(pool_id) group_left ceph_pool_metadata{job=~\"$job\"}", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "J", + "refId": "J" + }, + { + "expr": "ceph_pool_metadata{job=~\"$job\", compression_mode!=\"none\"}", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "K", + "refId": "K" + }, + { + "expr": "", + "format": "", + "intervalFactor": "", + "legendFormat": "L", + "refId": "L" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Pool Overview", + "transform": "table", + "type": "table" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "This chart shows the sum of read and write IOPS from all clients by pool", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 11, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($topk,\n round(\n (\n rate(ceph_pool_rd{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval])\n ), 1\n ) * on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"})\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}} ", + "refId": "A" + }, + { + "expr": "topk($topk,\n rate(ceph_pool_wr{job=~\"$job\"}[$__rate_interval]) +\n on(pool_id) group_left(instance,name) ceph_pool_metadata{job=~\"$job\"}\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}} - write", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $topk Client IOPS by Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "IOPS", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "The chart shows the sum of read and write bytes from all clients, by pool", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "topk($topk,\n (\n rate(ceph_pool_rd_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_pool_wr_bytes{job=~\"$job\"}[$__rate_interval])\n ) * on(pool_id) group_left(instance, name) ceph_pool_metadata{job=~\"$job\"}\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Top $topk Client Bandwidth by Pool", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": "Throughput", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Historical view of capacity usage, to help identify growth and trends in pool consumption", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 13, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_pool_bytes_used{job=~\"$job\"} * on(pool_id) group_right ceph_pool_metadata{job=~\"$job\"}", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{name}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Pool Capacity Usage (RAW)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Capacity Used", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 22, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "15", + "value": "15" + }, + "hide": 0, + "includeAll": false, + "label": "TopK", + "multi": false, + "name": "topk", + "options": [ + { + "text": "15", + "value": "15" + } + ], + "query": "15", + "refresh": 0, + "type": "custom" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Ceph Pools Overview", + "uid": "z99hzWtmk", + "version": 0 +} diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json b/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json new file mode 100644 index 000000000..a0f8f3537 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/radosgw-detail.json @@ -0,0 +1,542 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.0.0" + }, + { + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "type": "panel", + "version": "1.3.3" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Host Detail : $rgw_servers", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance_id) (\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GET {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "sum by (instance_id) (\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUT {{ceph_daemon}}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "$rgw_servers GET/PUT Latencies", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 7, + "x": 6, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth by HTTP Operation", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + "GETs": "#7eb26d", + "Other": "#447ebc", + "PUTs": "#eab839", + "Requests": "#3f2b5b", + "Requests Failed": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 7, + "x": 13, + "y": 1 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\",ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Requests Failed {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "B" + }, + { + "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "C" + }, + { + "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Other {{ceph_daemon}}", + "refId": "D" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "HTTP Request Breakdown", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { + "Failures": "#bf1b00", + "GETs": "#7eb26d", + "Other (HEAD,POST,DELETE)": "#447ebc", + "PUTs": "#eab839", + "Requests": "#3f2b5b" + }, + "datasource": "$datasource", + "description": "", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 6, + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Under graph", + "pieType": "pie", + "targets": [ + { + "expr": "rate(ceph_rgw_failed_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Failures {{ceph_daemon}}", + "refId": "A" + }, + { + "expr": "rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs {{ceph_daemon}}", + "refId": "B" + }, + { + "expr": "rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs {{ceph_daemon}}", + "refId": "C" + }, + { + "expr": "(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) -\n (\n rate(ceph_rgw_get{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put{job=~\"$job\"}[$__rate_interval])\n )\n) * on (instance_id) group_left (ceph_daemon)\n ceph_rgw_metadata{job=~\"$job\", ceph_daemon=~\"$rgw_servers\"}\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}", + "refId": "D" + } + ], + "title": "Workload Breakdown", + "type": "grafana-piechart-panel", + "valueName": "current" + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin", + "overview" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "", + "multi": false, + "name": "rgw_servers", + "options": [ ], + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RGW Instance Detail", + "uid": "x5ARzZtmk", + "version": 0 +} diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json new file mode 100644 index 000000000..77d69e4f3 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/radosgw-overview.json @@ -0,0 +1,1266 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.0.0" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Overview - All Gateways", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GET {{rgw_host}}", + "refId": "A" + }, + { + "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUT {{rgw_host}}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Average GET/PUT Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 7, + "x": 8, + "y": 1 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Total Requests/sec by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "none", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 15, + "y": 1 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(ceph_rgw_get_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_get_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "GET Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Total bytes transferred in/out of all radosgw instances within the cluster", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 8 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "GETs", + "refId": "A" + }, + { + "expr": "sum(rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "PUTs", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth Consumed by Type", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Total bytes transferred in/out through get/put operations, by radosgw instance", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 7, + "x": 8, + "y": 8 + }, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rgw_put_b{job=~\"$job\"}[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Bandwidth by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 6, + "x": 15, + "y": 8 + }, + "id": 8, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "label_replace(\n rate(ceph_rgw_put_initial_lat_sum{job=~\"$job\"}[$__rate_interval]) /\n rate(ceph_rgw_put_initial_lat_count{job=~\"$job\"}[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{job=~\"$job\"},\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{rgw_host}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "PUT Latencies by RGW Instance", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "collapse": false, + "collapsed": false, + "gridPos": { + "h": 12, + "w": 9, + "x": 0, + "y": 12 + }, + "id": 9, + "panels": [ ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "RGW Overview - HAProxy Metrics", + "titleSize": "h6", + "type": "row" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 5, + "x": 0, + "y": 12 + }, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + [ + { + "alias": "/.*Back.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*1.*/" + }, + { + "alias": "/.*2.*/" + }, + { + "alias": "/.*3.*/" + }, + { + "alias": "/.*4.*/" + }, + { + "alias": "/.*5.*/" + }, + { + "alias": "/.*other.*/" + } + ] + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n haproxy_frontend_http_responses_total{code=~\"$code\", job=~\"$job_haproxy\", instance=~\"$ingress_service\", proxy=~\"frontend\"}[$__rate_interval]\n )\n) by (code)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Frontend {{ code }}", + "refId": "A" + }, + { + "expr": "sum(\n rate(\n haproxy_backend_http_responses_total{code=~\"$code\", job=~\"$job_haproxy\", instance=~\"$ingress_service\", proxy=~\"backend\"}[$__rate_interval]\n )\n) by (code)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Backend {{ code }}", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Total responses by HTTP code", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 5, + "x": 5, + "y": 12 + }, + "id": 11, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + [ + { + "alias": "/.*Response.*/", + "transform": "negative-Y" + }, + { + "alias": "/.*Backend.*/", + "transform": "negative-Y" + } + ] + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n haproxy_frontend_http_requests_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Requests", + "refId": "A" + }, + { + "expr": "sum(\n rate(\n haproxy_backend_response_errors_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Response errors", + "refId": "B" + }, + { + "expr": "sum(\n rate(\n haproxy_frontend_request_errors_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Requests errors", + "refId": "C" + }, + { + "expr": "sum(\n rate(\n haproxy_backend_redispatch_warnings_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Backend redispatch", + "refId": "D" + }, + { + "expr": "sum(\n rate(\n haproxy_backend_retry_warnings_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Backend retry", + "refId": "E" + }, + { + "expr": "sum(\n rate(\n haproxy_frontend_requests_denied_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Request denied", + "refId": "F" + }, + { + "expr": "sum(\n haproxy_backend_current_queue{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Backend Queued", + "refId": "G" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Total requests / responses", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 5, + "x": 10, + "y": 12 + }, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + [ + { + "alias": "/.*Back.*/", + "transform": "negative-Y" + } + ] + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n haproxy_frontend_connections_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Front", + "refId": "A" + }, + { + "expr": "sum(\n rate(\n haproxy_backend_connection_attempts_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Back", + "refId": "B" + }, + { + "expr": "sum(\n rate(\n haproxy_backend_connection_errors_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n )\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Back errors", + "refId": "C" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Total number of connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 12, + "w": 6, + "x": 15, + "y": 12 + }, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ + [ + { + "alias": "/.*OUT.*/", + "transform": "negative-Y" + } + ] + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(\n rate(\n haproxy_frontend_bytes_in_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "IN Front", + "refId": "A" + }, + { + "expr": "sum(\n rate(\n haproxy_frontend_bytes_out_total{proxy=~\"frontend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "OUT Front", + "refId": "B" + }, + { + "expr": "sum(\n rate(\n haproxy_backend_bytes_in_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "IN Back", + "refId": "C" + }, + { + "expr": "sum(\n rate(\n haproxy_backend_bytes_out_total{proxy=~\"backend\", job=~\"$job_haproxy\", instance=~\"$ingress_service\"}[$__rate_interval]\n ) * 8\n) by (instance)\n", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "OUT Back", + "refId": "D" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Current total of incoming / outgoing bytes", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin", + "overview" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "", + "multi": false, + "name": "rgw_servers", + "options": [ ], + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "RGW Server", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "HTTP Code", + "multi": false, + "name": "code", + "options": [ ], + "query": "label_values(haproxy_server_http_responses_total{job=~\"$job_haproxy\", instance=~\"$ingress_service\"}, code)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job haproxy", + "multi": true, + "name": "job_haproxy", + "options": [ ], + "query": "label_values(haproxy_server_status, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "Ingress Service", + "multi": false, + "name": "ingress_service", + "options": [ ], + "query": "label_values(haproxy_server_status{job=~\"$job_haproxy\"}, instance)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RGW Overview", + "uid": "WAkugZpiz", + "version": 0 +} diff --git a/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json b/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json new file mode 100644 index 000000000..e0c3037d5 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/radosgw-sync-overview.json @@ -0,0 +1,504 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.0.0" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_sum{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Replication (throughput) from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_bytes_count{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Replication (objects) from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "Objects/s", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_poll_latency_sum{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Polling Request Latency from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "id": 5, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (source_zone) (rate(ceph_data_sync_from_zone_fetch_errors{job=~\"$job\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{source_zone}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Unsuccessful Object Replications from Source Zone", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": "Count/s", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin", + "overview" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "", + "multi": false, + "name": "rgw_servers", + "options": [ ], + "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", + "refresh": 1, + "regex": "RGW Server", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RGW Sync Overview", + "uid": "rgw-sync-overview", + "version": 0 +} diff --git a/monitoring/ceph-mixin/dashboards_out/rbd-details.json b/monitoring/ceph-mixin/dashboards_out/rbd-details.json new file mode 100644 index 000000000..f64de312a --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/rbd-details.json @@ -0,0 +1,458 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.3.3" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "Detailed Performance of RBD Images (IOPS/Throughput/Latency)", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rbd_write_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Write", + "refId": "A" + }, + { + "expr": "rate(ceph_rbd_read_ops{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "iops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "iops", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rbd_write_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Write", + "refId": "A" + }, + { + "expr": "rate(ceph_rbd_read_bytes{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(ceph_rbd_write_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_write_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Write", + "refId": "A" + }, + { + "expr": "rate(ceph_rbd_read_latency_sum{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval]) /\n rate(ceph_rbd_read_latency_count{job=~\"$job\", pool=\"$pool\", image=\"$image\"}[$__rate_interval])\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{pool}} Read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Average Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "", + "multi": false, + "name": "pool", + "options": [ ], + "query": "label_values(pool)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "", + "multi": false, + "name": "image", + "options": [ ], + "query": "label_values(image)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RBD Details", + "uid": "YhCYGcuZz", + "version": 0 +} diff --git a/monitoring/ceph-mixin/dashboards_out/rbd-overview.json b/monitoring/ceph-mixin/dashboards_out/rbd-overview.json new file mode 100644 index 000000000..e017280e0 --- /dev/null +++ b/monitoring/ceph-mixin/dashboards_out/rbd-overview.json @@ -0,0 +1,737 @@ +{ + "__inputs": [ ], + "__requires": [ + { + "id": "grafana", + "name": "Grafana", + "type": "grafana", + "version": "5.4.2" + }, + { + "id": "graph", + "name": "Graph", + "type": "panel", + "version": "5.0.0" + }, + { + "id": "prometheus", + "name": "Prometheus", + "type": "datasource", + "version": "5.0.0" + }, + { + "id": "table", + "name": "Table", + "type": "panel", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "showIn": 0, + "tags": [ ], + "type": "dashboard" + } + ] + }, + "description": "", + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "A" + }, + { + "expr": "round(sum(rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "IOPS", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 0 + }, + "id": 3, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write", + "refId": "A" + }, + { + "expr": "round(sum(rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "round(\n sum(rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Write", + "refId": "A" + }, + { + "expr": "round(\n sum(rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval])) /\n sum(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]))\n)\n", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Read", + "refId": "B" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Average Latency", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "ns", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "columns": [ ], + "datasource": "$datasource", + "description": "", + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "id": 5, + "links": [ ], + "sort": { + "col": 3, + "desc": true + }, + "styles": [ + { + "alias": "Pool", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pool", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Image", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "image", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "IOPS", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "iops", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n (\n sort((\n rate(ceph_rbd_write_ops{job=~\"$job\"}[$__rate_interval]) +\n on (image, pool, namespace) rate(ceph_rbd_read_ops{job=~\"$job\"}[$__rate_interval])\n ))\n )\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Highest IOPS", + "transform": "table", + "type": "table" + }, + { + "columns": [ ], + "datasource": "$datasource", + "description": "", + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 7 + }, + "id": 6, + "links": [ ], + "sort": { + "col": 3, + "desc": true + }, + "styles": [ + { + "alias": "Pool", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pool", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Image", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "image", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Throughput", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "Bps", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n sort(\n sum(\n rate(ceph_rbd_read_bytes{job=~\"$job\"}[$__rate_interval]) +\n rate(ceph_rbd_write_bytes{job=~\"$job\"}[$__rate_interval])\n ) by (pool, image, namespace)\n )\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Highest Throughput", + "transform": "table", + "type": "table" + }, + { + "columns": [ ], + "datasource": "$datasource", + "description": "", + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 7 + }, + "id": 7, + "links": [ ], + "sort": { + "col": 3, + "desc": true + }, + "styles": [ + { + "alias": "Pool", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "pool", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Image", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "image", + "thresholds": [ ], + "type": "string", + "unit": "short", + "valueMaps": [ ] + }, + { + "alias": "Latency", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ ], + "type": "number", + "unit": "ns", + "valueMaps": [ ] + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "/.*/", + "thresholds": [ ], + "type": "hidden", + "unit": "short", + "valueMaps": [ ] + } + ], + "targets": [ + { + "expr": "topk(10,\n sum(\n rate(ceph_rbd_write_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_write_latency_count{job=~\"$job\"}[$__rate_interval]), 1) +\n rate(ceph_rbd_read_latency_sum{job=~\"$job\"}[$__rate_interval]) /\n clamp_min(rate(ceph_rbd_read_latency_count{job=~\"$job\"}[$__rate_interval]), 1)\n ) by (pool, image, namespace)\n)\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Highest Latency", + "transform": "table", + "type": "table" + } + ], + "refresh": "30s", + "rows": [ ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "ceph-mixin", + "overview" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 2, + "includeAll": true, + "label": "cluster", + "multi": true, + "name": "cluster", + "options": [ ], + "query": "label_values(ceph_osd_metadata, cluster)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { }, + "datasource": "$datasource", + "hide": 0, + "includeAll": true, + "label": "job", + "multi": true, + "name": "job", + "options": [ ], + "query": "label_values(ceph_osd_metadata{}, job)", + "refresh": 1, + "regex": "(.*)", + "sort": 1, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "RBD Overview", + "uid": "41FrpeUiz", + "version": 0 +} |