diff options
Diffstat (limited to 'monitoring/grafana/dashboards/osds-overview.json')
-rw-r--r-- | monitoring/grafana/dashboards/osds-overview.json | 876 |
1 files changed, 876 insertions, 0 deletions
diff --git a/monitoring/grafana/dashboards/osds-overview.json b/monitoring/grafana/dashboards/osds-overview.json new file mode 100644 index 00000000..4b91df9e --- /dev/null +++ b/monitoring/grafana/dashboards/osds-overview.json @@ -0,0 +1,876 @@ +{ + + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "grafana-piechart-panel", + "name": "Pie Chart", + "version": "1.3.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": false, + "gnetId": null, + "graphTooltip": 0, + "id": null, + "iteration": 1538083987689, + "links": [], + "panels": [ + { + "aliasColors": { + "@95%ile": "#e0752d" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 0 + }, + "id": 12, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "AVG read", + "refId": "A" + }, + { + "expr": "max (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MAX read", + "refId": "B" + }, + { + "expr": "quantile(0.95,\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "@95%ile", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Read Latencies", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "columns": [], + "datasource": "$datasource", + "description": "This table shows the osd's that are delivering the 10 highest read latencies within the cluster", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 15, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 2, + "desc": true + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_daemon", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Latency (ms)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_r_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_r_latency_count[1m]) * 1000)\n ))\n)\n\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Highest READ Latencies", + "transform": "table", + "type": "table" + }, + { + "aliasColors": { + "@95%ile write": "#e0752d" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "gridPos": { + "h": 8, + "w": 8, + "x": 12, + "y": 0 + }, + "id": 13, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "avg (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "AVG write", + "refId": "A" + }, + { + "expr": "max (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "MAX write", + "refId": "B" + }, + { + "expr": "quantile(0.95,\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "@95%ile write", + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "OSD Write Latencies", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "columns": [], + "datasource": "$datasource", + "description": "This table shows the osd's that are delivering the 10 highest write latencies within the cluster", + "fontSize": "100%", + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 16, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 2, + "desc": true + }, + "styles": [ + { + "alias": "OSD ID", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "ceph_daemon", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "Latency (ms)", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "pattern": "Value", + "thresholds": [], + "type": "number", + "unit": "none" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "hidden", + "unit": "short" + } + ], + "targets": [ + { + "expr": "topk(10,\n (sort(\n (irate(ceph_osd_op_w_latency_sum[1m]) / on (ceph_daemon) irate(ceph_osd_op_w_latency_count[1m]) * 1000)\n ))\n)\n\n", + "format": "table", + "instant": true, + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Highest WRITE Latencies", + "transform": "table", + "type": "table" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "$datasource", + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 8, + "w": 4, + "x": 0, + "y": 8 + }, + "id": 2, + "interval": null, + "legend": { + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "count by (device_class) (ceph_osd_metadata)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{device_class}}", + "refId": "A" + } + ], + "title": "OSD Types Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": { + "Non-Encrypted": "#E5AC0E" + }, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "$datasource", + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 8, + "w": 4, + "x": 4, + "y": 8 + }, + "height": "200px", + "hideTimeOverride": true, + "id": 4, + "interval": null, + "legend": { + "percentage": false, + "show": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "1", + "minSpan": 4, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "count(ceph_bluefs_wal_total_bytes)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "bluestore", + "refId": "A", + "step": 240 + }, + { + "expr": "count(ceph_osd_metadata) - count(ceph_bluefs_wal_total_bytes)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "filestore", + "refId": "B", + "step": 240 + }, + { + "expr": "absent(ceph_bluefs_wal_total_bytes)*count(ceph_osd_metadata)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "filestore", + "refId": "C", + "step": 240 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "OSD Objectstore Types", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": "0.05" + }, + "datasource": "$datasource", + "description": "The pie chart shows the various OSD sizes used within the cluster", + "fontSize": "80%", + "format": "none", + "gridPos": { + "h": 8, + "w": 4, + "x": 8, + "y": 8 + }, + "height": "220", + "hideTimeOverride": true, + "id": 8, + "interval": null, + "legend": { + "header": "", + "percentage": false, + "show": true, + "sideWidth": null, + "sortDesc": true, + "values": true + }, + "legendType": "Under graph", + "links": [], + "maxDataPoints": "", + "minSpan": 6, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": "1", + "targets": [ + { + "expr": "count(ceph_osd_stat_bytes < 1099511627776)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<1 TB", + "refId": "A", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 1099511627776 < 2199023255552)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<2 TB", + "refId": "B", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 2199023255552 < 3298534883328)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<3TB", + "refId": "C", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 3298534883328 < 4398046511104)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<4TB", + "refId": "D", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 4398046511104 < 6597069766656)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<6TB", + "refId": "E", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 6597069766656 < 8796093022208)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<8TB", + "refId": "F", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 8796093022208 < 10995116277760)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<10TB", + "refId": "G", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 10995116277760 < 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "<12TB", + "refId": "H", + "step": 2 + }, + { + "expr": "count(ceph_osd_stat_bytes >= 13194139533312)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "12TB+", + "refId": "I", + "step": 2 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "OSD Size Summary", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Each bar indicates the number of OSD's that have a PG count in a specific range as shown on the x axis.", + "fill": 1, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 6, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "ceph_osd_numpg\n", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "PGs per OSD", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Distribution of PGs per OSD", + "tooltip": { + "shared": false, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": 20, + "mode": "histogram", + "name": null, + "show": true, + "values": [ + "total" + ] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": "# of OSDs", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 20, + "panels": [], + "title": "R/W Profile", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "description": "Show the read/write workload profile overtime", + "fill": 1, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "round(sum(irate(ceph_pool_rd[30s])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Reads", + "refId": "A" + }, + { + "expr": "round(sum(irate(ceph_pool_wr[30s])))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "Writes", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Read/Write Profile", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "refresh": "10s", + "schemaVersion": 16, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "tags": [], + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data Source", + "name": "datasource", + "options": [], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "OSD Overview", + "uid": "lo02I1Aiz", + "version": 3 +} |