{ "__inputs": [ ], "__requires": [ { "id": "grafana", "name": "Grafana", "type": "grafana", "version": "5.3.2" }, { "id": "graph", "name": "Graph", "type": "panel", "version": "5.0.0" }, { "id": "singlestat", "name": "Singlestat", "type": "panel", "version": "5.0.0" } ], "annotations": { "list": [ { "builtIn": 1, "datasource": "-- Grafana --", "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "showIn": 0, "tags": [ ], "type": "dashboard" } ] }, "description": "", "editable": false, "gnetId": null, "graphTooltip": 0, "hideControls": false, "id": null, "links": [ ], "panels": [ { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 4, "x": 0, "y": 0 }, "id": 2, "interval": null, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "count(sum by (hostname) (ceph_osd_metadata{job=~\"$job\"}))", "format": "time_series", "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "OSD Hosts", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "description": "Average CPU busy across all hosts (OSD, RGW, MON etc) within the cluster", "format": "percentunit", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 4, "x": 4, "y": 0 }, "id": 3, "interval": null, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "avg(1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n))\n", "format": "time_series", "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "AVG CPU Busy", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "description": "Average Memory Usage across all hosts in the cluster (excludes buffer/cache usage)", "format": "percentunit", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 4, "x": 8, "y": 0 }, "id": 4, "interval": null, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "avg ((\n (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) - ((\n node_memory_MemFree{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemFree_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}) +\n (\n node_memory_Cached{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Cached_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Buffers{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Buffers_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n ) + (\n node_memory_Slab{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_Slab_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}\n )\n )\n) / (\n node_memory_MemTotal{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"} or\n node_memory_MemTotal_bytes{instance=~\"($osd_hosts|$rgw_hosts|$mon_hosts|$mds_hosts).*\"}\n))\n", "format": "time_series", "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "AVG RAM Utilization", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "description": "IOPS Load at the device as reported by the OS on all OSD hosts", "format": "none", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 4, "x": 12, "y": 0 }, "id": 5, "interval": null, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum ((\n rate(node_disk_reads_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_reads_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n) + (\n rate(node_disk_writes_completed{instance=~\"($osd_hosts).*\"}[$__rate_interval]) or\n rate(node_disk_writes_completed_total{instance=~\"($osd_hosts).*\"}[$__rate_interval])\n))\n", "format": "time_series", "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "Physical IOPS", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "description": "Average Disk utilization for all OSD data devices (i.e. excludes journal/WAL)", "format": "percent", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 4, "x": 16, "y": 0 }, "id": 6, "interval": null, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "avg (\n label_replace(\n (rate(node_disk_io_time_ms[$__rate_interval]) / 10 ) or\n (rate(node_disk_io_time_seconds_total[$__rate_interval]) * 100),\n \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n ) * on(instance, device) group_left(ceph_daemon) label_replace(\n label_replace(\n ceph_disk_occupation_human{job=~\"$job\", instance=~\"($osd_hosts).*\"},\n \"device\", \"$1\", \"device\", \"/dev/(.*)\"\n ), \"instance\", \"$1\", \"instance\", \"([^.:]*).*\"\n )\n)\n", "format": "time_series", "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "AVG Disk Utilization", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "cacheTimeout": null, "colorBackground": false, "colorValue": false, "colors": [ "#299c46", "rgba(237, 129, 40, 0.89)", "#d44a3a" ], "datasource": "$datasource", "description": "Total send/receive network load across all hosts in the ceph cluster", "format": "bytes", "gauge": { "maxValue": 100, "minValue": 0, "show": false, "thresholdLabels": false, "thresholdMarkers": true }, "gridPos": { "h": 5, "w": 4, "x": 20, "y": 0 }, "id": 7, "interval": null, "links": [ ], "mappingType": 1, "mappingTypes": [ { "name": "value to text", "value": 1 }, { "name": "range to text", "value": 2 } ], "maxDataPoints": 100, "nullPointMode": "connected", "nullText": null, "postfix": "", "postfixFontSize": "50%", "prefix": "", "prefixFontSize": "50%", "rangeMaps": [ { "from": "null", "text": "N/A", "to": "null" } ], "sparkline": { "fillColor": "rgba(31, 118, 189, 0.18)", "full": false, "lineColor": "rgb(31, 120, 193)", "show": false }, "tableColumn": "", "targets": [ { "expr": "sum (\n (\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n) +\nsum (\n (\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|mon_hosts|mds_hosts|rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n ) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\")\n)\n", "format": "time_series", "instant": true, "intervalFactor": 1, "legendFormat": "", "refId": "A" } ], "thresholds": "", "title": "Network Load", "type": "singlestat", "valueFontSize": "80%", "valueMaps": [ { "op": "=", "text": "N/A", "value": "null" } ], "valueName": "current" }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "Show the top 10 busiest hosts by cpu", "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 0, "y": 5 }, "id": 8, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "topk(10,\n 100 * (\n 1 - (\n avg by(instance) (\n rate(node_cpu_seconds_total{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval]) or\n rate(node_cpu{mode='idle',instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\"}[$__rate_interval])\n )\n )\n )\n)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "CPU Busy - Top 10 Hosts", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "percent", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] }, { "aliasColors": { }, "bars": false, "dashLength": 10, "dashes": false, "datasource": "$datasource", "description": "Top 10 hosts by network load", "fill": 1, "fillGradient": 0, "gridPos": { "h": 9, "w": 12, "x": 12, "y": 5 }, "id": 9, "legend": { "alignAsTable": false, "avg": false, "current": false, "max": false, "min": false, "rightSide": false, "show": true, "sideWidth": null, "total": false, "values": false }, "lines": true, "linewidth": 1, "links": [ ], "nullPointMode": "null", "percentage": false, "pointradius": 5, "points": false, "renderer": "flot", "repeat": null, "seriesOverrides": [ ], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { "expr": "topk(10, (sum by(instance) (\n(\n rate(node_network_receive_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_receive_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) +\n(\n rate(node_network_transmit_bytes{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval]) or\n rate(node_network_transmit_bytes_total{instance=~\"($osd_hosts|$mon_hosts|$mds_hosts|$rgw_hosts).*\",device!=\"lo\"}[$__rate_interval])\n) unless on (device, instance)\n label_replace((bonding_slaves > 0), \"device\", \"$1\", \"master\", \"(.+)\"))\n))\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", "refId": "A" } ], "thresholds": [ ], "timeFrom": null, "timeShift": null, "title": "Network Load - Top 10 Hosts", "tooltip": { "shared": true, "sort": 0, "value_type": "individual" }, "type": "graph", "xaxis": { "buckets": null, "mode": "time", "name": null, "show": true, "values": [ ] }, "yaxes": [ { "format": "Bps", "label": null, "logBase": 1, "max": null, "min": 0, "show": true }, { "format": "short", "label": null, "logBase": 1, "max": null, "min": 0, "show": true } ] } ], "refresh": "30s", "rows": [ ], "schemaVersion": 16, "style": "dark", "tags": [ "ceph-mixin" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "hide": 0, "label": "Data Source", "name": "datasource", "options": [ ], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "allValue": ".+", "current": { }, "datasource": "$datasource", "hide": 2, "includeAll": true, "label": "cluster", "multi": true, "name": "cluster", "options": [ ], "query": "label_values(ceph_osd_metadata, cluster)", "refresh": 1, "regex": "(.*)", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": ".+", "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": "job", "multi": true, "name": "job", "options": [ ], "query": "label_values(ceph_osd_metadata{}, job)", "refresh": 1, "regex": "(.*)", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "osd_hosts", "options": [ ], "query": "label_values(ceph_disk_occupation{job=~\"$job\"}, exported_instance)", "refresh": 1, "regex": "([^.]*).*", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "mon_hosts", "options": [ ], "query": "label_values(ceph_mon_metadata{job=~\"$job\"}, ceph_daemon)", "refresh": 1, "regex": "mon.(.*)", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "mds_hosts", "options": [ ], "query": "label_values(ceph_mds_inodes{job=~\"$job\"}, ceph_daemon)", "refresh": 1, "regex": "mds.(.*)", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false }, { "allValue": null, "current": { }, "datasource": "$datasource", "hide": 0, "includeAll": true, "label": null, "multi": false, "name": "rgw_hosts", "options": [ ], "query": "label_values(ceph_rgw_metadata{job=~\"$job\"}, ceph_daemon)", "refresh": 1, "regex": "rgw.(.*)", "sort": 1, "tagValuesQuery": "", "tags": [ ], "tagsQuery": "", "type": "query", "useTags": false } ] }, "time": { "from": "now-1h", "to": "now" }, "timepicker": { "refresh_intervals": [ "5s", "10s", "30s", "1m", "5m", "15m", "30m", "1h", "2h", "1d" ], "time_options": [ "5m", "15m", "1h", "6h", "12h", "24h", "2d", "7d", "30d" ] }, "timezone": "", "title": "Host Overview", "uid": "y0KGL0iZz", "version": 0 }