diff options
Diffstat (limited to 'web/gui/dashboard_info.js')
-rw-r--r-- | web/gui/dashboard_info.js | 925 |
1 files changed, 732 insertions, 193 deletions
diff --git a/web/gui/dashboard_info.js b/web/gui/dashboard_info.js index 35834aaf..db60fd81 100644 --- a/web/gui/dashboard_info.js +++ b/web/gui/dashboard_info.js @@ -334,9 +334,15 @@ netdataDashboard.menu = { }, 'postgres': { - title: 'Postgres', + title: 'PostgreSQL', icon: '<i class="fas fa-database"></i>', - info: 'Performance metrics for <b>PostgresSQL</b>, the object-relational database (ORDBMS).' + info: 'Performance metrics for <b>PostgreSQL</b>, the open source object-relational database management system (ORDBMS).' + }, + + 'pgbouncer': { + title: 'PgBouncer', + icon: '<i class="fas fa-exchange-alt"></i>', + info: 'Performance metrics for PgBouncer, an open source connection pooler for PostgreSQL.' }, 'redis': { @@ -461,8 +467,9 @@ netdataDashboard.menu = { }, 'chrony': { + title: 'Chrony', icon: '<i class="fas fa-clock"></i>', - info: 'chronyd parameters about the system’s clock performance.' + info: 'The system’s clock performance and peers activity status.' }, 'couchdb': { @@ -686,6 +693,12 @@ netdataDashboard.menu = { icon: '<i class="fas fa-shield-alt"></i>', info: 'Netdata keeps track of the current jail status by reading the Fail2ban log file.' }, + + 'wireguard': { + title: 'WireGuard', + icon: '<i class="fas fa-dragon"></i>', + info: 'VPN network interfaces and peers traffic.' + }, }; @@ -1123,8 +1136,7 @@ const netDuplexInfo = '<p>The interface\'s latest or current ' + '<a href="https://en.wikipedia.org/wiki/Autonegotiation" target="_blank">negotiated</a> with the device it is connected to.</p>' + '<p><b>Unknown</b> - the duplex mode can not be determined. ' + '<b>Half duplex</b> - the communication is one direction at a time. 
' + - '<b>Full duplex</b> - the interface is able to send and receive data simultaneously.</p>' + - '<p><b>State map</b>: 0 - unknown, 1 - half, 2 - full.</p>' + '<b>Full duplex</b> - the interface is able to send and receive data simultaneously.</p>' const netOperstateInfo = '<p>The current ' + '<a href="https://datatracker.ietf.org/doc/html/rfc2863" target="_blank">operational state</a> of the interface.</p>' + '<p><b>Unknown</b> - the state can not be determined. ' + @@ -1133,10 +1145,8 @@ const netOperstateInfo = '<p>The current ' + '<b>LowerLayerDown</b> - the interface is down due to state of lower-layer interface(s). ' + '<b>Testing</b> - the interface is in testing mode, e.g. cable test. It can’t be used for normal traffic until tests complete. ' + '<b>Dormant</b> - the interface is L1 up, but waiting for an external event, e.g. for a protocol to establish. ' + - '<b>Up</b> - the interface is ready to pass packets and can be used.</p>' + - '<p><b>State map</b>: 0 - unknown, 1 - notpresent, 2 - down, 3 - lowerlayerdown, 4 - testing, 5 - dormant, 6 - up.</p>' -const netCarrierInfo = '<p>The current physical link state of the interface.</p>' + - '<p><b>State map</b>: 0 - down, 1 - up.</p>' + '<b>Up</b> - the interface is ready to pass packets and can be used.</p>' +const netCarrierInfo = 'The current physical link state of the interface.' const netSpeedInfo = 'The interface\'s latest or current speed that the network adapter ' + '<a href="https://en.wikipedia.org/wiki/Autonegotiation" target="_blank">negotiated</a> with the device it is connected to. ' + 'This does not give the max supported speed of the NIC.' @@ -1286,6 +1296,41 @@ const ebpfUDPrecv = 'Number of calls to <a href="https://learn.netdata.cloud/doc 'Netdata gives a summary for this chart in <a href="#ebpf_global_udp_bandwidth_call">Network Stack</a>. 
' + 'When the integration is <a href="https://learn.netdata.cloud/guides/troubleshoot/monitor-debug-applications-ebpf" target="_blank">enabled</a>, Netdata shows UDP calls per <a href="#ebpf_apps_udp_recv">application</a>.' + ebpfChartProvides +const cgroupCPULimit = 'Total CPU utilization within the configured or system-wide (if not set) limits. When the CPU utilization of a cgroup exceeds the limit for the configured period, the tasks belonging to its hierarchy will be throttled and are not allowed to run again until the next period.' +const cgroupCPU = 'Total CPU utilization within the system-wide CPU resources (all cores). The amount of time spent by tasks of the cgroup in <a href="https://en.wikipedia.org/wiki/CPU_modes#Mode_types" target="_blank">user and kernel</a> modes.' +const cgroupThrottled = 'The percentage of runnable periods when tasks in a cgroup have been throttled. The tasks have not been allowed to run because they have exhausted all of the available time as specified by their CPU quota.' +const cgroupThrottledDuration = 'The total time duration for which tasks in a cgroup have been throttled. When an application has used its allotted CPU quota for a given period, it gets throttled until the next period.' +const cgroupCPUShared = '<p>The weight of each group living in the same hierarchy, that translates into the amount of CPU it is expected to get. The percentage of CPU assigned to the cgroup is the value of shares divided by the sum of all shares in all cgroups in the same level.</p> <p>For example, tasks in two cgroups that have <b>cpu.shares</b> set to 100 will receive equal CPU time, but tasks in a cgroup that has <b>cpu.shares</b> set to 200 receive twice the CPU time of tasks in a cgroup where <b>cpu.shares</b> is set to 100.</p>' +const cgroupCPUPerCore = 'Total CPU utilization per core within the system-wide CPU resources.' 
+const cgroupCPUSomePressure = 'CPU <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. <b>Some</b> indicates the share of time in which at least <b>some tasks</b> are stalled on CPU. The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' +const cgroupCPUSomePressureStallTime = 'The amount of time some processes have been waiting for CPU time.' +const cgroupCPUFullPressure = 'CPU <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. <b>Full</b> indicates the share of time in which <b>all non-idle tasks</b> are stalled on CPU resource simultaneously. The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' +const cgroupCPUFullPressureStallTime = 'The amount of time all non-idle processes have been stalled due to CPU congestion.' + +const cgroupMemUtilization = 'RAM utilization within the configured or system-wide (if not set) limits. When the RAM utilization of a cgroup exceeds the limit, OOM killer will start killing the tasks belonging to the cgroup.' +const cgroupMemUsageLimit = 'RAM usage within the configured or system-wide (if not set) limits. When the RAM usage of a cgroup exceeds the limit, OOM killer will start killing the tasks belonging to the cgroup.' +const cgroupMemUsage = 'The amount of used RAM and swap memory.' +const cgroupMem = 'Memory usage statistics. The individual metrics are described in the memory.stat section for <a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/memory.html#per-memory-cgroup-local-status" target="_blank">cgroup-v1</a> and <a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files" target="_blank">cgroup-v2</a>.' +const cgroupMemFailCnt = 'The number of memory usage hits limits.' +const cgroupWriteback = '<b>Dirty</b> is the amount of memory waiting to be written to disk. 
<b>Writeback</b> is how much memory is actively being written to disk.' +const cgroupMemActivity = '<p>Memory accounting statistics.</p><p><b>In</b> - a page is accounted as either mapped anon page (RSS) or cache page (Page Cache) to the cgroup. <b>Out</b> - a page is unaccounted from the cgroup.</p>' +const cgroupPgFaults = '<p>Memory <a href="https://en.wikipedia.org/wiki/Page_fault" target="_blank">page fault</a> statistics.</p><p><b>Pgfault</b> - all page faults. <b>Swap</b> - major page faults.</p>' +const cgroupMemorySomePressure = 'Memory <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. <b>Some</b> indicates the share of time in which at least <b>some tasks</b> are stalled on memory. In this state the CPU is still doing productive work. The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' +const cgroupMemorySomePressureStallTime = 'The amount of time some processes have been waiting due to memory congestion.' +const cgroupMemoryFullPressure = 'Memory <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. <b>Full</b> indicates the share of time in which <b>all non-idle tasks</b> are stalled on memory resource simultaneously. In this state actual CPU cycles are going to waste, and a workload that spends extended time in this state is considered to be thrashing. This has severe impact on performance. The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' +const cgroupMemoryFullPressureStallTime = 'The amount of time all non-idle processes have been stalled due to memory congestion.' + +const cgroupIO = 'The amount of data transferred to and from specific devices as seen by the CFQ scheduler. It is not updated when the CFQ scheduler is operating on a request queue.' +const cgroupServicedOps = 'The number of I/O operations performed on specific devices as seen by the CFQ scheduler.' 
+const cgroupQueuedOps = 'The number of requests queued for I/O operations.' +const cgroupMergedOps = 'The number of BIOS requests merged into requests for I/O operations.' +const cgroupThrottleIO = 'The amount of data transferred to and from specific devices as seen by the throttling policy.' +const cgroupThrottleIOServicesOps = 'The number of I/O operations performed on specific devices as seen by the throttling policy.' +const cgroupIOSomePressure = 'I/O <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. <b>Some</b> indicates the share of time in which at least <b>some tasks</b> are stalled on I/O. In this state the CPU is still doing productive work. The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' +const cgroupIOSomePRessureStallTime = 'The amount of time some processes have been waiting due to I/O congestion.' +const cgroupIOFullPressure = 'I/O <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. <b>Full</b> line indicates the share of time in which <b>all non-idle tasks</b> are stalled on I/O resource simultaneously. In this state actual CPU cycles are going to waste, and a workload that spends extended time in this state is considered to be thrashing. This has severe impact on performance. The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' +const cgroupIOFullPressureStallTime = 'The amount of time all non-idle processes have been stalled due to I/O congestion.' + netdataDashboard.context = { 'system.cpu': { info: function (os) { @@ -1706,6 +1751,11 @@ netdataDashboard.context = { info: 'Committed Memory, is the sum of all memory which has been allocated by processes.' }, + 'mem.real': { + colors: NETDATA.colors[3], + info: 'Total amount of real (physical) memory used.' 
+ }, + 'mem.oom_kill': { info: 'The number of processes killed by '+ '<a href="https://en.wikipedia.org/wiki/Out_of_memory" target="_blank">Out of Memory</a> Killer. '+ @@ -1734,7 +1784,12 @@ netdataDashboard.context = { }, 'mem.available': { - info: 'Available Memory is estimated by the kernel, as the amount of RAM that can be used by userspace processes, without causing swapping.' + info: function (os) { + if (os === "freebsd") + return 'The amount of memory that can be used by user-space processes without causing swapping. Calculated as the sum of free, cached, and inactive memory.'; + else + return 'Available Memory is estimated by the kernel, as the amount of RAM that can be used by userspace processes, without causing swapping.'; + } }, 'mem.writeback': { @@ -3132,6 +3187,64 @@ netdataDashboard.context = { info: netMTUInfo }, + 'k8s.cgroup.net_net': { + mainheads: [ + function (_, id) { + var iface; + try { + iface = ' ' + id.substring(id.lastIndexOf('.net_') + 5, id.length); + } catch (e) { + iface = ''; + } + return netdataDashboard.gaugeChart('Received' + iface, '12%', 'received'); + + }, + function (_, id) { + var iface; + try { + iface = ' ' + id.substring(id.lastIndexOf('.net_') + 5, id.length); + } catch (e) { + iface = ''; + } + return netdataDashboard.gaugeChart('Sent' + iface, '12%', 'sent'); + } + ], + info: netBytesInfo + }, + 'k8s.cgroup.net_packets': { + info: netPacketsInfo + }, + 'k8s.cgroup.net_errors': { + info: netErrorsInfo + }, + 'k8s.cgroup.net_fifo': { + info: netFIFOInfo + }, + 'k8s.cgroup.net_drops': { + info: netDropsInfo + }, + 'k8s.cgroup.net_compressed': { + info: netCompressedInfo + }, + 'k8s.cgroup.net_events': { + info: netEventsInfo + }, + 'k8s.cgroup.net_operstate': { + info: netOperstateInfo + }, + 'k8s.cgroup.net_duplex': { + info: netDuplexInfo + }, + 'k8s.cgroup.net_carrier': { + info: netCarrierInfo + }, + 'k8s.cgroup.net_speed': { + info: netSpeedInfo + }, + 'k8s.cgroup.net_mtu': { + info: netMTUInfo + }, + // 
------------------------------------------------------------------------ // WIRELESS NETWORK INTERFACES @@ -3690,7 +3803,7 @@ netdataDashboard.context = { // ------------------------------------------------------------------------ // POSTGRESQL - + // python version start 'postgres.db_stat_blks': { info: 'Blocks reads from disk or cache.<ul>' + '<li><strong>blks_read:</strong> number of disk blocks read in this database.</li>' + @@ -3772,7 +3885,153 @@ netdataDashboard.context = { '</ul>' + 'For more information see <a href="https://www.postgresql.org/docs/current/routine-vacuuming.html" target="_blank">Preventing Transaction ID Wraparound Failures</a>.' }, + // python version end + 'postgres.connections_utilization': { + info: 'Connections in use as percentage of <i>max_connections</i>. Connection "slots" that are reserved for superusers (<i>superuser_reserved_connections</i>) are subtracted from the limit. If the utilization is 100% new connections will be accepted only for superusers, and no new replication connections will be accepted.' + }, + 'postgres.connections_usage': { + info: '<p>Connections usage. The maximum number of concurrent connections to the database server is <i>max_connections</i> minus <i>superuser_reserved_connections</i>.</p><p><b>Available</b> - new connections allowed. <b>Used</b> - connections currently in use.</p>' + }, + 'postgres.checkpoints': { + info: '<p>Number of checkpoints that have been performed. Checkpoints are periodic maintenance operations the database performs to make sure that everything it’s been caching in memory has been synchronized with the disk. It’s desirable when checkpoints are scheduled rather than requested, as the latter can indicate that your databases are under heavy load.</p><p><b>Scheduled</b> - checkpoints triggered due that the time elapsed from the previous checkpoint is more than pg setting <i>checkpoint_timeout</i>. 
<b>Requested</b> - checkpoints ran because the uncheckpointed WAL size grew beyond the <i>max_wal_size</i> setting.</p>' + }, + 'postgres.checkpoint_time': { + info: '<p>Checkpoint timing information.</p><p><b>Write</b> - amount of time that has been spent in the portion of checkpoint processing where files are written to disk. <b>Sync</b> - amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk.</p>' + }, + 'postgres.bgwriter_buffers_alloc': { + info: 'Allocated and re-allocated buffers. If a backend process requests data it is either found in a block in shared buffer cache or the block has to be allocated (read from disk). The latter is counted as <b>Allocated</b>.' + }, + 'postgres.bgwriter_buffers_written': { + info: '<p>Amount of data flushed from memory to disk.</p><p><b>Checkpoint</b> - buffers written during checkpoints. <b>Backend</b> - buffers written directly by a backend. It may happen that a dirty page is requested by a backend process. In this case the page is synched to disk before the page is returned to the client. <b>Clean</b> - buffers written by the background writer. PostgreSQL may clear pages with a low usage count in advance. The process scans for dirty pages with a low usage count so that they could be cleared if necessary. Buffers written by this process increment the counter.</p>' + }, + 'postgres.bgwriter_maxwritten_clean': { + info: 'Number of times the background writer stopped a cleaning scan because it had written too many buffers (exceeding the value of <i>bgwriter_lru_maxpages</i>).' + }, + 'postgres.bgwriter_buffers_backend_fsync': { + info: 'Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write). Any values above zero can indicate problems with storage when fsync queue is completely filled. 
' + }, + 'postgres.wal_archive_files': { + info: '<p>WAL archiving.</p><p><b>Ready</b> - WAL files waiting to be archived. A non-zero value can indicate <i>archive_command</i> is in error, see <a href="https://www.postgresql.org/docs/current/static/continuous-archiving.html" target="_blank">Continuous Archiving and Point-in-Time Recovery</a>. <b>Done</b> - WAL files successfully archived.</p>' + }, + 'postgres.autovacuum_workers': { + info: 'PostgreSQL databases require periodic maintenance known as vacuuming. For many installations, it is sufficient to let vacuuming be performed by the autovacuum daemon. For more information see <a href="https://www.postgresql.org/docs/current/static/routine-vacuuming.html#AUTOVACUUM" target="_blank">The Autovacuum Daemon</a>.' + }, + 'postgres.percent_towards_emergency_autovacuum': { + info: 'Percentage towards emergency autovacuum for one or more tables. A forced autovacuum will run once this value reaches 100%. For more information see <a href="https://www.postgresql.org/docs/current/routine-vacuuming.html#VACUUM-FOR-WRAPAROUND" target="_blank">Preventing Transaction ID Wraparound Failures</a>.' + }, + 'postgres.percent_towards_txid_wraparound': { + info: 'Percentage towards transaction wraparound. A transaction wraparound may occur when this value reaches 100%. For more information see <a href="https://www.postgresql.org/docs/current/routine-vacuuming.html#VACUUM-FOR-WRAPAROUND" target="_blank">Preventing Transaction ID Wraparound Failures</a>.' + }, + 'postgres.oldest_transaction_xid': { + info: 'The oldest current transaction ID (XID). If for some reason autovacuum fails to clear old XIDs from a table, the system will begin to emit warning messages when the database\'s oldest XIDs reach eleven million transactions from the wraparound point. For more information see <a href="https://www.postgresql.org/docs/current/routine-vacuuming.html#VACUUM-FOR-WRAPAROUND" target="_blank">Preventing Transaction ID Wraparound Failures</a>.' 
+ }, + 'postgres.uptime': { + info: 'The time elapsed since the Postgres process was started.' + }, + 'postgres.replication_standby_app_wal_delta': { + info: '<p>Replication WAL delta.</p><p><b>SentDelta</b> - sent over the network. <b>WriteDelta</b> - written to disk. <b>FlushDelta</b> - flushed to disk. <b>ReplayDelta</b> - replayed into the database.</p>' + }, + 'postgres.replication_standby_app_wal_lag': { + info: '<p>Replication WAL lag.</p><p><b>WriteLag</b> - time elapsed between flushing recent WAL locally and receiving notification that the standby server has written it, but not yet flushed it or applied it. <b>FlushLag</b> - time elapsed between flushing recent WAL locally and receiving notification that the standby server has written and flushed it, but not yet applied it. <b>ReplayLag</b> - time elapsed between flushing recent WAL locally and receiving notification that the standby server has written, flushed and applied it.</p>' + }, + 'postgres.replication_slot_files': { + info: '<p>Replication slot files. For more information see <a href="https://www.postgresql.org/docs/current/static/warm-standby.html#STREAMING-REPLICATION-SLOTS" target="_blank">Replication Slots</a>.</p><p><b>WalKeep</b> - WAL files retained by the replication slot. <b>PgReplslotFiles</b> - files present in pg_replslot.</p>' + }, + + 'postgres.db_transactions_ratio': { + info: 'Percentage of committed/rollback transactions.' + }, + 'postgres.db_transactions': { + info: '<p>Number of transactions that have been performed.</p><p><b>Committed</b> - transactions that have been committed. All changes made by the committed transaction become visible to others and are guaranteed to be durable if a crash occurs. <b>Rollback</b> - transactions that have been rolled back. Rollback aborts the current transaction and causes all the updates made by the transaction to be discarded. 
Single queries that have failed outside the transactions are also accounted as rollbacks.</p>' + }, + 'postgres.db_connections_utilization': { + info: 'Connections in use as percentage of the database\'s <i>CONNECTION LIMIT</i> (if set) or <i>max_connections</i>.' + }, + 'postgres.db_connections': { + info: 'Number of backends currently connected to this database.' + }, + 'postgres.db_buffer_cache_hit_ratio': { + info: 'Buffer cache hit ratio. When clients request data, postgres checks shared memory and if there are no relevant data there it has to read it from disk, thus queries become slower.' + }, + 'postgres.db_blocks_read': { + info: '<p>Number of blocks read from shared buffer cache or from disk.</p><p><b>disk</b> - number of disk blocks read. <b>memory</b> - number of times disk blocks were found already in the buffer cache, so that a read was not necessary (this only includes hits in the PostgreSQL buffer cache, not the operating system\'s file system cache).</p>' + }, + 'postgres.db_rows_read_ratio': { + info: 'Percentage of returned/fetched rows.' + }, + 'postgres.db_rows_read': { + info: '<p>Read queries throughput.</p><p><b>Returned</b> - number of rows returned by queries. The value keeps track of the number of rows read/scanned, not the rows actually returned to the client. <b>Fetched</b> - number of rows fetched that contained data necessary to execute the query successfully.</p>' + }, + 'postgres.db_rows_written': { + info: '<p>Write queries throughput.</p><p><b>Inserted</b> - number of rows inserted by queries. <b>Deleted</b> - number of rows deleted by queries. <b>Updated</b> - number of rows updated by queries.</p>' + }, + 'postgres.db_conflicts': { + info: 'Number of queries canceled due to conflicts with recovery. Conflicts occur only on standby servers.' 
+ }, + 'postgres.db_conflicts_stat': { + info: '<p>Number of queries canceled due to conflicts with recovery.</p><p><b>Tablespace</b> - queries that have been canceled due to dropped tablespaces. <b>Lock</b> - queries that have been canceled due to lock timeouts. <b>Snapshot</b> - queries that have been canceled due to old snapshots. <b>Bufferpin</b> - queries that have been canceled due to pinned buffers. <b>Deadlock</b> - queries that have been canceled due to deadlocks.</p>' + }, + 'postgres.db_deadlocks': { + info: 'Number of detected deadlocks. When a transaction cannot acquire the requested lock within a certain amount of time (configured by <b>deadlock_timeout</b>), it begins deadlock detection.' + }, + 'postgres.db_locks_held': { + info: 'Number of held locks. Some of these lock modes are acquired by PostgreSQL automatically before statement execution, while others are provided to be used by applications. All lock modes acquired in a transaction are held for the duration of the transaction. For lock modes details, see <a href="https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-TABLES" target="_blank">table-level locks</a>.' + }, + 'postgres.db_locks_awaited': { + info: 'Number of awaited locks. It indicates that some transaction is currently waiting to acquire a lock, which implies that some other transaction is holding a conflicting lock mode on the same lockable object. For lock modes details, see <a href="https://www.postgresql.org/docs/current/explicit-locking.html#LOCKING-TABLES" target="_blank">table-level locks</a>.' + }, + 'postgres.db_temp_files': { + info: 'Number of temporary files created by queries. Complex queries may require more memory than is available (specified by <b>work_mem</b>). When this happens, Postgres reverts to using temporary files - they are actually stored on disk, but only exist for the duration of the request. After the request returns, the temporary files are deleted.' 
+ }, + 'postgres.db_temp_files_data': { + info: 'Amount of data written temporarily to disk to execute queries.' + }, + 'postgres.db_size': { + info: 'Actual on-disk usage of the database\'s data directory and any associated tablespaces.' + }, + + // ------------------------------------------------------------------------ + // PgBouncer + 'pgbouncer.client_connections_utilization': { + info: 'Client connections in use as percentage of <i>max_client_conn</i> (default 100).' + }, + 'pgbouncer.db_client_connections': { + info: '<p>Client connections in different states.</p><p><b>Active</b> - linked to server connection and can process queries. <b>Waiting</b> - have sent queries but have not yet got a server connection. <b>CancelReq</b> - have not forwarded query cancellations to the server yet.</p>' + }, + 'pgbouncer.db_server_connections': { + info: '<p>Server connections in different states.</p><p><b>Active</b> - linked to a client. <b>Idle</b> - unused and immediately usable for client queries. <b>Used</b> - have been idle for more than <i>server_check_delay</i>, so they need <i>server_check_query</i> to run on them before they can be used again. <b>Tested</b> - currently running either <i>server_reset_query</i> or <i>server_check_query</i>. <b>Login</b> - currently in the process of logging in.</p>' + }, + 'pgbouncer.db_server_connections_utilization': { + info: 'Server connections in use as percentage of <i>max_db_connections</i> (default 0 - unlimited). This considers the PgBouncer database that the client has connected to, not the PostgreSQL database of the outgoing connection.' + }, + 'pgbouncer.db_clients_wait_time': { + info: 'Time spent by clients waiting for a server connection. This shows if the decrease in database performance from the client\'s point of view was due to exhaustion of the corresponding PgBouncer pool.' + }, + 'pgbouncer.db_client_max_wait_time': { + info: 'Waiting time for the first (oldest) client in the queue. 
If this starts increasing, then the current pool of servers does not handle requests quickly enough.' + }, + 'pgbouncer.db_transactions': { + info: 'SQL transactions pooled (proxied) by pgbouncer.' + }, + 'pgbouncer.db_transactions_time': { + info: 'Time spent by pgbouncer when connected to PostgreSQL in a transaction, either idle in transaction or executing queries.' + }, + 'pgbouncer.db_transaction_avg_time': { + info: 'Average transaction duration.' + }, + 'pgbouncer.db_queries': { + info: 'SQL queries pooled (proxied) by pgbouncer.' + }, + 'pgbouncer.db_queries_time': { + info: 'Time spent by pgbouncer when actively connected to PostgreSQL, executing queries.' + }, + 'pgbouncer.db_query_avg_time': { + info: 'Average query duration.' + }, + 'pgbouncer.db_network_io': { + info: '<p>Network traffic received and sent by pgbouncer.</p><p><b>Received</b> - received from clients. <b>Sent</b> - sent to servers.</p>' + }, // ------------------------------------------------------------------------ // APACHE @@ -4000,8 +4259,7 @@ netdataDashboard.context = { 'cgroup.cpu_limit': { valueRange: "[0, null]", mainheads: [ - function (os, id) { - void (os); + function (_, id) { cgroupCPULimitIsSet = 1; return '<div data-netdata="' + id + '"' + ' data-dimensions="used"' @@ -4018,15 +4276,11 @@ netdataDashboard.context = { + ' role="application"></div>'; } ], - info: 'Total CPU utilization within the configured or system-wide (if not set) limits. '+ - 'When the CPU utilization of a cgroup exceeds the limit for the configured period, '+ - 'the tasks belonging to its hierarchy will be throttled and are not allowed to run again until the next period.' 
+ info: cgroupCPULimit }, - 'cgroup.cpu': { mainheads: [ - function (os, id) { - void (os); + function (_, id) { if (cgroupCPULimitIsSet === 0) { return '<div data-netdata="' + id + '"' + ' data-chart-library="gauge"' @@ -4043,60 +4297,107 @@ netdataDashboard.context = { return ''; } ], - info: 'Total CPU utilization within the system-wide CPU resources (all cores). '+ - 'The amount of time spent by tasks of the cgroup in '+ - '<a href="https://en.wikipedia.org/wiki/CPU_modes#Mode_types" target="_blank">user and kernel</a> modes.' + info: cgroupCPU }, - 'cgroup.throttled': { - info: 'The percentage of runnable periods when tasks in a cgroup have been throttled. '+ - 'The tasks have not been allowed to run because they have exhausted all of the available time as specified by their CPU quota.' + info: cgroupThrottled }, - 'cgroup.throttled_duration': { - info: 'The total time duration for which tasks in a cgroup have been throttled. '+ - 'When an application has used its allotted CPU quota for a given period, it gets throttled until the next period.' + info: cgroupThrottledDuration }, - 'cgroup.cpu_shares': { - info: '<p>The weight of each group living in the same hierarchy, that translates into the amount of CPU it is expected to get. '+ - 'The percentage of CPU assigned to the cgroup is the value of shares divided by the sum of all shares in all cgroups in the same level.</p>'+ - '<p>For example, tasks in two cgroups that have <b>cpu.shares</b> set to 100 will receive equal CPU time, '+ - 'but tasks in a cgroup that has <b>cpu.shares</b> set to 200 receive twice the CPU time of tasks in a cgroup where <b>cpu.shares</b> is set to 100.</p>' + info: cgroupCPUShared }, - 'cgroup.cpu_per_core': { - info: 'Total CPU utilization per core within the system-wide CPU resources.' + info: cgroupCPUPerCore }, - 'cgroup.cpu_some_pressure': { - info: 'CPU <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. 
'+ - '<b>Some</b> indicates the share of time in which at least <b>some tasks</b> are stalled on CPU. ' + - 'The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' + info: cgroupCPUSomePressure }, 'cgroup.cpu_some_pressure_stall_time': { - info: 'The amount of time some processes have been waiting for CPU time.' + info: cgroupCPUSomePressureStallTime }, - 'cgroup.cpu_full_pressure': { - info: 'CPU <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. ' + - '<b>Full</b> indicates the share of time in which <b>all non-idle tasks</b> are stalled on CPU resource simultaneously. ' + - 'The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' + info: cgroupCPUFullPressure }, 'cgroup.cpu_full_pressure_stall_time': { - info: 'The amount of time all non-idle processes have been stalled due to CPU congestion.' + info: cgroupCPUFullPressureStallTime }, - 'cgroup.mem_utilization': { - info: 'RAM utilization within the configured or system-wide (if not set) limits. '+ - 'When the RAM utilization of a cgroup exceeds the limit, '+ - 'OOM killer will start killing the tasks belonging to the cgroup.' 
+ 'k8s.cgroup.cpu_limit': { + valueRange: "[0, null]", + mainheads: [ + function (_, id) { + cgroupCPULimitIsSet = 1; + return '<div data-netdata="' + id + '"' + + ' data-dimensions="used"' + + ' data-gauge-max-value="100"' + + ' data-chart-library="gauge"' + + ' data-title="CPU"' + + ' data-units="%"' + + ' data-gauge-adjust="width"' + + ' data-width="12%"' + + ' data-before="0"' + + ' data-after="-CHART_DURATION"' + + ' data-points="CHART_DURATION"' + + ' data-colors="' + NETDATA.colors[4] + '"' + + ' role="application"></div>'; + } + ], + info: cgroupCPULimit + }, + 'k8s.cgroup.cpu': { + mainheads: [ + function (_, id) { + if (cgroupCPULimitIsSet === 0) { + return '<div data-netdata="' + id + '"' + + ' data-chart-library="gauge"' + + ' data-title="CPU"' + + ' data-units="%"' + + ' data-gauge-adjust="width"' + + ' data-width="12%"' + + ' data-before="0"' + + ' data-after="-CHART_DURATION"' + + ' data-points="CHART_DURATION"' + + ' data-colors="' + NETDATA.colors[4] + '"' + + ' role="application"></div>'; + } else + return ''; + } + ], + info: cgroupCPU + }, + 'k8s.cgroup.throttled': { + info: cgroupThrottled + }, + 'k8s.cgroup.throttled_duration': { + info: cgroupThrottledDuration + }, + 'k8s.cgroup.cpu_shares': { + info: cgroupCPUShared + }, + 'k8s.cgroup.cpu_per_core': { + info: cgroupCPUPerCore + }, + 'k8s.cgroup.cpu_some_pressure': { + info: cgroupCPUSomePressure + }, + 'k8s.cgroup.cpu_some_pressure_stall_time': { + info: cgroupCPUSomePressureStallTime + }, + 'k8s.cgroup.cpu_full_pressure': { + info: cgroupCPUFullPressure + }, + 'k8s.cgroup.cpu_full_pressure_stall_time': { + info: cgroupCPUFullPressureStallTime }, + 'cgroup.mem_utilization': { + info: cgroupMemUtilization + }, 'cgroup.mem_usage_limit': { mainheads: [ - function (os, id) { - void (os); + function (_, id) { cgroupMemLimitIsSet = 1; return '<div data-netdata="' + id + '"' + ' data-dimensions="used"' @@ -4114,15 +4415,11 @@ netdataDashboard.context = { + ' role="application"></div>'; } ], - info: 
'RAM usage within the configured or system-wide (if not set) limits. '+ - 'When the RAM usage of a cgroup exceeds the limit, '+ - 'OOM killer will start killing the tasks belonging to the cgroup.' + info: cgroupMemUsageLimit }, - 'cgroup.mem_usage': { mainheads: [ - function (os, id) { - void (os); + function (_, id) { if (cgroupMemLimitIsSet === 0) { return '<div data-netdata="' + id + '"' + ' data-chart-library="gauge"' @@ -4139,79 +4436,125 @@ netdataDashboard.context = { return ''; } ], - info: 'The amount of used RAM and swap memory.' + info: cgroupMemUsage }, - 'cgroup.mem': { - info: 'Memory usage statistics. '+ - 'The individual metrics are described in the memory.stat section for '+ - '<a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/memory.html#per-memory-cgroup-local-status" target="_blank">cgroup-v1 </a>'+ - 'and '+ - '<a href="https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html#memory-interface-files" target="_blank">cgroup-v2</a>.' + info: cgroupMem }, - 'cgroup.mem_failcnt': { - info: 'The number of memory usage hits limits.' + info: cgroupMemFailCnt }, - 'cgroup.writeback': { - info: '<b>Dirty</b> is the amount of memory waiting to be written to disk. <b>Writeback</b> is how much memory is actively being written to disk.' + info: cgroupWriteback }, - 'cgroup.mem_activity': { - info: '<p>Memory accounting statistics.</p>'+ - '<p><b>In</b> - a page is accounted as either mapped anon page (RSS) or cache page (Page Cache) to the cgroup. '+ - '<b>Out</b> - a page is unaccounted from the cgroup.</p>' + info: cgroupMemActivity }, - 'cgroup.pgfaults': { - info: '<p>Memory <a href="https://en.wikipedia.org/wiki/Page_fault" target="_blank">page fault</a> statistics.</p>'+ - '<p><b>Pgfault</b> - all page faults. 
'+ - '<b>Swap</b> - major page faults.</p>' + info: cgroupPgFaults }, - 'cgroup.memory_some_pressure': { - info: 'Memory <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. '+ - '<b>Some</b> indicates the share of time in which at least <b>some tasks</b> are stalled on memory. ' + - 'In this state the CPU is still doing productive work. '+ - 'The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' + info: cgroupMemorySomePressure }, 'cgroup.memory_some_pressure_stall_time': { - info: 'The amount of time some processes have been waiting due to memory congestion.' + info: cgroupMemorySomePressureStallTime }, - 'cgroup.memory_full_pressure': { - info: 'Memory <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. ' + - '<b>Full</b> indicates the share of time in which <b>all non-idle tasks</b> are stalled on memory resource simultaneously. ' + - 'In this state actual CPU cycles are going to waste, and a workload that spends extended time in this state is considered to be thrashing. '+ - 'This has severe impact on performance. '+ - 'The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' + info: cgroupMemoryFullPressure }, 'cgroup.memory_full_pressure_stall_time': { - info: 'The amount of time all non-idle processes have been stalled due to memory congestion.' + info: cgroupMemoryFullPressureStallTime }, - 'cgroup.io': { - info: 'The amount of data transferred to and from specific devices as seen by the CFQ scheduler. '+ - 'It is not updated when the CFQ scheduler is operating on a request queue.' 
+ 'k8s.cgroup.mem_utilization': { + info: cgroupMemUtilization + }, + 'k8s.cgroup.mem_usage_limit': { + mainheads: [ + function (_, id) { + cgroupMemLimitIsSet = 1; + return '<div data-netdata="' + id + '"' + + ' data-dimensions="used"' + + ' data-append-options="percentage"' + + ' data-gauge-max-value="100"' + + ' data-chart-library="gauge"' + + ' data-title="Memory"' + + ' data-units="%"' + + ' data-gauge-adjust="width"' + + ' data-width="12%"' + + ' data-before="0"' + + ' data-after="-CHART_DURATION"' + + ' data-points="CHART_DURATION"' + + ' data-colors="' + NETDATA.colors[1] + '"' + + ' role="application"></div>'; + } + ], + info: cgroupMemUsageLimit + }, + 'k8s.cgroup.mem_usage': { + mainheads: [ + function (_, id) { + if (cgroupMemLimitIsSet === 0) { + return '<div data-netdata="' + id + '"' + + ' data-chart-library="gauge"' + + ' data-title="Memory"' + + ' data-units="MB"' + + ' data-gauge-adjust="width"' + + ' data-width="12%"' + + ' data-before="0"' + + ' data-after="-CHART_DURATION"' + + ' data-points="CHART_DURATION"' + + ' data-colors="' + NETDATA.colors[1] + '"' + + ' role="application"></div>'; + } else + return ''; + } + ], + info: cgroupMemUsage + }, + 'k8s.cgroup.mem': { + info: cgroupMem + }, + 'k8s.cgroup.mem_failcnt': { + info: cgroupMemFailCnt + }, + 'k8s.cgroup.writeback': { + info: cgroupWriteback + }, + 'k8s.cgroup.mem_activity': { + info: cgroupMemActivity + }, + 'k8s.cgroup.pgfaults': { + info: cgroupPgFaults + }, + 'k8s.cgroup.memory_some_pressure': { + info: cgroupMemorySomePressure + }, + 'k8s.cgroup.memory_some_pressure_stall_time': { + info: cgroupMemorySomePressureStallTime + }, + 'k8s.cgroup.memory_full_pressure': { + info: cgroupMemoryFullPressure + }, + 'k8s.cgroup.memory_full_pressure_stall_time': { + info: cgroupMemoryFullPressureStallTime }, + 'cgroup.io': { + info: cgroupIO + }, 'cgroup.serviced_ops': { - info: 'The number of I/O operations performed on specific devices as seen by the CFQ scheduler.' 
+ info: cgroupServicedOps }, - 'cgroup.queued_ops': { - info: 'The number of requests queued for I/O operations.' + info: cgroupQueuedOps }, - 'cgroup.merged_ops': { - info: 'The number of BIOS requests merged into requests for I/O operations.' + info: cgroupMergedOps }, - 'cgroup.throttle_io': { mainheads: [ - function (os, id) { - void (os); + function (_, id) { return '<div data-netdata="' + id + '"' + ' data-dimensions="read"' + ' data-chart-library="gauge"' @@ -4225,8 +4568,7 @@ netdataDashboard.context = { + ' data-colors="' + NETDATA.colors[2] + '"' + ' role="application"></div>'; }, - function (os, id) { - void (os); + function (_, id) { return '<div data-netdata="' + id + '"' + ' data-dimensions="write"' + ' data-chart-library="gauge"' @@ -4241,214 +4583,220 @@ netdataDashboard.context = { + ' role="application"></div>'; } ], - info: 'The amount of data transferred to and from specific devices as seen by the throttling policy.' + info: cgroupThrottleIO }, - 'cgroup.throttle_serviced_ops': { - info: 'The number of I/O operations performed on specific devices as seen by the throttling policy.' + info: cgroupThrottleIOServicesOps }, - 'cgroup.io_some_pressure': { - info: 'I/O <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. '+ - '<b>Some</b> indicates the share of time in which at least <b>some tasks</b> are stalled on I/O. ' + - 'In this state the CPU is still doing productive work. '+ - 'The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' + info: cgroupIOSomePressure }, 'cgroup.io_some_pressure_stall_time': { - info: 'The amount of time some processes have been waiting due to I/O congestion.' + info: cgroupIOSomePRessureStallTime }, - 'cgroup.io_full_pressure': { - info: 'I/O <a href="https://www.kernel.org/doc/html/latest/accounting/psi.html" target="_blank">Pressure Stall Information</a>. 
' + - '<b>Full</b> line indicates the share of time in which <b>all non-idle tasks</b> are stalled on I/O resource simultaneously. ' + - 'In this state actual CPU cycles are going to waste, and a workload that spends extended time in this state is considered to be thrashing. '+ - 'This has severe impact on performance. '+ - 'The ratios are tracked as recent trends over 10-, 60-, and 300-second windows.' + info: cgroupIOFullPressure }, 'cgroup.io_full_pressure_stall_time': { - info: 'The amount of time all non-idle processes have been stalled due to I/O congestion.' + info: cgroupIOFullPressureStallTime + }, + + 'k8s.cgroup.io': { + info: cgroupIO + }, + 'k8s.cgroup.serviced_ops': { + info: cgroupServicedOps + }, + 'k8s.cgroup.queued_ops': { + info: cgroupQueuedOps + }, + 'k8s.cgroup.merged_ops': { + info: cgroupMergedOps + }, + 'k8s.cgroup.throttle_io': { + mainheads: [ + function (_, id) { + return '<div data-netdata="' + id + '"' + + ' data-dimensions="read"' + + ' data-chart-library="gauge"' + + ' data-title="Read Disk I/O"' + + ' data-units="KB/s"' + + ' data-gauge-adjust="width"' + + ' data-width="12%"' + + ' data-before="0"' + + ' data-after="-CHART_DURATION"' + + ' data-points="CHART_DURATION"' + + ' data-colors="' + NETDATA.colors[2] + '"' + + ' role="application"></div>'; + }, + function (_, id) { + return '<div data-netdata="' + id + '"' + + ' data-dimensions="write"' + + ' data-chart-library="gauge"' + + ' data-title="Write Disk I/O"' + + ' data-units="KB/s"' + + ' data-gauge-adjust="width"' + + ' data-width="12%"' + + ' data-before="0"' + + ' data-after="-CHART_DURATION"' + + ' data-points="CHART_DURATION"' + + ' data-colors="' + NETDATA.colors[3] + '"' + + ' role="application"></div>'; + } + ], + info: cgroupThrottleIO + }, + 'k8s.cgroup.throttle_serviced_ops': { + info: cgroupThrottleIOServicesOps + }, + 'k8s.cgroup.io_some_pressure': { + info: cgroupIOSomePressure + }, + 'k8s.cgroup.io_some_pressure_stall_time': { + info: 
cgroupIOSomePRessureStallTime + }, + 'k8s.cgroup.io_full_pressure': { + info: cgroupIOFullPressure + }, + 'k8s.cgroup.io_full_pressure_stall_time': { + info: cgroupIOFullPressureStallTime }, 'cgroup.swap_read': { info: ebpfSwapRead }, - 'cgroup.swap_write': { info: ebpfSwapWrite }, - 'cgroup.fd_open': { info: ebpfFileOpen }, - 'cgroup.fd_open_error': { info: ebpfFileOpenError }, - 'cgroup.fd_close': { info: ebpfFileClosed }, - 'cgroup.fd_close_error': { info: ebpfFileCloseError }, - 'cgroup.vfs_unlink': { info: ebpfVFSUnlink }, - 'cgroup.vfs_write': { info: ebpfVFSWrite }, - 'cgroup.vfs_write_error': { info: ebpfVFSWriteError }, - 'cgroup.vfs_read': { info: ebpfVFSRead }, - 'cgroup.vfs_read_error': { info: ebpfVFSReadError }, - 'cgroup.vfs_write_bytes': { info: ebpfVFSWriteBytes }, - 'cgroup.vfs_read_bytes': { info: ebpfVFSReadBytes }, - 'cgroup.vfs_fsync': { info: ebpfVFSSync }, - 'cgroup.vfs_fsync_error': { info: ebpfVFSSyncError }, - 'cgroup.vfs_open': { info: ebpfVFSOpen }, - 'cgroup.vfs_open_error': { info: ebpfVFSOpenError }, - 'cgroup.vfs_create': { info: ebpfVFSCreate }, - 'cgroup.vfs_create_error': { info: ebpfVFSCreateError }, - 'cgroup.process_create': { info: ebpfProcessCreate }, - 'cgroup.thread_create': { info: ebpfThreadCreate }, - 'cgroup.task_exit': { info: ebpfTaskExit }, - 'cgroup.task_close': { info: ebpfTaskClose }, - 'cgroup.task_error': { info: ebpfTaskError }, - 'cgroup.dc_ratio': { info: 'Percentage of file accesses that were present in the directory cache. 100% means that every file that was accessed was present in the directory cache. If files are not present in the directory cache 1) they are not present in the file system, 2) the files were not accessed before. Read more about <a href="https://www.kernel.org/doc/htmldocs/filesystems/the_directory_cache.html" target="_blank">directory cache</a>. Netdata also gives a summary for these charts in <a href="#menu_filesystem_submenu_directory_cache__eBPF_">Filesystem submenu</a>.' 
}, - 'cgroup.shmget': { info: ebpfSHMget }, - 'cgroup.shmat': { info: ebpfSHMat }, - 'cgroup.shmdt': { info: ebpfSHMdt }, - 'cgroup.shmctl': { info: ebpfSHMctl }, - 'cgroup.outbound_conn_v4': { info: ebpfIPV4conn }, - 'cgroup.outbound_conn_v6': { info: ebpfIPV6conn }, - 'cgroup.net_bytes_send': { info: ebpfBandwidthSent }, - 'cgroup.net_bytes_recv': { info: ebpfBandwidthRecv }, - 'cgroup.net_tcp_send': { info: ebpfTCPSendCall }, - 'cgroup.net_tcp_recv': { info: ebpfTCPRecvCall }, - 'cgroup.net_retransmit': { info: ebpfTCPRetransmit }, - 'cgroup.net_udp_send': { info: ebpfUDPsend }, - 'cgroup.net_udp_recv': { info: ebpfUDPrecv }, - 'cgroup.dc_hit_ratio': { info: ebpfDCHit }, - 'cgroup.dc_reference': { info: ebpfDCReference }, - 'cgroup.dc_not_cache': { info: ebpfDCNotCache }, - 'cgroup.dc_not_found': { info: ebpfDCNotFound }, - 'cgroup.cachestat_ratio': { info: ebpfCachestatRatio }, - 'cgroup.cachestat_dirties': { info: ebpfCachestatDirties }, - 'cgroup.cachestat_hits': { info: ebpfCachestatHits }, - 'cgroup.cachestat_misses': { info: ebpfCachestatMisses }, @@ -5300,52 +5648,55 @@ netdataDashboard.context = { }, // ------------------------------------------------------------------------ + // Chrony - 'chrony.system': { - info: 'In normal operation, chronyd never steps the system clock, because any jump in the timescale can have adverse consequences for certain application programs. Instead, any error in the system clock is corrected by slightly speeding up or slowing down the system clock until the error has been removed, and then returning to the system clock’s normal speed. A consequence of this is that there will be a period when the system clock (as read by other programs using the <code>gettimeofday()</code> system call, or by the <code>date</code> command in the shell) will be different from chronyd\'s estimate of the current true time (which it reports to NTP clients when it is operating in server mode). 
The value reported on this line is the difference due to this effect.', - colors: NETDATA.colors[3] + 'chrony.stratum': { + info: 'The stratum indicates the distance (hops) to the computer with the reference clock. The higher the stratum number, the more the timing accuracy and stability degrades.', }, - 'chrony.offsets': { - info: '<code>last offset</code> is the estimated local offset on the last clock update. <code>RMS offset</code> is a long-term average of the offset value.', - height: 0.5 + 'chrony.current_correction': { + info: 'Any error in the system clock is corrected by slightly speeding up or slowing down the system clock until the error has been removed, and then returning to the system clock’s normal speed. A consequence of this is that there will be a period when the system clock (as read by other programs) will be different from chronyd\'s estimate of the current true time (which it reports to NTP clients when it is operating as a server). The reported value is the difference due to this effect.', }, - 'chrony.stratum': { - info: 'The <code>stratum</code> indicates how many hops away from a computer with an attached reference clock we are. Such a computer is a stratum-1 computer.', - decimalDigits: 0, - height: 0.5 + 'chrony.root_delay': { + info: 'The total of the network path delays to the stratum-1 computer from which the computer is ultimately synchronised.' }, - 'chrony.root': { - info: 'Estimated delays against the root time server this system is synchronized with. <code>delay</code> is the total of the network path delays to the stratum-1 computer from which the computer is ultimately synchronised. <code>dispersion</code> is the total dispersion accumulated through all the computers back to the stratum-1 computer from which the computer is ultimately synchronised. Dispersion is due to system clock resolution, statistical measurement variations etc.' 
+ 'chrony.root_dispersion': { + info: 'The total dispersion accumulated through all the computers back to the stratum-1 computer from which the computer is ultimately synchronised. Dispersion is due to system clock resolution, statistical measurement variations, etc.' + }, + + 'chrony.last_offset': { + info: 'The estimated local offset on the last clock update. A positive value indicates the local time (as previously estimated true time) was ahead of the time sources.', }, 'chrony.frequency': { - info: 'The <code>frequency</code> is the rate by which the system\'s clock would be would be wrong if chronyd was not correcting it. It is expressed in ppm (parts per million). For example, a value of 1ppm would mean that when the system\'s clock thinks it has advanced 1 second, it has actually advanced by 1.000001 seconds relative to true time.', - colors: NETDATA.colors[0] - }, - - 'chrony.residualfreq': { - info: 'This shows the <code>residual frequency</code> for the currently selected reference source. ' + - 'It reflects any difference between what the measurements from the reference source indicate the ' + - 'frequency should be and the frequency currently being used. The reason this is not always zero is ' + - 'that a smoothing procedure is applied to the frequency. Each time a measurement from the reference ' + - 'source is obtained and a new residual frequency computed, the estimated accuracy of this residual ' + - 'is compared with the estimated accuracy (see <code>skew</code>) of the existing frequency value. ' + - 'A weighted average is computed for the new frequency, with weights depending on these accuracies. ' + - 'If the measurements from the reference source follow a consistent trend, the residual will be ' + - 'driven to zero over time.', - height: 0.5, - colors: NETDATA.colors[3] + info: 'The <b>frequency</b> is the rate by which the system’s clock would be wrong if chronyd was not correcting it. It is expressed in ppm (parts per million). 
For example, a value of 1 ppm would mean that when the system’s clock thinks it has advanced 1 second, it has actually advanced by 1.000001 seconds relative to true time.', + }, + + 'chrony.residual_frequency': { + info: 'The <b>residual frequency</b> for the currently selected reference source. This reflects any difference between what the measurements from the reference source indicate the frequency should be and the frequency currently being used. The reason this is not always zero is that a smoothing procedure is applied to the frequency.', }, 'chrony.skew': { info: 'The estimated error bound on the frequency.', - height: 0.5, - colors: NETDATA.colors[5] }, + 'chrony.ref_measurement_time': { + info: 'The time elapsed since the last measurement from the reference source was processed.', + }, + + 'chrony.leap_status': { + info: '<p>The current leap status of the source.</p><p><b>Normal</b> - indicates the normal status (no leap second). <b>InsertSecond</b> - indicates that a leap second will be inserted at the end of the month. <b>DeleteSecond</b> - indicates that a leap second will be deleted at the end of the month. <b>Unsynchronised</b> - the server has not synchronized properly with the NTP server.</p>', + }, + + 'chrony.activity': { + info: '<p>The number of servers and peers that are online and offline.</p><p><b>Online</b> - the server or peer is currently online (i.e. assumed by chronyd to be reachable). <b>Offline</b> - the server or peer is currently offline (i.e. assumed by chronyd to be unreachable, and no measurements from it will be attempted). <b>BurstOnline</b> - a burst command has been initiated for the server or peer and is being performed. After the burst is complete, the server or peer will be returned to the online state. <b>BurstOffline</b> - a burst command has been initiated for the server or peer and is being performed. After the burst is complete, the server or peer will be returned to the offline state. 
<b>Unresolved</b> - the name of the server or peer was not resolved to an address yet.</p>', + }, + + // ------------------------------------------------------------------------ + // Couchdb + 'couchdb.active_tasks': { info: 'Active tasks running on this CouchDB <b>cluster</b>. Four types of tasks currently exist: indexer (view building), replication, database compaction and view compaction.' }, @@ -6831,4 +7182,192 @@ netdataDashboard.context = { info: '<p>The number of banned IP addresses.</p>' }, + // ------------------------------------------------------------------------ + // K8s state: Node. + + 'k8s_state.node_allocatable_cpu_requests_utilization': { + info: 'The percentage of allocated CPU resources used by Pod requests. '+ + 'A Pod is scheduled to run on a Node only if the Node has enough CPU resources available to satisfy the Pod CPU request.' + }, + 'k8s_state.node_allocatable_cpu_requests_used': { + info: 'The amount of allocated CPU resources used by Pod requests. ' + + '1000 millicpu is equivalent to '+ + '<a href="https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/#cpu-units" target="_blank">1 physical or virtual CPU core</a>.' + }, + 'k8s_state.node_allocatable_cpu_limits_utilization': { + info: 'The percentage of allocated CPU resources used by Pod limits. '+ + 'Total limits may be over 100 percent (overcommitted).' + }, + 'k8s_state.node_allocatable_cpu_limits_used': { + info: 'The amount of allocated CPU resources used by Pod limits. ' + + '1000 millicpu is equivalent to '+ + '<a href="https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/#cpu-units" target="_blank">1 physical or virtual CPU core</a>.' + }, + 'k8s_state.node_allocatable_mem_requests_utilization': { + info: 'The percentage of allocated memory resources used by Pod requests. '+ + 'A Pod is scheduled to run on a Node only if the Node has enough memory resources available to satisfy the Pod memory request.' 
+ }, + 'k8s_state.node_allocatable_mem_requests_used': { + info: 'The amount of allocated memory resources used by Pod requests.' + }, + 'k8s_state.node_allocatable_mem_limits_utilization': { + info: 'The percentage of allocated memory resources used by Pod limits. '+ + 'Total limits may be over 100 percent (overcommitted).' + }, + 'k8s_state.node_allocatable_mem_limits_used': { + info: 'The amount of allocated memory resources used by Pod limits.' + }, + 'k8s_state.node_allocatable_pods_utilization': { + info: 'Pods limit utilization.' + }, + 'k8s_state.node_allocatable_pods_usage': { + info: '<p>Pods limit usage.</p>'+ + '<p><b>Available</b> - the number of Pods available for scheduling. '+ + '<b>Allocated</b> - the number of Pods that have been scheduled.</p>' + }, + 'k8s_state.node_condition': { + info: 'Health status. '+ + 'If the status of the Ready condition remains False for longer than the <code>pod-eviction-timeout</code> (the default is 5 minutes), '+ + 'then the node controller triggers API-initiated eviction for all Pods assigned to that node. '+ + '<a href="https://kubernetes.io/docs/concepts/architecture/nodes/#condition" target="_blank">More info.</a>' + }, + 'k8s_state.node_pods_readiness': { + info: 'The percentage of Pods that are ready to serve requests.' + }, + 'k8s_state.node_pods_readiness_state': { + info: '<p>Pods readiness state.</p>'+ + '<p><b>Ready</b> - the Pod has passed its readiness probe and is ready to serve requests. '+ + '<b>Unready</b> - the Pod has not passed its readiness probe yet.</p>' + }, + 'k8s_state.node_pods_condition': { + info: '<p>Pods state. '+ + '<a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-conditions" target="_blank">More info.</a></p>'+ + '<p><b>PodReady</b> - the Pod is able to serve requests and should be added to the load balancing pools of all matching Services. '+ + '<b>PodScheduled</b> - the Pod has been scheduled to a node. 
'+ + '<b>PodInitialized</b> - all init containers have completed successfully. '+ + '<b>ContainersReady</b> - all containers in the Pod are ready.</p>' + }, + 'k8s_state.node_pods_phase': { + info: '<p>Pods phase. The phase of a Pod is a high-level summary of where the Pod is in its lifecycle. '+ + '<a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase" target="_blank">More info.</a></p>'+ + '<p><b>Running</b> - the Pod has been bound to a node, and all of the containers have been created. '+ + 'At least one container is still running, or is in the process of starting or restarting. ' + + '<b>Failed</b> - all containers in the Pod have terminated, and at least one container has terminated in failure. '+ + 'That is, the container either exited with non-zero status or was terminated by the system. ' + + '<b>Succeeded</b> - all containers in the Pod have terminated in success, and will not be restarted. ' + + '<b>Pending</b> - the Pod has been accepted by the Kubernetes cluster, but one or more of the containers has not been set up and made ready to run.</p>' + }, + 'k8s_state.node_containers': { + info: 'The total number of containers and init containers.' + }, + 'k8s_state.node_containers_state': { + info: '<p>The number of containers in different lifecycle states. '+ + '<a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states" target="_blank">More info.</a></p>'+ + '<p><b>Running</b> - a container is executing without issues. '+ + '<b>Waiting</b> - a container is still running the operations it requires in order to complete start up. '+ + '<b>Terminated</b> - a container began execution and then either ran to completion or failed for some reason.</p>' + }, + 'k8s_state.node_init_containers_state': { + info: '<p>The number of init containers in different lifecycle states. 
'+ + '<a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states" target="_blank">More info.</a></p>'+ + '<p><b>Running</b> - a container is executing without issues. '+ + '<b>Waiting</b> - a container is still running the operations it requires in order to complete start up. '+ + '<b>Terminated</b> - a container began execution and then either ran to completion or failed for some reason.</p>' + }, + 'k8s_state.node_age': { + info: 'The lifetime of the Node.' + }, + + // K8s state: Pod. + + 'k8s_state.pod_cpu_requests_used': { + info: 'The overall CPU resource requests for a Pod. '+ + 'This is the sum of the CPU requests for all the Containers in the Pod. '+ + 'Provided the system has CPU time free, a container is guaranteed to be allocated as much CPU as it requests. '+ + '1000 millicpu is equivalent to '+ + '<a href="https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/#cpu-units" target="_blank">1 physical or virtual CPU core</a>.' + }, + 'k8s_state.pod_cpu_limits_used': { + info: 'The overall CPU resource limits for a Pod. '+ + 'This is the sum of the CPU limits for all the Containers in the Pod. '+ + 'If set, containers cannot use more CPU than the configured limit. '+ + '1000 millicpu is equivalent to '+ + '<a href="https://kubernetes.io/docs/tasks/configure-pod-container/assign-cpu-resource/#cpu-units" target="_blank">1 physical or virtual CPU core</a>.' + }, + 'k8s_state.pod_mem_requests_used': { + info: 'The overall memory resource requests for a Pod. '+ + 'This is the sum of the memory requests for all the Containers in the Pod.' + }, + 'k8s_state.pod_mem_limits_used': { + info: 'The overall memory resource limits for a Pod. '+ + 'This is the sum of the memory limits for all the Containers in the Pod. '+ + 'If set, containers cannot use more RAM than the configured limit.' + }, + 'k8s_state.pod_condition': { + info: 'The current state of the Pod. 
' + + '<a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-conditions" target="_blank">More info.</a> '+ + '<p><b>PodReady</b> - the Pod is able to serve requests and should be added to the load balancing pools of all matching Services. ' + + '<b>PodScheduled</b> - the Pod has been scheduled to a node. ' + + '<b>PodInitialized</b> - all init containers have completed successfully. ' + + '<b>ContainersReady</b> - all containers in the Pod are ready.</p>' + }, + 'k8s_state.pod_phase': { + info: 'High-level summary of where the Pod is in its lifecycle. ' + + '<a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase" target="_blank">More info.</a> '+ + '<p><b>Running</b> - the Pod has been bound to a node, and all of the containers have been created. '+ + 'At least one container is still running, or is in the process of starting or restarting. ' + + '<b>Failed</b> - all containers in the Pod have terminated, and at least one container has terminated in failure. '+ + 'That is, the container either exited with non-zero status or was terminated by the system. ' + + '<b>Succeeded</b> - all containers in the Pod have terminated in success, and will not be restarted. ' + + '<b>Pending</b> - the Pod has been accepted by the Kubernetes cluster, but one or more of the containers has not been set up and made ready to run. '+ + 'This includes time a Pod spends waiting to be scheduled as well as the time spent downloading container images over the network.</p>' + }, + 'k8s_state.pod_age': { + info: 'The <a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-lifetime" target="_blank">lifetime</a> of the Pod. ' + }, + 'k8s_state.pod_containers': { + info: 'The number of containers and init containers belonging to the Pod.' + }, + 'k8s_state.pod_containers_state': { + info: 'The state of each container inside this Pod. 
'+ + '<a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states" target="_blank">More info.</a> '+ + '<p><b>Running</b> - a container is executing without issues. '+ + '<b>Waiting</b> - a container is still running the operations it requires in order to complete start up. '+ + '<b>Terminated</b> - a container began execution and then either ran to completion or failed for some reason.</p>' + }, + 'k8s_state.pod_init_containers_state': { + info: 'The state of each init container inside this Pod. '+ + '<a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states" target="_blank">More info.</a> '+ + '<p><b>Running</b> - a container is executing without issues. '+ + '<b>Waiting</b> - a container is still running the operations it requires in order to complete start up. '+ + '<b>Terminated</b> - a container began execution and then either ran to completion or failed for some reason.</p>' + }, + + // K8s state: Pod container. + + 'k8s_state.pod_container_readiness_state': { + info: 'Specifies whether the container has passed its readiness probe. '+ + 'Kubelet uses readiness probes to know when a container is ready to start accepting traffic.' + }, + 'k8s_state.pod_container_restarts': { + info: 'The number of times the container has been restarted.' + }, + 'k8s_state.pod_container_state': { + info: 'Current state of the container. '+ + '<a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-states" target="_blank">More info.</a> '+ + '<p><b>Running</b> - a container is executing without issues. '+ + '<b>Waiting</b> - a container is still running the operations it requires in order to complete start up. '+ + '<b>Terminated</b> - a container began execution and then either ran to completion or failed for some reason.</p>' + }, + 'k8s_state.pod_container_waiting_state_reason': { + info: 'Reason the container is not yet running. 
'+ + '<a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-state-waiting" target="_blank">More info.</a> ' + }, + 'k8s_state.pod_container_terminated_state_reason': { + info: 'Reason from the last termination of the container. '+ + '<a href="https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-state-terminated" target="_blank">More info.</a>' + }, + + // ------------------------------------------------------------------------ + }; |