Diffstat (limited to 'src/collectors'): 219 files changed, 3321 insertions, 17666 deletions
diff --git a/src/collectors/COLLECTORS.md b/src/collectors/COLLECTORS.md index ebd7b2a9a..608649a38 100644 --- a/src/collectors/COLLECTORS.md +++ b/src/collectors/COLLECTORS.md @@ -13,7 +13,7 @@ Learn more about [how collectors work](/src/collectors/README.md), and then lear If you don't see the app/service you'd like to monitor in this list: -- If your application has a Prometheus endpoint, Netdata can monitor it! Look at our [generic Prometheus collector](/src/go/collectors/go.d.plugin/modules/prometheus/README.md). +- If your application has a Prometheus endpoint, Netdata can monitor it! Look at our [generic Prometheus collector](/src/go/plugin/go.d/modules/prometheus/README.md). - If your application is instrumented to expose [StatsD](https://blog.netdata.cloud/introduction-to-statsd/) metrics, see our [generic StatsD collector](/src/collectors/statsd.plugin/README.md). @@ -23,297 +23,297 @@ If you don't see the app/service you'd like to monitor in this list: - If you don't see the collector there, you can make a [feature request](https://github.com/netdata/netdata/issues/new/choose) on GitHub. -- If you have basic software development skills, you can add your own plugin in [Go](/src/go/collectors/go.d.plugin/README.md#how-to-develop-a-collector) or [Python](/docs/developer-and-contributor-corner/python-collector.md) +- If you have basic software development skills, you can add your own plugin in [Go](/src/go/plugin/go.d/README.md#how-to-develop-a-collector) or [Python](/docs/developer-and-contributor-corner/python-collector.md) ## Available Data Collection Integrations <!-- AUTOGENERATED PART BY integrations/gen_doc_collector_page.py SCRIPT, DO NOT EDIT MANUALLY --> ### APM -- [Alamos FE2 server](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/alamos_fe2_server.md) +- [Alamos FE2 server](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/alamos_fe2_server.md) -- [Apache Airflow](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/apache_airflow.md) +- [Apache Airflow](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/apache_airflow.md) -- [Apache Flink](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/apache_flink.md) +- [Apache Flink](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/apache_flink.md) -- [Audisto](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/audisto.md) +- [Audisto](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/audisto.md) -- [Dependency-Track](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/dependency-track.md) +- [Dependency-Track](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/dependency-track.md) - [Go applications (EXPVAR)](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md) -- [Google Pagespeed](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/google_pagespeed.md) +- [Google 
Pagespeed](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/google_pagespeed.md) -- [IBM AIX systems Njmon](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ibm_aix_systems_njmon.md) +- [IBM AIX systems Njmon](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ibm_aix_systems_njmon.md) -- [JMX](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/jmx.md) +- [JMX](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/jmx.md) -- [NRPE daemon](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/nrpe_daemon.md) +- [NRPE daemon](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/nrpe_daemon.md) -- [Sentry](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/sentry.md) +- [Sentry](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/sentry.md) -- [Sysload](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/sysload.md) +- [Sysload](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/sysload.md) -- [VSCode](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/vscode.md) +- [VSCode](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/vscode.md) -- [YOURLS URL Shortener](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/yourls_url_shortener.md) +- [YOURLS URL Shortener](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/yourls_url_shortener.md) -- [bpftrace variables](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/bpftrace_variables.md) +- [bpftrace variables](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/bpftrace_variables.md) -- [gpsd](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/gpsd.md) +- [gpsd](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/gpsd.md) -- [jolokia](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/jolokia.md) +- [jolokia](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/jolokia.md) -- [phpDaemon](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/phpdaemon/integrations/phpdaemon.md) +- [phpDaemon](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/phpdaemon/integrations/phpdaemon.md) ### Authentication and Authorization -- [Fail2ban](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/fail2ban/integrations/fail2ban.md) +- [Fail2ban](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/fail2ban/integrations/fail2ban.md) -- [FreeRADIUS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/freeradius/integrations/freeradius.md) +- 
[FreeRADIUS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/freeradius/integrations/freeradius.md) -- [HashiCorp Vault secrets](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/hashicorp_vault_secrets.md) +- [HashiCorp Vault secrets](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/hashicorp_vault_secrets.md) -- [LDAP](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ldap.md) +- [LDAP](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ldap.md) -- [OpenLDAP (community)](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/openldap_community.md) +- [OpenLDAP (community)](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/openldap_community.md) - [OpenLDAP](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/openldap/integrations/openldap.md) -- [RADIUS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/radius.md) +- [RADIUS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/radius.md) -- [SSH](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ssh.md) +- [SSH](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ssh.md) -- [TACACS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/tacacs.md) +- [TACACS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/tacacs.md) ### Blockchain Servers -- [Chia](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/chia.md) +- [Chia](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/chia.md) -- [Crypto exchanges](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/crypto_exchanges.md) +- [Crypto exchanges](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/crypto_exchanges.md) -- [Cryptowatch](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/cryptowatch.md) +- [Cryptowatch](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/cryptowatch.md) -- [Go-ethereum](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/geth/integrations/go-ethereum.md) +- [Go-ethereum](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/geth/integrations/go-ethereum.md) -- [Helium miner (validator)](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/helium_miner_validator.md) +- [Helium miner (validator)](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/helium_miner_validator.md) -- [IOTA full node](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/iota_full_node.md) +- [IOTA full 
node](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/iota_full_node.md) -- [Sia](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/sia.md) +- [Sia](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/sia.md) ### CICD Platforms -- [Concourse](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/concourse.md) +- [Concourse](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/concourse.md) -- [GitLab Runner](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/gitlab_runner.md) +- [GitLab Runner](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/gitlab_runner.md) -- [Jenkins](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/jenkins.md) +- [Jenkins](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/jenkins.md) -- [Puppet](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/puppet/integrations/puppet.md) +- [Puppet](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/puppet/integrations/puppet.md) ### Cloud Provider Managed -- [AWS EC2 Compute instances](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/aws_ec2_compute_instances.md) +- [AWS EC2 Compute instances](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/aws_ec2_compute_instances.md) -- [AWS EC2 Spot Instance](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/aws_ec2_spot_instance.md) +- [AWS EC2 Spot Instance](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/aws_ec2_spot_instance.md) -- [AWS ECS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/aws_ecs.md) +- [AWS ECS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/aws_ecs.md) -- [AWS Health events](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/aws_health_events.md) +- [AWS Health events](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/aws_health_events.md) -- [AWS Quota](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/aws_quota.md) +- [AWS Quota](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/aws_quota.md) -- [AWS S3 buckets](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/aws_s3_buckets.md) +- [AWS S3 buckets](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/aws_s3_buckets.md) -- [AWS SQS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/aws_sqs.md) +- [AWS SQS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/aws_sqs.md) -- [AWS instance 
health](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/aws_instance_health.md) +- [AWS instance health](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/aws_instance_health.md) -- [Akamai Global Traffic Management](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/akamai_global_traffic_management.md) +- [Akamai Global Traffic Management](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/akamai_global_traffic_management.md) -- [Akami Cloudmonitor](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/akami_cloudmonitor.md) +- [Akami Cloudmonitor](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/akami_cloudmonitor.md) -- [Alibaba Cloud](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/alibaba_cloud.md) +- [Alibaba Cloud](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/alibaba_cloud.md) -- [ArvanCloud CDN](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/arvancloud_cdn.md) +- [ArvanCloud CDN](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/arvancloud_cdn.md) -- [Azure AD App passwords](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/azure_ad_app_passwords.md) +- [Azure AD App passwords](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/azure_ad_app_passwords.md) -- [Azure Elastic Pool SQL](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/azure_elastic_pool_sql.md) +- [Azure Elastic Pool SQL](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/azure_elastic_pool_sql.md) -- [Azure Resources](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/azure_resources.md) +- [Azure Resources](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/azure_resources.md) -- [Azure SQL](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/azure_sql.md) +- [Azure SQL](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/azure_sql.md) -- [Azure Service Bus](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/azure_service_bus.md) +- [Azure Service Bus](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/azure_service_bus.md) -- [Azure application](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/azure_application.md) +- [Azure application](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/azure_application.md) -- [BigQuery](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/bigquery.md) +- [BigQuery](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/bigquery.md) 
-- [CloudWatch](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/cloudwatch.md) +- [CloudWatch](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/cloudwatch.md) -- [Dell EMC ECS cluster](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/dell_emc_ecs_cluster.md) +- [Dell EMC ECS cluster](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/dell_emc_ecs_cluster.md) -- [DigitalOcean](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/digitalocean.md) +- [DigitalOcean](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/digitalocean.md) -- [GCP GCE](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/gcp_gce.md) +- [GCP GCE](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/gcp_gce.md) -- [GCP Quota](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/gcp_quota.md) +- [GCP Quota](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/gcp_quota.md) -- [Google Cloud Platform](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/google_cloud_platform.md) +- [Google Cloud Platform](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/google_cloud_platform.md) -- [Google Stackdriver](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/google_stackdriver.md) +- [Google Stackdriver](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/google_stackdriver.md) -- [Linode](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/linode.md) +- [Linode](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/linode.md) -- [Lustre metadata](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/lustre_metadata.md) +- [Lustre metadata](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/lustre_metadata.md) -- [Nextcloud servers](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/nextcloud_servers.md) +- [Nextcloud servers](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/nextcloud_servers.md) -- [OpenStack](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/openstack.md) +- [OpenStack](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/openstack.md) -- [Zerto](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/zerto.md) +- [Zerto](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/zerto.md) ### Containers and VMs - [Containers](https://github.com/netdata/netdata/blob/master/src/collectors/cgroups.plugin/integrations/containers.md) -- [Docker 
Engine](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/docker_engine/integrations/docker_engine.md) +- [Docker Engine](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/docker_engine/integrations/docker_engine.md) -- [Docker Hub repository](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/dockerhub/integrations/docker_hub_repository.md) +- [Docker Hub repository](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/dockerhub/integrations/docker_hub_repository.md) -- [Docker](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/docker/integrations/docker.md) +- [Docker](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/docker/integrations/docker.md) - [LXC Containers](https://github.com/netdata/netdata/blob/master/src/collectors/cgroups.plugin/integrations/lxc_containers.md) - [Libvirt Containers](https://github.com/netdata/netdata/blob/master/src/collectors/cgroups.plugin/integrations/libvirt_containers.md) -- [NSX-T](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/nsx-t.md) +- [NSX-T](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/nsx-t.md) -- [Podman](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/podman.md) +- [Podman](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/podman.md) - [Proxmox Containers](https://github.com/netdata/netdata/blob/master/src/collectors/cgroups.plugin/integrations/proxmox_containers.md) -- [Proxmox VE](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/proxmox_ve.md) +- [Proxmox VE](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/proxmox_ve.md) -- [VMware vCenter Server](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/vsphere/integrations/vmware_vcenter_server.md) +- [VMware vCenter Server](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/vsphere/integrations/vmware_vcenter_server.md) - [Virtual Machines](https://github.com/netdata/netdata/blob/master/src/collectors/cgroups.plugin/integrations/virtual_machines.md) - [Xen XCP-ng](https://github.com/netdata/netdata/blob/master/src/collectors/xenstat.plugin/integrations/xen_xcp-ng.md) -- [cAdvisor](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/cadvisor.md) +- [cAdvisor](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/cadvisor.md) - [oVirt Containers](https://github.com/netdata/netdata/blob/master/src/collectors/cgroups.plugin/integrations/ovirt_containers.md) -- [vCenter Server Appliance](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/vcsa/integrations/vcenter_server_appliance.md) +- [vCenter Server Appliance](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/vcsa/integrations/vcenter_server_appliance.md) ### Databases -- [4D Server](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/4d_server.md) +- [4D Server](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/4d_server.md) -- [AWS 
RDS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/aws_rds.md) +- [AWS RDS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/aws_rds.md) -- [Cassandra](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/cassandra/integrations/cassandra.md) +- [Cassandra](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/cassandra/integrations/cassandra.md) -- [ClickHouse](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/clickhouse/integrations/clickhouse.md) +- [ClickHouse](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/clickhouse/integrations/clickhouse.md) -- [ClusterControl CMON](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/clustercontrol_cmon.md) +- [ClusterControl CMON](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/clustercontrol_cmon.md) -- [CockroachDB](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/cockroachdb/integrations/cockroachdb.md) +- [CockroachDB](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/cockroachdb/integrations/cockroachdb.md) -- [CouchDB](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/couchdb/integrations/couchdb.md) +- [CouchDB](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/couchdb/integrations/couchdb.md) -- [Couchbase](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/couchbase/integrations/couchbase.md) +- [Couchbase](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/couchbase/integrations/couchbase.md) -- [HANA](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/hana.md) +- [HANA](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/hana.md) -- [Hasura GraphQL Server](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/hasura_graphql_server.md) +- [Hasura GraphQL Server](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/hasura_graphql_server.md) -- [InfluxDB](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/influxdb.md) +- [InfluxDB](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/influxdb.md) -- [Machbase](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/machbase.md) +- [Machbase](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/machbase.md) -- [MariaDB](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/mysql/integrations/mariadb.md) +- [MariaDB](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/mysql/integrations/mariadb.md) -- [Memcached (community)](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/memcached_community.md) +- [Memcached (community)](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/memcached_community.md) -- 
[Memcached](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/memcached/integrations/memcached.md) +- [Memcached](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/memcached/integrations/memcached.md) -- [MongoDB](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/mongodb/integrations/mongodb.md) +- [MongoDB](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/mongodb/integrations/mongodb.md) -- [MySQL](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/mysql/integrations/mysql.md) +- [MySQL](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/mysql/integrations/mysql.md) -- [ODBC](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/odbc.md) +- [ODBC](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/odbc.md) -- [Oracle DB (community)](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/oracle_db_community.md) +- [Oracle DB (community)](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/oracle_db_community.md) - [Oracle DB](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/oracledb/integrations/oracle_db.md) -- [Patroni](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/patroni.md) +- [Patroni](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/patroni.md) -- [Percona MySQL](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/mysql/integrations/percona_mysql.md) +- [Percona MySQL](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/mysql/integrations/percona_mysql.md) -- [PgBouncer](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/pgbouncer/integrations/pgbouncer.md) +- [PgBouncer](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/pgbouncer/integrations/pgbouncer.md) -- [Pgpool-II](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/pgpool-ii.md) +- [Pgpool-II](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/pgpool-ii.md) -- [Pika](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/pika/integrations/pika.md) +- [Pika](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/pika/integrations/pika.md) -- [PostgreSQL](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/postgres/integrations/postgresql.md) +- [PostgreSQL](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/postgres/integrations/postgresql.md) -- [ProxySQL](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/proxysql/integrations/proxysql.md) +- [ProxySQL](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/proxysql/integrations/proxysql.md) -- [Redis](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/redis/integrations/redis.md) +- [Redis](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/redis/integrations/redis.md) -- 
[RethinkDB](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md) +- [RethinkDB](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/rethinkdb/integrations/rethinkdb.md) -- [RiakKV](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/riakkv/integrations/riakkv.md) +- [Riak KV](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/riakkv/integrations/riak_kv.md) -- [SQL Database agnostic](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/sql_database_agnostic.md) +- [SQL Database agnostic](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/sql_database_agnostic.md) -- [Vertica](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/vertica.md) +- [Vertica](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/vertica.md) -- [Warp10](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/warp10.md) +- [Warp10](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/warp10.md) -- [pgBackRest](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/pgbackrest.md) +- [pgBackRest](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/pgbackrest.md) ### Distributed Computing Systems - [BOINC](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/boinc/integrations/boinc.md) -- [Gearman](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/gearman/integrations/gearman.md) +- [Gearman](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/gearman/integrations/gearman.md) ### DNS and DHCP Servers -- [Akamai Edge DNS Traffic](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/akamai_edge_dns_traffic.md) +- [Akamai Edge DNS Traffic](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/akamai_edge_dns_traffic.md) -- [CoreDNS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/coredns/integrations/coredns.md) +- [CoreDNS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/coredns/integrations/coredns.md) -- [DNS query](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/dnsquery/integrations/dns_query.md) +- [DNS query](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/dnsquery/integrations/dns_query.md) -- [DNSBL](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/dnsbl.md) +- [DNSBL](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/dnsbl.md) -- [DNSdist](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/dnsdist/integrations/dnsdist.md) +- [DNSdist](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/dnsdist/integrations/dnsdist.md) -- [Dnsmasq DHCP](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/dnsmasq_dhcp/integrations/dnsmasq_dhcp.md) +- [Dnsmasq 
DHCP](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/dnsmasq_dhcp/integrations/dnsmasq_dhcp.md) -- [Dnsmasq](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/dnsmasq/integrations/dnsmasq.md) +- [Dnsmasq](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/dnsmasq/integrations/dnsmasq.md) -- [ISC DHCP](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/isc_dhcpd/integrations/isc_dhcp.md) +- [ISC DHCP](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/isc_dhcpd/integrations/isc_dhcp.md) -- [Name Server Daemon](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md) +- [NSD](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/nsd/integrations/nsd.md) -- [NextDNS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/nextdns.md) +- [NextDNS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/nextdns.md) -- [Pi-hole](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/pihole/integrations/pi-hole.md) +- [Pi-hole](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/pihole/integrations/pi-hole.md) -- [PowerDNS Authoritative Server](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/powerdns/integrations/powerdns_authoritative_server.md) +- [PowerDNS Authoritative Server](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/powerdns/integrations/powerdns_authoritative_server.md) -- [PowerDNS Recursor](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/powerdns_recursor/integrations/powerdns_recursor.md) +- [PowerDNS Recursor](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/powerdns_recursor/integrations/powerdns_recursor.md) -- [Unbound](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/unbound/integrations/unbound.md) +- [Unbound](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/unbound/integrations/unbound.md) ### eBPF @@ -353,9 +353,9 @@ If you don't see the app/service you'd like to monitor in this list: ### FreeBSD -- [FreeBSD NFS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/freebsd_nfs.md) +- [FreeBSD NFS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/freebsd_nfs.md) -- [FreeBSD RCTL-RACCT](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/freebsd_rctl-racct.md) +- [FreeBSD RCTL-RACCT](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/freebsd_rctl-racct.md) - [dev.cpu.0.freq](https://github.com/netdata/netdata/blob/master/src/collectors/freebsd.plugin/integrations/dev.cpu.0.freq.md) @@ -419,43 +419,43 @@ If you don't see the app/service you'd like to monitor in this list: ### FTP Servers -- [ProFTPD](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/proftpd.md) +- [ProFTPD](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/proftpd.md) ### Gaming -- 
[BungeeCord](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/bungeecord.md) +- [BungeeCord](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/bungeecord.md) -- [Minecraft](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/minecraft.md) +- [Minecraft](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/minecraft.md) -- [OpenRCT2](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/openrct2.md) +- [OpenRCT2](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/openrct2.md) - [SpigotMC](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md) -- [Steam](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/steam.md) +- [Steam](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/steam.md) ### Generic Data Collection -- [Custom Exporter](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/custom_exporter.md) +- [Custom Exporter](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/custom_exporter.md) -- [Excel spreadsheet](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/excel_spreadsheet.md) +- [Excel spreadsheet](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/excel_spreadsheet.md) -- [Generic Command Line Output](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/generic_command_line_output.md) +- [Generic Command Line Output](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/generic_command_line_output.md) -- [JetBrains Floating License Server](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/jetbrains_floating_license_server.md) +- [JetBrains Floating License Server](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/jetbrains_floating_license_server.md) -- [OpenWeatherMap](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/openweathermap.md) +- [OpenWeatherMap](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/openweathermap.md) - [Pandas](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/pandas/integrations/pandas.md) -- [Prometheus endpoint](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/prometheus_endpoint.md) +- [Prometheus endpoint](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/prometheus_endpoint.md) -- [SNMP devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/snmp/integrations/snmp_devices.md) +- [SNMP devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/snmp/integrations/snmp_devices.md) -- [Shell 
command](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/shell_command.md) +- [Shell command](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/shell_command.md) -- [Tankerkoenig API](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/tankerkoenig_api.md) +- [Tankerkoenig API](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/tankerkoenig_api.md) -- [TwinCAT ADS Web Service](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/twincat_ads_web_service.md) +- [TwinCAT ADS Web Service](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/twincat_ads_web_service.md) ### Hardware Devices and Sensors @@ -463,143 +463,143 @@ If you don't see the app/service you'd like to monitor in this list: - [AM2320](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/am2320/integrations/am2320.md) -- [AMD CPU & GPU](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/amd_cpu_&_gpu.md) +- [AMD CPU & GPU](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/amd_cpu_&_gpu.md) - [AMD GPU](https://github.com/netdata/netdata/blob/master/src/collectors/proc.plugin/integrations/amd_gpu.md) -- [ARM HWCPipe](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/arm_hwcpipe.md) +- [ARM HWCPipe](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/arm_hwcpipe.md) - [CUPS](https://github.com/netdata/netdata/blob/master/src/collectors/cups.plugin/integrations/cups.md) -- [HDD temperature](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/hddtemp/integrations/hdd_temperature.md) +- [HDD temperature](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/hddtemp/integrations/hdd_temperature.md) -- [HP iLO](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/hp_ilo.md) +- [HP iLO](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/hp_ilo.md) -- [IBM CryptoExpress (CEX) cards](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ibm_cryptoexpress_cex_cards.md) +- [IBM CryptoExpress (CEX) cards](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ibm_cryptoexpress_cex_cards.md) -- [IBM Z Hardware Management Console](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ibm_z_hardware_management_console.md) +- [IBM Z Hardware Management Console](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ibm_z_hardware_management_console.md) -- [IPMI (By SoundCloud)](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ipmi_by_soundcloud.md) +- [IPMI (By SoundCloud)](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ipmi_by_soundcloud.md) -- [Intel 
GPU](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/intelgpu/integrations/intel_gpu.md) +- [Intel GPU](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/intelgpu/integrations/intel_gpu.md) - [Intelligent Platform Management Interface (IPMI)](https://github.com/netdata/netdata/blob/master/src/collectors/freeipmi.plugin/integrations/intelligent_platform_management_interface_ipmi.md) -- [Linux Sensors (lm-sensors)](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/sensors/integrations/linux_sensors_lm-sensors.md) +- [Linux Sensors (lm-sensors)](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/sensors/integrations/linux_sensors_lm-sensors.md) - [Linux Sensors (sysfs)](https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md) -- [NVML](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/nvml.md) +- [NVML](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/nvml.md) -- [Nvidia GPU](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/nvidia_smi/integrations/nvidia_gpu.md) +- [Nvidia GPU](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/nvidia_smi/integrations/nvidia_gpu.md) -- [Raritan PDU](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/raritan_pdu.md) +- [Raritan PDU](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/raritan_pdu.md) -- [S.M.A.R.T.](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/smartctl/integrations/s.m.a.r.t..md) +- [S.M.A.R.T.](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/smartctl/integrations/s.m.a.r.t..md) -- [ServerTech](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/servertech.md) +- [ServerTech](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/servertech.md) -- [Siemens S7 PLC](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/siemens_s7_plc.md) +- [Siemens S7 PLC](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/siemens_s7_plc.md) -- [T-Rex NVIDIA GPU Miner](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/t-rex_nvidia_gpu_miner.md) +- [T-Rex NVIDIA GPU Miner](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/t-rex_nvidia_gpu_miner.md) ### IoT Devices -- [Airthings Waveplus air sensor](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/airthings_waveplus_air_sensor.md) +- [Airthings Waveplus air sensor](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/airthings_waveplus_air_sensor.md) -- [Bobcat Miner 300](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/bobcat_miner_300.md) +- [Bobcat Miner 300](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/bobcat_miner_300.md) -- [Christ Elektronik CLM5IP power 
panel](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/christ_elektronik_clm5ip_power_panel.md) +- [Christ Elektronik CLM5IP power panel](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/christ_elektronik_clm5ip_power_panel.md) -- [CraftBeerPi](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/craftbeerpi.md) +- [CraftBeerPi](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/craftbeerpi.md) -- [Dutch Electricity Smart Meter](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/dutch_electricity_smart_meter.md) +- [Dutch Electricity Smart Meter](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/dutch_electricity_smart_meter.md) -- [Elgato Key Light devices.](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/elgato_key_light_devices..md) +- [Elgato Key Light devices.](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/elgato_key_light_devices..md) -- [Energomera smart power meters](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/energomera_smart_power_meters.md) +- [Energomera smart power meters](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/energomera_smart_power_meters.md) -- [Helium hotspot](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/helium_hotspot.md) +- [Helium hotspot](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/helium_hotspot.md) -- [Homebridge](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/homebridge.md) +- [Homebridge](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/homebridge.md) -- [Homey](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/homey.md) +- [Homey](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/homey.md) -- [Jarvis Standing Desk](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/jarvis_standing_desk.md) +- [Jarvis Standing Desk](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/jarvis_standing_desk.md) -- [MP707 USB thermometer](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/mp707_usb_thermometer.md) +- [MP707 USB thermometer](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/mp707_usb_thermometer.md) -- [Modbus protocol](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/modbus_protocol.md) +- [Modbus protocol](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/modbus_protocol.md) -- [Monnit Sensors MQTT](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/monnit_sensors_mqtt.md) +- [Monnit Sensors 
MQTT](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/monnit_sensors_mqtt.md) -- [Nature Remo E lite devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/nature_remo_e_lite_devices.md) +- [Nature Remo E lite devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/nature_remo_e_lite_devices.md) -- [Netatmo sensors](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/netatmo_sensors.md) +- [Netatmo sensors](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/netatmo_sensors.md) -- [OpenHAB](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/openhab.md) +- [OpenHAB](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/openhab.md) -- [Personal Weather Station](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/personal_weather_station.md) +- [Personal Weather Station](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/personal_weather_station.md) -- [Philips Hue](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/philips_hue.md) +- [Philips Hue](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/philips_hue.md) -- [Pimoroni Enviro+](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/pimoroni_enviro+.md) +- [Pimoroni Enviro+](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/pimoroni_enviro+.md) -- [Powerpal devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/powerpal_devices.md) +- [Powerpal devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/powerpal_devices.md) -- [Radio Thermostat](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/radio_thermostat.md) +- [Radio Thermostat](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/radio_thermostat.md) -- [SMA Inverters](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/sma_inverters.md) +- [SMA Inverters](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/sma_inverters.md) -- [Salicru EQX inverter](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/salicru_eqx_inverter.md) +- [Salicru EQX inverter](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/salicru_eqx_inverter.md) -- [Sense Energy](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/sense_energy.md) +- [Sense Energy](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/sense_energy.md) -- [Shelly humidity sensor](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/shelly_humidity_sensor.md) +- [Shelly humidity 
sensor](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/shelly_humidity_sensor.md) -- [Smart meters SML](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/smart_meters_sml.md) +- [Smart meters SML](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/smart_meters_sml.md) -- [Solar logging stick](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/solar_logging_stick.md) +- [Solar logging stick](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/solar_logging_stick.md) -- [SolarEdge inverters](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/solaredge_inverters.md) +- [SolarEdge inverters](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/solaredge_inverters.md) -- [Solis Ginlong 5G inverters](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/solis_ginlong_5g_inverters.md) +- [Solis Ginlong 5G inverters](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/solis_ginlong_5g_inverters.md) -- [Sunspec Solar Energy](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/sunspec_solar_energy.md) +- [Sunspec Solar Energy](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/sunspec_solar_energy.md) -- [TP-Link P110](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/tp-link_p110.md) +- [TP-Link P110](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/tp-link_p110.md) -- [Tado smart heating solution](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/tado_smart_heating_solution.md) +- [Tado smart heating solution](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/tado_smart_heating_solution.md) -- [Tesla Powerwall](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/tesla_powerwall.md) +- [Tesla Powerwall](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/tesla_powerwall.md) -- [Tesla Wall Connector](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/tesla_wall_connector.md) +- [Tesla Wall Connector](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/tesla_wall_connector.md) -- [Tesla vehicle](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/tesla_vehicle.md) +- [Tesla vehicle](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/tesla_vehicle.md) -- [Xiaomi Mi Flora](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/xiaomi_mi_flora.md) +- [Xiaomi Mi Flora](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/xiaomi_mi_flora.md) -- [iqAir AirVisual air quality 
monitors](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/iqair_airvisual_air_quality_monitors.md) +- [iqAir AirVisual air quality monitors](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/iqair_airvisual_air_quality_monitors.md) ### Kubernetes -- [Cilium Agent](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/cilium_agent.md) +- [Cilium Agent](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/cilium_agent.md) -- [Cilium Operator](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/cilium_operator.md) +- [Cilium Operator](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/cilium_operator.md) -- [Cilium Proxy](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/cilium_proxy.md) +- [Cilium Proxy](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/cilium_proxy.md) -- [Kubelet](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/k8s_kubelet/integrations/kubelet.md) +- [Kubelet](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/k8s_kubelet/integrations/kubelet.md) -- [Kubeproxy](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/k8s_kubeproxy/integrations/kubeproxy.md) +- [Kubeproxy](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/k8s_kubeproxy/integrations/kubeproxy.md) -- [Kubernetes Cluster Cloud Cost](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/kubernetes_cluster_cloud_cost.md) +- [Kubernetes Cluster Cloud Cost](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/kubernetes_cluster_cloud_cost.md) -- [Kubernetes Cluster State](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/k8s_state/integrations/kubernetes_cluster_state.md) +- [Kubernetes Cluster State](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/k8s_state/integrations/kubernetes_cluster_state.md) - [Kubernetes Containers](https://github.com/netdata/netdata/blob/master/src/collectors/cgroups.plugin/integrations/kubernetes_containers.md) -- [Rancher](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/rancher.md) +- [Rancher](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/rancher.md) ### Linux Systems @@ -607,7 +607,7 @@ If you don't see the app/service you'd like to monitor in this list: - [Disk space](https://github.com/netdata/netdata/blob/master/src/collectors/diskspace.plugin/integrations/disk_space.md) -- [OpenRC](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/openrc.md) +- [OpenRC](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/openrc.md) #### CPU @@ -635,8 +635,6 @@ If you don't see the app/service you'd like to monitor in this list: - [ZFS Adaptive Replacement Cache](https://github.com/netdata/netdata/blob/master/src/collectors/proc.plugin/integrations/zfs_adaptive_replacement_cache.md) -- [ZFS 
Pools](https://github.com/netdata/netdata/blob/master/src/collectors/proc.plugin/integrations/zfs_pools.md) - #### Firewall - [Conntrack](https://github.com/netdata/netdata/blob/master/src/collectors/proc.plugin/integrations/conntrack.md) @@ -645,7 +643,7 @@ If you don't see the app/service you'd like to monitor in this list: - [Synproxy](https://github.com/netdata/netdata/blob/master/src/collectors/proc.plugin/integrations/synproxy.md) -- [nftables](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/nftables.md) +- [nftables](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/nftables.md) #### IPC @@ -679,7 +677,7 @@ If you don't see the app/service you'd like to monitor in this list: #### Network -- [Access Points](https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/ap/integrations/access_points.md) +- [Access Points](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/ap/integrations/access_points.md) - [IP Virtual Server](https://github.com/netdata/netdata/blob/master/src/collectors/proc.plugin/integrations/ip_virtual_server.md) @@ -721,185 +719,177 @@ If you don't see the app/service you'd like to monitor in this list: ### Logs Servers -- [AuthLog](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/authlog.md) +- [AuthLog](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/authlog.md) -- [Fluentd](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/fluentd/integrations/fluentd.md) +- [Fluentd](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/fluentd/integrations/fluentd.md) -- [Graylog Server](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/graylog_server.md) +- [Graylog Server](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/graylog_server.md) -- [Logstash](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/logstash/integrations/logstash.md) +- [Logstash](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/logstash/integrations/logstash.md) -- [journald](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/journald.md) +- [journald](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/journald.md) -- [loki](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/loki.md) +- [loki](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/loki.md) -- [mtail](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/mtail.md) +- [mtail](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/mtail.md) ### macOS Systems -- [Apple Time Machine](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/apple_time_machine.md) +- [Apple Time Machine](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/apple_time_machine.md) - [macOS](https://github.com/netdata/netdata/blob/master/src/collectors/macos.plugin/integrations/macos.md) ### Mail 
Servers -- [DMARC](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/dmarc.md) +- [DMARC](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/dmarc.md) -- [Dovecot](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/dovecot/integrations/dovecot.md) +- [Dovecot](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/dovecot/integrations/dovecot.md) -- [Exim](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/exim/integrations/exim.md) +- [Exim](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/exim/integrations/exim.md) -- [Halon](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/halon.md) +- [Halon](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/halon.md) -- [Maildir](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/maildir.md) +- [Maildir](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/maildir.md) -- [Postfix](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/postfix/integrations/postfix.md) +- [Postfix](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/postfix/integrations/postfix.md) ### Media Services -- [Discourse](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/discourse.md) - -- [Icecast](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/icecast/integrations/icecast.md) +- [Discourse](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/discourse.md) -- [OBS Studio](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/obs_studio.md) +- [Icecast](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/icecast/integrations/icecast.md) -- [RetroShare](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/retroshare/integrations/retroshare.md) +- [OBS Studio](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/obs_studio.md) -- [SABnzbd](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/sabnzbd.md) +- [SABnzbd](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/sabnzbd.md) -- [Stream](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/stream.md) +- [Stream](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/stream.md) -- [Twitch](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/twitch.md) +- [Twitch](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/twitch.md) -- [Zulip](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/zulip.md) +- [Zulip](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/zulip.md) ### Message Brokers -- 
[ActiveMQ](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/activemq/integrations/activemq.md) +- [ActiveMQ](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/activemq/integrations/activemq.md) -- [Apache Pulsar](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/pulsar/integrations/apache_pulsar.md) +- [Apache Pulsar](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/pulsar/integrations/apache_pulsar.md) -- [Beanstalk](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md) +- [Beanstalk](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/beanstalk/integrations/beanstalk.md) -- [IBM MQ](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ibm_mq.md) +- [IBM MQ](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ibm_mq.md) -- [Kafka Connect](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/kafka_connect.md) +- [Kafka Connect](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/kafka_connect.md) -- [Kafka ZooKeeper](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/kafka_zookeeper.md) +- [Kafka ZooKeeper](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/kafka_zookeeper.md) -- [Kafka](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/kafka.md) +- [Kafka](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/kafka.md) -- [MQTT Blackbox](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/mqtt_blackbox.md) +- [MQTT Blackbox](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/mqtt_blackbox.md) -- [RabbitMQ](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/rabbitmq/integrations/rabbitmq.md) +- [RabbitMQ](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/rabbitmq/integrations/rabbitmq.md) -- [Redis Queue](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/redis_queue.md) +- [Redis Queue](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/redis_queue.md) -- [VerneMQ](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/vernemq/integrations/vernemq.md) +- [VerneMQ](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/vernemq/integrations/vernemq.md) -- [XMPP Server](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/xmpp_server.md) +- [XMPP Server](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/xmpp_server.md) -- [mosquitto](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/mosquitto.md) +- [mosquitto](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/mosquitto.md) ### Networking Stack and Network Interfaces -- [8430FT 
modem](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/8430ft_modem.md) +- [8430FT modem](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/8430ft_modem.md) -- [A10 ACOS network devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/a10_acos_network_devices.md) +- [A10 ACOS network devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/a10_acos_network_devices.md) -- [Andrews & Arnold line status](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/andrews_&_arnold_line_status.md) +- [Andrews & Arnold line status](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/andrews_&_arnold_line_status.md) -- [Aruba devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/aruba_devices.md) +- [Aruba devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/aruba_devices.md) -- [Bird Routing Daemon](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/bird_routing_daemon.md) +- [Bird Routing Daemon](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/bird_routing_daemon.md) -- [Checkpoint device](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/checkpoint_device.md) +- [Checkpoint device](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/checkpoint_device.md) -- [Cisco ACI](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/cisco_aci.md) +- [Cisco ACI](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/cisco_aci.md) -- [Citrix NetScaler](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/citrix_netscaler.md) +- [Citrix NetScaler](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/citrix_netscaler.md) -- [DDWRT Routers](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ddwrt_routers.md) +- [DDWRT Routers](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ddwrt_routers.md) -- [FRRouting](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/frrouting.md) +- [FRRouting](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/frrouting.md) -- [Fortigate firewall](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/fortigate_firewall.md) +- [Fortigate firewall](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/fortigate_firewall.md) -- [Freifunk network](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/freifunk_network.md) +- [Freifunk network](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/freifunk_network.md) -- [Fritzbox network 
devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/fritzbox_network_devices.md) +- [Fritzbox network devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/fritzbox_network_devices.md) -- [Hitron CGN series CPE](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/hitron_cgn_series_cpe.md) +- [Hitron CGN series CPE](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/hitron_cgn_series_cpe.md) -- [Hitron CODA Cable Modem](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/hitron_coda_cable_modem.md) +- [Hitron CODA Cable Modem](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/hitron_coda_cable_modem.md) -- [Huawei devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/huawei_devices.md) +- [Huawei devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/huawei_devices.md) -- [Keepalived](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/keepalived.md) +- [Keepalived](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/keepalived.md) -- [Meraki dashboard](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/meraki_dashboard.md) +- [Meraki dashboard](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/meraki_dashboard.md) -- [MikroTik devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/mikrotik_devices.md) +- [MikroTik devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/mikrotik_devices.md) -- [Mikrotik RouterOS devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/mikrotik_routeros_devices.md) +- [Mikrotik RouterOS devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/mikrotik_routeros_devices.md) -- [NetFlow](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/netflow.md) +- [NetFlow](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/netflow.md) -- [NetMeter](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/netmeter.md) +- [NetMeter](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/netmeter.md) -- [Open vSwitch](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/open_vswitch.md) +- [Open vSwitch](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/open_vswitch.md) -- [OpenROADM devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/openroadm_devices.md) +- [OpenROADM devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/openroadm_devices.md) -- [RIPE 
Atlas](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ripe_atlas.md) +- [RIPE Atlas](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ripe_atlas.md) -- [SONiC NOS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/sonic_nos.md) +- [SONiC NOS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/sonic_nos.md) -- [SmartRG 808AC Cable Modem](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/smartrg_808ac_cable_modem.md) +- [SmartRG 808AC Cable Modem](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/smartrg_808ac_cable_modem.md) -- [Starlink (SpaceX)](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/starlink_spacex.md) +- [Starlink (SpaceX)](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/starlink_spacex.md) -- [Traceroute](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/traceroute.md) +- [Traceroute](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/traceroute.md) -- [Ubiquiti UFiber OLT](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ubiquiti_ufiber_olt.md) +- [Ubiquiti UFiber OLT](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ubiquiti_ufiber_olt.md) -- [Zyxel GS1200-8](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/zyxel_gs1200-8.md) +- [Zyxel GS1200-8](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/zyxel_gs1200-8.md) ### Incident Management -- [OTRS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/otrs.md) +- [OTRS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/otrs.md) -- [StatusPage](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/statuspage.md) +- [StatusPage](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/statuspage.md) ### Observability -- [Collectd](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/collectd.md) +- [Collectd](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/collectd.md) -- [Dynatrace](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/dynatrace.md) +- [Dynatrace](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/dynatrace.md) -- [Grafana](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/grafana.md) +- [Grafana](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/grafana.md) -- [Hubble](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/hubble.md) +- 
[Hubble](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/hubble.md) -- [Naemon](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/naemon.md) +- [Naemon](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/naemon.md) -- [Nagios](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/nagios.md) +- [Nagios](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/nagios.md) -- [New Relic](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/new_relic.md) +- [New Relic](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/new_relic.md) ### Other -- [Example collector](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/example/integrations/example_collector.md) - -- [Files and directories](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/filecheck/integrations/files_and_directories.md) - -- [GitHub API rate limit](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/github_api_rate_limit.md) - -- [GitHub repository](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/github_repository.md) +- [Files and directories](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/filecheck/integrations/files_and_directories.md) -- [Netdata Agent alarms](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md) +- [GitHub API rate limit](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/github_api_rate_limit.md) -- [python.d changefinder](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md) +- [GitHub repository](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/github_repository.md) - [python.d zscores](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md) @@ -907,7 +897,7 @@ If you don't see the app/service you'd like to monitor in this list: - [Applications](https://github.com/netdata/netdata/blob/master/src/collectors/apps.plugin/integrations/applications.md) -- [Supervisor](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/supervisord/integrations/supervisor.md) +- [Supervisor](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/supervisord/integrations/supervisor.md) - [User Groups](https://github.com/netdata/netdata/blob/master/src/collectors/apps.plugin/integrations/user_groups.md) @@ -915,153 +905,153 @@ If you don't see the app/service you'd like to monitor in this list: ### Provisioning Systems -- [BOSH](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/bosh.md) +- [BOSH](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/bosh.md) -- [Cloud Foundry Firehose](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/cloud_foundry_firehose.md) +- [Cloud 
Foundry Firehose](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/cloud_foundry_firehose.md) -- [Cloud Foundry](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/cloud_foundry.md) +- [Cloud Foundry](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/cloud_foundry.md) -- [Spacelift](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/spacelift.md) +- [Spacelift](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/spacelift.md) ### Search Engines -- [Elasticsearch](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/elasticsearch/integrations/elasticsearch.md) +- [Elasticsearch](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/elasticsearch/integrations/elasticsearch.md) -- [Meilisearch](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/meilisearch.md) +- [Meilisearch](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/meilisearch.md) -- [OpenSearch](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/elasticsearch/integrations/opensearch.md) +- [OpenSearch](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/elasticsearch/integrations/opensearch.md) -- [Sphinx](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/sphinx.md) +- [Sphinx](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/sphinx.md) ### Security Systems -- [Certificate Transparency](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/certificate_transparency.md) +- [Certificate Transparency](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/certificate_transparency.md) -- [ClamAV daemon](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/clamav_daemon.md) +- [ClamAV daemon](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/clamav_daemon.md) -- [Clamscan results](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/clamscan_results.md) +- [Clamscan results](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/clamscan_results.md) -- [Crowdsec](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/crowdsec.md) +- [Crowdsec](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/crowdsec.md) -- [Honeypot](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/honeypot.md) +- [Honeypot](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/honeypot.md) -- [Lynis audit reports](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/lynis_audit_reports.md) +- [Lynis audit reports](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/lynis_audit_reports.md) -- 
[OpenVAS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/openvas.md) +- [OpenVAS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/openvas.md) -- [Rspamd](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/rspamd/integrations/rspamd.md) +- [Rspamd](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/rspamd/integrations/rspamd.md) -- [SSL Certificate](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ssl_certificate.md) +- [SSL Certificate](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ssl_certificate.md) -- [Suricata](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/suricata.md) +- [Suricata](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/suricata.md) -- [Vault PKI](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/vault_pki.md) +- [Vault PKI](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/vault_pki.md) ### Service Discovery / Registry -- [Consul](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/consul/integrations/consul.md) +- [Consul](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/consul/integrations/consul.md) -- [Kafka Consumer Lag](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/kafka_consumer_lag.md) +- [Kafka Consumer Lag](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/kafka_consumer_lag.md) -- [ZooKeeper](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/zookeeper/integrations/zookeeper.md) +- [ZooKeeper](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/zookeeper/integrations/zookeeper.md) -- [etcd](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/etcd.md) +- [etcd](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/etcd.md) ### Storage, Mount Points and Filesystems -- [Adaptec RAID](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/adaptecraid/integrations/adaptec_raid.md) +- [Adaptec RAID](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/adaptecraid/integrations/adaptec_raid.md) -- [Altaro Backup](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/altaro_backup.md) +- [Altaro Backup](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/altaro_backup.md) -- [Borg backup](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/borg_backup.md) +- [Borg backup](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/borg_backup.md) -- [CVMFS clients](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/cvmfs_clients.md) +- [CVMFS 
clients](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/cvmfs_clients.md) - [Ceph](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/ceph/integrations/ceph.md) -- [DMCache devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/dmcache/integrations/dmcache_devices.md) +- [DMCache devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/dmcache/integrations/dmcache_devices.md) -- [Dell EMC Isilon cluster](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/dell_emc_isilon_cluster.md) +- [Dell EMC Isilon cluster](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/dell_emc_isilon_cluster.md) -- [Dell EMC ScaleIO](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/scaleio/integrations/dell_emc_scaleio.md) +- [Dell EMC ScaleIO](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/scaleio/integrations/dell_emc_scaleio.md) -- [Dell EMC XtremIO cluster](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/dell_emc_xtremio_cluster.md) +- [Dell EMC XtremIO cluster](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/dell_emc_xtremio_cluster.md) -- [Dell PowerMax](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/dell_powermax.md) +- [Dell PowerMax](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/dell_powermax.md) -- [EOS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/eos.md) +- [EOS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/eos.md) -- [Generic storage enclosure tool](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/generic_storage_enclosure_tool.md) +- [Generic storage enclosure tool](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/generic_storage_enclosure_tool.md) -- [HDSentinel](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/hdsentinel.md) +- [HDSentinel](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/hdsentinel.md) -- [HPE Smart Arrays](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/hpssa/integrations/hpe_smart_arrays.md) +- [HPE Smart Arrays](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/hpssa/integrations/hpe_smart_arrays.md) -- [Hadoop Distributed File System (HDFS)](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/hdfs/integrations/hadoop_distributed_file_system_hdfs.md) +- [Hadoop Distributed File System (HDFS)](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/hdfs/integrations/hadoop_distributed_file_system_hdfs.md) -- [IBM Spectrum Virtualize](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ibm_spectrum_virtualize.md) +- [IBM Spectrum 
Virtualize](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ibm_spectrum_virtualize.md) -- [IBM Spectrum](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/ibm_spectrum.md) +- [IBM Spectrum](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/ibm_spectrum.md) -- [IPFS](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/ipfs/integrations/ipfs.md) +- [IPFS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/ipfs/integrations/ipfs.md) -- [LVM logical volumes](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/lvm/integrations/lvm_logical_volumes.md) +- [LVM logical volumes](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/lvm/integrations/lvm_logical_volumes.md) -- [Lagerist Disk latency](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/lagerist_disk_latency.md) +- [Lagerist Disk latency](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/lagerist_disk_latency.md) -- [MegaCLI MegaRAID](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/megacli/integrations/megacli_megaraid.md) +- [MegaCLI MegaRAID](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/megacli/integrations/megacli_megaraid.md) -- [MogileFS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/mogilefs.md) +- [MogileFS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/mogilefs.md) -- [NVMe devices](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/nvme/integrations/nvme_devices.md) +- [NVMe devices](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/nvme/integrations/nvme_devices.md) -- [NetApp Solidfire](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/netapp_solidfire.md) +- [NetApp Solidfire](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/netapp_solidfire.md) -- [Netapp ONTAP API](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/netapp_ontap_api.md) +- [Netapp ONTAP API](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/netapp_ontap_api.md) - [Samba](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/samba/integrations/samba.md) -- [Starwind VSAN VSphere Edition](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/starwind_vsan_vsphere_edition.md) +- [Starwind VSAN VSphere Edition](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/starwind_vsan_vsphere_edition.md) -- [StoreCLI RAID](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/storcli/integrations/storecli_raid.md) +- [StoreCLI RAID](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/storcli/integrations/storecli_raid.md) -- [Storidge](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/storidge.md) +- 
[Storidge](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/storidge.md) -- [Synology ActiveBackup](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/synology_activebackup.md) +- [Synology ActiveBackup](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/synology_activebackup.md) -- [ZFS Pools](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/zfspool/integrations/zfs_pools.md) +- [ZFS Pools](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/zfspool/integrations/zfs_pools.md) ### Synthetic Checks -- [Blackbox](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/blackbox.md) +- [Blackbox](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/blackbox.md) -- [Domain expiration date](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/whoisquery/integrations/domain_expiration_date.md) +- [Domain expiration date](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/whoisquery/integrations/domain_expiration_date.md) -- [HTTP Endpoints](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/httpcheck/integrations/http_endpoints.md) +- [HTTP Endpoints](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/httpcheck/integrations/http_endpoints.md) - [IOPing](https://github.com/netdata/netdata/blob/master/src/collectors/ioping.plugin/integrations/ioping.md) - [Idle OS Jitter](https://github.com/netdata/netdata/blob/master/src/collectors/idlejitter.plugin/integrations/idle_os_jitter.md) -- [Monit](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/monit/integrations/monit.md) +- [Monit](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/monit/integrations/monit.md) -- [Ping](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/ping/integrations/ping.md) +- [Ping](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/ping/integrations/ping.md) -- [Pingdom](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/pingdom.md) +- [Pingdom](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/pingdom.md) -- [Site 24x7](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/site_24x7.md) +- [Site 24x7](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/site_24x7.md) -- [TCP Endpoints](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/portcheck/integrations/tcp_endpoints.md) +- [TCP Endpoints](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/portcheck/integrations/tcp_endpoints.md) -- [Uptimerobot](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/uptimerobot.md) +- [Uptimerobot](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/uptimerobot.md) -- [X.509 certificate](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/x509check/integrations/x.509_certificate.md) +- [X.509 
certificate](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/x509check/integrations/x.509_certificate.md) ### System Clock and NTP -- [Chrony](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/chrony/integrations/chrony.md) +- [Chrony](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/chrony/integrations/chrony.md) -- [NTPd](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/ntpd/integrations/ntpd.md) +- [NTPd](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/ntpd/integrations/ntpd.md) - [Timex](https://github.com/netdata/netdata/blob/master/src/collectors/timex.plugin/integrations/timex.md) @@ -1069,23 +1059,23 @@ If you don't see the app/service you'd like to monitor in this list: - [Systemd Services](https://github.com/netdata/netdata/blob/master/src/collectors/cgroups.plugin/integrations/systemd_services.md) -- [Systemd Units](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/systemdunits/integrations/systemd_units.md) +- [Systemd Units](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/systemdunits/integrations/systemd_units.md) -- [systemd-logind users](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/logind/integrations/systemd-logind_users.md) +- [systemd-logind users](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/logind/integrations/systemd-logind_users.md) ### Task Queues -- [Celery](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/celery.md) +- [Celery](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/celery.md) -- [Mesos](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/mesos.md) +- [Mesos](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/mesos.md) -- [Slurm](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/slurm.md) +- [Slurm](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/slurm.md) ### Telephony Servers -- [GTP](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/gtp.md) +- [GTP](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/gtp.md) -- [Kannel](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/kannel.md) +- [Kannel](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/kannel.md) - [OpenSIPS](https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/opensips/integrations/opensips.md) @@ -1093,88 +1083,88 @@ If you don't see the app/service you'd like to monitor in this list: - [APC UPS](https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/apcupsd/integrations/apc_ups.md) -- [Eaton UPS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/eaton_ups.md) +- [Eaton UPS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/eaton_ups.md) -- [UPS 
(NUT)](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/upsd/integrations/ups_nut.md) +- [UPS (NUT)](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/upsd/integrations/ups_nut.md) ### VPNs -- [Fastd](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/fastd.md) +- [Fastd](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/fastd.md) - [Libreswan](https://github.com/netdata/netdata/blob/master/src/collectors/charts.d.plugin/libreswan/integrations/libreswan.md) -- [OpenVPN status log](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/openvpn_status_log/integrations/openvpn_status_log.md) +- [OpenVPN status log](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/openvpn_status_log/integrations/openvpn_status_log.md) -- [OpenVPN](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/openvpn/integrations/openvpn.md) +- [OpenVPN](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/openvpn/integrations/openvpn.md) -- [SoftEther VPN Server](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/softether_vpn_server.md) +- [SoftEther VPN Server](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/softether_vpn_server.md) -- [Speedify CLI](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/speedify_cli.md) +- [Speedify CLI](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/speedify_cli.md) -- [Tor](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/tor/integrations/tor.md) +- [Tor](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/tor/integrations/tor.md) -- [WireGuard](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/wireguard/integrations/wireguard.md) +- [WireGuard](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/wireguard/integrations/wireguard.md) -- [strongSwan](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/strongswan.md) +- [strongSwan](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/strongswan.md) ### Web Servers and Web Proxies -- [APIcast](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/apicast.md) +- [APIcast](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/apicast.md) -- [Apache](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/apache/integrations/apache.md) +- [Apache](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/apache/integrations/apache.md) -- [Clash](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/clash.md) +- [Clash](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/clash.md) -- [Cloudflare PCAP](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/cloudflare_pcap.md) +- [Cloudflare 
PCAP](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/cloudflare_pcap.md) -- [Envoy](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/envoy/integrations/envoy.md) +- [Envoy](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/envoy/integrations/envoy.md) -- [Gobetween](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/gobetween.md) +- [Gobetween](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/gobetween.md) -- [HAProxy](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/haproxy/integrations/haproxy.md) +- [HAProxy](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/haproxy/integrations/haproxy.md) -- [HHVM](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/prometheus/integrations/hhvm.md) +- [HHVM](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/prometheus/integrations/hhvm.md) -- [HTTPD](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/apache/integrations/httpd.md) +- [HTTPD](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/apache/integrations/httpd.md) -- [Lighttpd](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/lighttpd/integrations/lighttpd.md) +- [Lighttpd](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/lighttpd/integrations/lighttpd.md) -- [Litespeed](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/litespeed/integrations/litespeed.md) +- [Litespeed](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/litespeed/integrations/litespeed.md) -- [NGINX Plus](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/nginxplus/integrations/nginx_plus.md) +- [NGINX Plus](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/nginxplus/integrations/nginx_plus.md) -- [NGINX VTS](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/nginxvts/integrations/nginx_vts.md) +- [NGINX VTS](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/nginxvts/integrations/nginx_vts.md) -- [NGINX](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/nginx/integrations/nginx.md) +- [NGINX](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/nginx/integrations/nginx.md) -- [PHP-FPM](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/phpfpm/integrations/php-fpm.md) +- [PHP-FPM](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/phpfpm/integrations/php-fpm.md) -- [Squid log files](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/squidlog/integrations/squid_log_files.md) +- [Squid log files](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/squidlog/integrations/squid_log_files.md) -- [Squid](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/squid/integrations/squid.md) +- [Squid](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/squid/integrations/squid.md) -- [Tengine](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/tengine/integrations/tengine.md) +- 
[Tengine](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/tengine/integrations/tengine.md) -- [Tomcat](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/tomcat/integrations/tomcat.md) +- [Tomcat](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/tomcat/integrations/tomcat.md) -- [Traefik](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/traefik/integrations/traefik.md) +- [Traefik](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/traefik/integrations/traefik.md) - [Varnish](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/varnish/integrations/varnish.md) -- [Web server log files](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/weblog/integrations/web_server_log_files.md) +- [Web server log files](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/weblog/integrations/web_server_log_files.md) -- [uWSGI](https://github.com/netdata/netdata/blob/master/src/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md) +- [uWSGI](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/uwsgi/integrations/uwsgi.md) ### Windows Systems -- [Active Directory](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/windows/integrations/active_directory.md) +- [Active Directory](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/windows/integrations/active_directory.md) -- [HyperV](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/windows/integrations/hyperv.md) +- [HyperV](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/windows/integrations/hyperv.md) -- [MS Exchange](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/windows/integrations/ms_exchange.md) +- [MS Exchange](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/windows/integrations/ms_exchange.md) -- [MS SQL Server](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/windows/integrations/ms_sql_server.md) +- [MS SQL Server](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/windows/integrations/ms_sql_server.md) -- [NET Framework](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/windows/integrations/net_framework.md) +- [NET Framework](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/windows/integrations/net_framework.md) -- [Windows](https://github.com/netdata/netdata/blob/master/src/go/collectors/go.d.plugin/modules/windows/integrations/windows.md) +- [Windows](https://github.com/netdata/netdata/blob/master/src/go/plugin/go.d/modules/windows/integrations/windows.md) diff --git a/src/collectors/README.md b/src/collectors/README.md index dc043173c..0fd5983b7 100644 --- a/src/collectors/README.md +++ b/src/collectors/README.md @@ -50,7 +50,7 @@ specifics of what a given collector does. - **Orchestrators** are external plugins that run and manage one or more modules. They run as independent processes. The Go orchestrator is in active development. - - [go.d.plugin](/src/go/collectors/go.d.plugin/README.md): An orchestrator for data + - [go.d.plugin](/src/go/plugin/go.d/README.md): An orchestrator for data collection modules written in `go`. 
- [python.d.plugin](/src/collectors/python.d.plugin/README.md): diff --git a/src/collectors/REFERENCE.md b/src/collectors/REFERENCE.md index 648add3ce..e480a16d8 100644 --- a/src/collectors/REFERENCE.md +++ b/src/collectors/REFERENCE.md @@ -93,7 +93,7 @@ metrics, will automatically enable data collection for the application in questi When Netdata starts up, each collector searches for exposed metrics on the default endpoint established by that service or application's standard installation procedure. For example, -the [Nginx collector](/src/go/collectors/go.d.plugin/modules/nginx/README.md) searches at +the [Nginx collector](/src/go/plugin/go.d/modules/nginx/README.md) searches at `http://127.0.0.1/stub_status` for exposed metrics in the correct format. If an Nginx web server is running and exposes metrics on that endpoint, the collector begins gathering them. diff --git a/src/collectors/all.h b/src/collectors/all.h index 91bd9c230..3b96faa10 100644 --- a/src/collectors/all.h +++ b/src/collectors/all.h @@ -403,7 +403,6 @@ // Logs Management #define NETDATA_CHART_PRIO_LOGS_BASE 95000 // many charts -#define NETDATA_CHART_PRIO_LOGS_STATS_BASE 160000 // logsmanagement stats in "Netdata Monitoring" // PCI diff --git a/src/collectors/apps.plugin/apps_groups.conf b/src/collectors/apps.plugin/apps_groups.conf index 41b69ed69..724616c18 100644 --- a/src/collectors/apps.plugin/apps_groups.conf +++ b/src/collectors/apps.plugin/apps_groups.conf @@ -92,7 +92,6 @@ go.d.plugin: *go.d.plugin* slabinfo.plugin: *slabinfo.plugin* ebpf.plugin: *ebpf.plugin* debugfs.plugin: *debugfs.plugin* -logs-management.plugin: *logs-management.plugin* # agent-service-discovery agent_sd: agent_sd @@ -376,6 +375,12 @@ inetd: inetd xinetd # ----------------------------------------------------------------------------- # other application servers +i2pd: i2pd + +rethinkdb: rethinkdb + +beanstalkd: beanstalkd + rspamd: rspamd consul: consul diff --git a/src/collectors/apps.plugin/apps_output.c b/src/collectors/apps.plugin/apps_output.c index 0bf8e9ae0..84928e641 100644 --- a/src/collectors/apps.plugin/apps_output.c +++ b/src/collectors/apps.plugin/apps_output.c @@ -62,30 +62,6 @@ void send_resource_usage_to_netdata(usec_t dt) { "DIMENSION new_pids 'new pids' incremental 1 1\n" , update_every ); - - fprintf(stdout, - "CHART netdata.apps_fix '' 'Apps Plugin Normalization Ratios' 'percentage' apps.plugin netdata.apps_fix line 140002 %1$d\n" - "DIMENSION utime '' absolute 1 %2$llu\n" - "DIMENSION stime '' absolute 1 %2$llu\n" - "DIMENSION gtime '' absolute 1 %2$llu\n" - "DIMENSION minflt '' absolute 1 %2$llu\n" - "DIMENSION majflt '' absolute 1 %2$llu\n" - , update_every - , RATES_DETAIL - ); - - if(include_exited_childs) - fprintf(stdout, - "CHART netdata.apps_children_fix '' 'Apps Plugin Exited Children Normalization Ratios' 'percentage' apps.plugin netdata.apps_children_fix line 140003 %1$d\n" - "DIMENSION cutime '' absolute 1 %2$llu\n" - "DIMENSION cstime '' absolute 1 %2$llu\n" - "DIMENSION cgtime '' absolute 1 %2$llu\n" - "DIMENSION cminflt '' absolute 1 %2$llu\n" - "DIMENSION cmajflt '' absolute 1 %2$llu\n" - , update_every - , RATES_DETAIL - ); - } fprintf(stdout, @@ -118,39 +94,6 @@ void send_resource_usage_to_netdata(usec_t dt) { , apps_groups_targets_count , targets_assignment_counter ); - - fprintf(stdout, - "BEGIN netdata.apps_fix %"PRIu64"\n" - "SET utime = %u\n" - "SET stime = %u\n" - "SET gtime = %u\n" - "SET minflt = %u\n" - "SET majflt = %u\n" - "END\n" - , dt - , (unsigned int)(utime_fix_ratio * 100 * 
RATES_DETAIL) - , (unsigned int)(stime_fix_ratio * 100 * RATES_DETAIL) - , (unsigned int)(gtime_fix_ratio * 100 * RATES_DETAIL) - , (unsigned int)(minflt_fix_ratio * 100 * RATES_DETAIL) - , (unsigned int)(majflt_fix_ratio * 100 * RATES_DETAIL) - ); - - if(include_exited_childs) - fprintf(stdout, - "BEGIN netdata.apps_children_fix %"PRIu64"\n" - "SET cutime = %u\n" - "SET cstime = %u\n" - "SET cgtime = %u\n" - "SET cminflt = %u\n" - "SET cmajflt = %u\n" - "END\n" - , dt - , (unsigned int)(cutime_fix_ratio * 100 * RATES_DETAIL) - , (unsigned int)(cstime_fix_ratio * 100 * RATES_DETAIL) - , (unsigned int)(cgtime_fix_ratio * 100 * RATES_DETAIL) - , (unsigned int)(cminflt_fix_ratio * 100 * RATES_DETAIL) - , (unsigned int)(cmajflt_fix_ratio * 100 * RATES_DETAIL) - ); } void send_collected_data_to_netdata(struct target *root, const char *type, usec_t dt) { diff --git a/src/collectors/apps.plugin/apps_plugin.c b/src/collectors/apps.plugin/apps_plugin.c index b660f8171..8fe1ff008 100644 --- a/src/collectors/apps.plugin/apps_plugin.c +++ b/src/collectors/apps.plugin/apps_plugin.c @@ -51,7 +51,6 @@ size_t inodes_changed_counter = 0, links_changed_counter = 0, targets_assignment_counter = 0, - all_pids_count = 0, // the number of processes running apps_groups_targets_count = 0; // # of apps_groups.conf targets int @@ -136,20 +135,6 @@ struct target size_t pagesize; -struct pid_stat - *root_of_pids = NULL, // global list of all processes running - **all_pids = NULL; // to avoid allocations, we pre-allocate - // a pointer for each pid in the entire pid space. - -#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) -// Another pre-allocated list of all possible pids. -// We need it to pids and assign them a unique sortlist id, so that we -// read parents before children. This is needed to prevent a situation where -// a child is found running, but until we read its parent, it has exited and -// its parent has accumulated its resources. 
-pid_t *all_pids_sortlist = NULL; -#endif - // ---------------------------------------------------------------------------- int managed_log(struct pid_stat *p, PID_LOG log, int status) { @@ -208,7 +193,7 @@ int managed_log(struct pid_stat *p, PID_LOG log, int status) { } } } - errno = 0; + errno_clear(); } else if(unlikely(p->log_thrown & log)) { // netdata_log_error("unsetting log %u on pid %d", log, p->pid); @@ -300,12 +285,14 @@ static void apply_apps_groups_targets_inheritance(void) { } // init goes always to default target - if(all_pids[INIT_PID] && !all_pids[INIT_PID]->matched_by_config) - all_pids[INIT_PID]->target = apps_groups_default_target; + struct pid_stat *pi = find_pid_entry(INIT_PID); + if(pi && !pi->matched_by_config) + pi->target = apps_groups_default_target; // pid 0 goes always to default target - if(all_pids[0] && !all_pids[INIT_PID]->matched_by_config) - all_pids[0]->target = apps_groups_default_target; + pi = find_pid_entry(0); + if(pi && !pi->matched_by_config) + pi->target = apps_groups_default_target; // give a default target on all top level processes if(unlikely(debug_enabled)) loops++; @@ -320,8 +307,9 @@ static void apply_apps_groups_targets_inheritance(void) { p->sortlist = sortlist++; } - if(all_pids[1]) - all_pids[1]->sortlist = sortlist++; + pi = find_pid_entry(1); + if(pi) + pi->sortlist = sortlist++; // give a target to all merged child processes found = 1; @@ -1052,12 +1040,7 @@ int main(int argc, char **argv) { netdata_log_info("started on pid %d", getpid()); users_and_groups_init(); - -#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) - all_pids_sortlist = callocz(sizeof(pid_t), (size_t)pid_max + 1); -#endif - - all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max + 1); + pids_init(); // ------------------------------------------------------------------------ // the event loop for functions diff --git a/src/collectors/apps.plugin/apps_plugin.h b/src/collectors/apps.plugin/apps_plugin.h index ce4d815ad..a085872d9 100644 --- a/src/collectors/apps.plugin/apps_plugin.h +++ b/src/collectors/apps.plugin/apps_plugin.h @@ -17,9 +17,7 @@ #include <sys/proc_info.h> #include <sys/sysctl.h> #include <mach/mach_time.h> // For mach_timebase_info_data_t and mach_timebase_info -#endif -#if defined(__APPLE__) extern mach_timebase_info_data_t mach_info; #endif @@ -47,7 +45,6 @@ struct pid_info { struct proc_taskinfo taskinfo; struct proc_bsdinfo bsdinfo; struct rusage_info_v4 rusageinfo; - }; #endif @@ -467,9 +464,7 @@ extern struct target *users_root_target, *groups_root_target; -extern struct pid_stat - *root_of_pids, - **all_pids; +extern struct pid_stat *root_of_pids; extern int update_every; extern unsigned int time_factor; @@ -559,4 +554,7 @@ void send_charts_updates_to_netdata(struct target *root, const char *type, const void send_collected_data_to_netdata(struct target *root, const char *type, usec_t dt); void send_resource_usage_to_netdata(usec_t dt); +void pids_init(void); +struct pid_stat *find_pid_entry(pid_t pid); + #endif //NETDATA_APPS_PLUGIN_H diff --git a/src/collectors/apps.plugin/apps_proc_pid_limits.c b/src/collectors/apps.plugin/apps_proc_pid_limits.c index a1e15f63c..7485086ba 100644 --- a/src/collectors/apps.plugin/apps_proc_pid_limits.c +++ b/src/collectors/apps.plugin/apps_proc_pid_limits.c @@ -33,7 +33,7 @@ static inline bool read_proc_pid_limits_per_os(struct pid_stat *p, void *ptr __m bool ret = false; bool read_limits = false; - errno = 0; + errno_clear(); proc_pid_limits_buffer[0] = '\0'; kernel_uint_t all_fds = pid_openfds_sum(p); diff 
--git a/src/collectors/apps.plugin/apps_proc_pids.c b/src/collectors/apps.plugin/apps_proc_pids.c index fd7e776fa..b53060d60 100644 --- a/src/collectors/apps.plugin/apps_proc_pids.c +++ b/src/collectors/apps.plugin/apps_proc_pids.c @@ -2,18 +2,44 @@ #include "apps_plugin.h" -static inline struct pid_stat *get_pid_entry(pid_t pid) { - if(likely(all_pids[pid])) - return all_pids[pid]; +static struct pid_stat **all_pids = NULL; +size_t all_pids_count = 0; // the number of processes running + +struct pid_stat *root_of_pids = NULL; // global linked list of all processes running + +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) +// Another pre-allocated list of all possible pids. +// We need it to assign them a unique sortlist id, so that we +// read parents before children. This is needed to prevent a situation where +// a child is found running, but until we read its parent, it has exited and +// its parent has accumulated its resources. +pid_t *all_pids_sortlist = NULL; +#endif + +void pids_init(void) { +#if (ALL_PIDS_ARE_READ_INSTANTLY == 0) + all_pids_sortlist = callocz(sizeof(pid_t), (size_t)pid_max + 1); +#endif + + all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max + 1); +} - struct pid_stat *p = callocz(sizeof(struct pid_stat), 1); +inline struct pid_stat *find_pid_entry(pid_t pid) { + return all_pids[pid]; +} + +static inline struct pid_stat *get_or_allocate_pid_entry(pid_t pid) { + struct pid_stat *p = find_pid_entry(pid); + if(likely(p)) + return p; + + p = callocz(sizeof(struct pid_stat), 1); p->fds = mallocz(sizeof(struct pid_fd) * MAX_SPARE_FDS); p->fds_size = MAX_SPARE_FDS; init_pid_fds(p, 0, p->fds_size); p->pid = pid; DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(root_of_pids, p, prev, next); - all_pids[pid] = p; all_pids_count++; @@ -21,7 +47,7 @@ static inline struct pid_stat *get_pid_entry(pid_t pid) { } static inline void del_pid_entry(pid_t pid) { - struct pid_stat *p = all_pids[pid]; + struct pid_stat *p = find_pid_entry(pid); if(unlikely(!p)) { netdata_log_error("attempted to free pid %d that is not allocated.", pid); @@ -62,7 +88,7 @@ static inline int collect_data_for_pid(pid_t pid, void *ptr) { return 0; } - struct pid_stat *p = get_pid_entry(pid); + struct pid_stat *p = get_or_allocate_pid_entry(pid); if(unlikely(!p || p->read)) return 0; p->read = true; diff --git a/src/collectors/cgroups.plugin/cgroup-discovery.c b/src/collectors/cgroups.plugin/cgroup-discovery.c index 61d5c08ff..d880f8a71 100644 --- a/src/collectors/cgroups.plugin/cgroup-discovery.c +++ b/src/collectors/cgroups.plugin/cgroup-discovery.c @@ -178,11 +178,9 @@ static inline void discovery_rename_cgroup(struct cgroup *cg) { netdata_log_debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id); netdata_log_debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id); - pid_t cgroup_pid; - FILE *fp_child_input, *fp_child_output; - (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_rename_script, cg->id, cg->intermediate_id); - if (!fp_child_output) { + POPEN_INSTANCE *instance = spawn_popen_run_variadic(cgroups_rename_script, cg->id, cg->intermediate_id, NULL); + if (!instance) { collector_error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id); cg->pending_renames = 0; cg->processed = 1; @@ -190,8 +188,8 @@ static inline void discovery_rename_cgroup(struct cgroup *cg) { } char buffer[CGROUP_CHARTID_LINE_MAX + 1]; - 
char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp_child_output); - int exit_code = netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, instance->child_stdout_fp); + int exit_code = spawn_popen_wait(instance); switch (exit_code) { case 0: @@ -1085,7 +1083,6 @@ static void cgroup_cleanup_ebpf_integration() static inline void read_cgroup_network_interfaces(struct cgroup *cg) { netdata_log_debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id); - pid_t cgroup_pid; char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { @@ -1096,16 +1093,15 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) { } netdata_log_debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id); - FILE *fp_child_input, *fp_child_output; - (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier); - if(!fp_child_output) { + POPEN_INSTANCE *instance = spawn_popen_run_variadic(cgroups_network_interface_script, "--cgroup", cgroup_identifier, NULL); + if(!instance) { collector_error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier); return; } char *s; char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; - while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { + while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, instance->child_stdout_fp))) { trim(s); if(*s && *s != '\n') { @@ -1145,7 +1141,7 @@ static inline void read_cgroup_network_interfaces(struct cgroup *cg) { } } - netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + spawn_popen_wait(instance); } static inline void discovery_process_cgroup(struct cgroup *cg) { diff --git a/src/collectors/cgroups.plugin/cgroup-network.c b/src/collectors/cgroups.plugin/cgroup-network.c index 685282e89..4cb5cbabe 100644 --- a/src/collectors/cgroups.plugin/cgroup-network.c +++ b/src/collectors/cgroups.plugin/cgroup-network.c @@ -421,19 +421,19 @@ void detect_veth_interfaces(pid_t pid) { host = read_proc_net_dev("host", netdata_configured_host_prefix); if(!host) { - errno = 0; + errno_clear(); collector_error("cannot read host interface list."); goto cleanup; } if(!eligible_ifaces(host)) { - errno = 0; + errno_clear(); collector_info("there are no double-linked host interfaces available."); goto cleanup; } if(switch_namespace(netdata_configured_host_prefix, pid)) { - errno = 0; + errno_clear(); collector_error("cannot switch to the namespace of pid %u", (unsigned int) pid); goto cleanup; } @@ -444,13 +444,13 @@ void detect_veth_interfaces(pid_t pid) { cgroup = read_proc_net_dev("cgroup", NULL); if(!cgroup) { - errno = 0; + errno_clear(); collector_error("cannot read cgroup interface list."); goto cleanup; } if(!eligible_ifaces(cgroup)) { - errno = 0; + errno_clear(); collector_error("there are not double-linked cgroup interfaces available."); goto cleanup; } @@ -505,22 +505,20 @@ void call_the_helper(pid_t pid, const char *cgroup) { collector_info("running: %s", command); - pid_t cgroup_pid; - FILE *fp_child_input, *fp_child_output; + POPEN_INSTANCE *pi; - if(cgroup) { - (void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", 
"--cgroup", cgroup); - } + if(cgroup) + pi = spawn_popen_run_variadic(PLUGINS_DIR "/cgroup-network-helper.sh", "--cgroup", cgroup, NULL); else { char buffer[100]; snprintfz(buffer, sizeof(buffer) - 1, "%d", pid); - (void)netdata_popen_raw_default_flags(&cgroup_pid, environment, &fp_child_input, &fp_child_output, PLUGINS_DIR "/cgroup-network-helper.sh", "--pid", buffer); + pi = spawn_popen_run_variadic(PLUGINS_DIR "/cgroup-network-helper.sh", "--pid", buffer, NULL); } - if(fp_child_output) { + if(pi) { char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1]; char *s; - while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { + while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, pi->child_stdout_fp))) { trim(s); if(*s && *s != '\n') { @@ -536,7 +534,7 @@ void call_the_helper(pid_t pid, const char *cgroup) { } } - netdata_pclose(fp_child_input, fp_child_output, cgroup_pid); + spawn_popen_kill(pi); } else collector_error("cannot execute cgroup-network helper script: %s", command); @@ -701,7 +699,7 @@ int main(int argc, char **argv) { pid = atoi(argv[arg+1]); if(pid <= 0) { - errno = 0; + errno_clear(); collector_error("Invalid pid %d given", (int) pid); return 2; } @@ -719,7 +717,7 @@ int main(int argc, char **argv) { if(helper) call_the_helper(pid, cgroup); if(pid <= 0 && !detected_devices) { - errno = 0; + errno_clear(); collector_error("Cannot find a cgroup PID from cgroup '%s'", cgroup); } } diff --git a/src/collectors/cgroups.plugin/sys_fs_cgroup.c b/src/collectors/cgroups.plugin/sys_fs_cgroup.c index 592152401..5fdefa863 100644 --- a/src/collectors/cgroups.plugin/sys_fs_cgroup.c +++ b/src/collectors/cgroups.plugin/sys_fs_cgroup.c @@ -73,30 +73,19 @@ struct discovery_thread discovery_thread; #define MAXSIZE_PROC_CMDLINE 4096 static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec) { - pid_t command_pid; enum cgroups_systemd_setting retval = SYSTEMD_CGROUP_ERR; char buf[MAXSIZE_PROC_CMDLINE]; char *begin, *end; - FILE *fp_child_input; - FILE *fp_child_output = netdata_popen(exec, &command_pid, &fp_child_input); - - if (!fp_child_output) - return retval; - - int fd = fileno(fp_child_output); - if (fd == -1 ) { - collector_error("Cannot get the output of \"%s\": failed to get file descriptor", exec); - netdata_pclose(fp_child_input, fp_child_output, command_pid); + POPEN_INSTANCE *pi = spawn_popen_run(exec); + if(!pi) return retval; - } struct pollfd pfd; - pfd.fd = fd; + pfd.fd = spawn_server_instance_read_fd(pi->si); pfd.events = POLLIN; int timeout = 3000; // milliseconds - int ret = poll(&pfd, 1, timeout); if (ret == -1) { @@ -104,7 +93,7 @@ static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec) } else if (ret == 0) { collector_info("Cannot get the output of \"%s\" within timeout (%d ms)", exec, timeout); } else { - while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) { + while (fgets(buf, MAXSIZE_PROC_CMDLINE, pi->child_stdout_fp) != NULL) { if ((begin = strstr(buf, SYSTEMD_HIERARCHY_STRING))) { end = begin = begin + strlen(SYSTEMD_HIERARCHY_STRING); if (!*begin) @@ -123,7 +112,7 @@ static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec) } } - if (netdata_pclose(fp_child_input, fp_child_output, command_pid)) + if(spawn_popen_wait(pi) != 0) return SYSTEMD_CGROUP_ERR; return retval; @@ -131,41 +120,56 @@ static enum cgroups_systemd_setting cgroups_detect_systemd(const char *exec) static enum cgroups_type cgroups_try_detect_version() { - pid_t command_pid; + char filename[FILENAME_MAX + 
1]; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/fs/cgroup"); + struct statfs fsinfo; + + // https://github.com/systemd/systemd/blob/main/docs/CGROUP_DELEGATION.md#three-different-tree-setups- + // ├── statfs("/sys/fs/cgroup/") + // │ └── .f_type + // │ ├── CGROUP2_SUPER_MAGIC (Unified mode) + // │ └── TMPFS_MAGIC (Legacy or Hybrid mode) + // ├── statfs("/sys/fs/cgroup/unified/") + // │ └── .f_type + // │ ├── CGROUP2_SUPER_MAGIC (Hybrid mode) + // │ └── Otherwise, you're in legacy mode + if (!statfs(filename, &fsinfo)) { +#if defined CGROUP2_SUPER_MAGIC + if (fsinfo.f_type == CGROUP2_SUPER_MAGIC) + return CGROUPS_V2; +#endif +#if defined TMPFS_MAGIC + if (fsinfo.f_type == TMPFS_MAGIC) { + // either hybrid or legacy + return CGROUPS_V1; + } +#endif + } + + collector_info("cgroups version: can't detect using statfs (fs type), falling back to heuristics."); + char buf[MAXSIZE_PROC_CMDLINE]; enum cgroups_systemd_setting systemd_setting; int cgroups2_available = 0; // 1. check if cgroups2 available on system at all - FILE *fp_child_input; - FILE *fp_child_output = netdata_popen("grep cgroup /proc/filesystems", &command_pid, &fp_child_input); - if (!fp_child_output) { - collector_error("popen failed"); + POPEN_INSTANCE *instance = spawn_popen_run("grep cgroup /proc/filesystems"); + if(!instance) { + collector_error("cannot run 'grep cgroup /proc/filesystems'"); return CGROUPS_AUTODETECT_FAIL; } - while (fgets(buf, MAXSIZE_PROC_CMDLINE, fp_child_output) != NULL) { + while (fgets(buf, MAXSIZE_PROC_CMDLINE, instance->child_stdout_fp) != NULL) { if (strstr(buf, "cgroup2")) { cgroups2_available = 1; break; } } - if(netdata_pclose(fp_child_input, fp_child_output, command_pid)) + if(spawn_popen_wait(instance) != 0) return CGROUPS_AUTODETECT_FAIL; if(!cgroups2_available) return CGROUPS_V1; -#if defined CGROUP2_SUPER_MAGIC - // 2. check filesystem type for the default mountpoint - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/fs/cgroup"); - struct statfs fsinfo; - if (!statfs(filename, &fsinfo)) { - if (fsinfo.f_type == CGROUP2_SUPER_MAGIC) - return CGROUPS_V2; - } -#endif - // 3. check systemd compiletime setting if ((systemd_setting = cgroups_detect_systemd("systemd --version")) == SYSTEMD_CGROUP_ERR) systemd_setting = cgroups_detect_systemd(SYSTEMD_CMD_RHEL); diff --git a/src/collectors/charts.d.plugin/ap/README.md b/src/collectors/charts.d.plugin/ap/README.md deleted file mode 120000 index 5b6e75130..000000000 --- a/src/collectors/charts.d.plugin/ap/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/access_points.md
\ No newline at end of file diff --git a/src/collectors/charts.d.plugin/ap/ap.chart.sh b/src/collectors/charts.d.plugin/ap/ap.chart.sh deleted file mode 100644 index 80c9dc602..000000000 --- a/src/collectors/charts.d.plugin/ap/ap.chart.sh +++ /dev/null @@ -1,179 +0,0 @@ -# shellcheck shell=bash -# no need for shebang - this file is loaded from charts.d.plugin -# SPDX-License-Identifier: GPL-3.0-or-later - -# netdata -# real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> -# - -# _update_every is a special variable - it holds the number of seconds -# between the calls of the _update() function -ap_update_every= -ap_priority=6900 - -declare -A ap_devs=() - -# _check is called once, to find out if this chart should be enabled or not -ap_check() { - require_cmd iw || return 1 - local ev - ev=$(run iw dev | awk ' - BEGIN { - i = ""; - ssid = ""; - ap = 0; - } - /^[ \t]+Interface / { - if( ap == 1 ) { - print "ap_devs[" i "]=\"" ssid "\"" - } - - i = $2; - ssid = ""; - ap = 0; - } - /^[ \t]+ssid / { ssid = $2; } - /^[ \t]+type AP$/ { ap = 1; } - END { - if( ap == 1 ) { - print "ap_devs[" i "]=\"" ssid "\"" - } - } - ') - eval "${ev}" - - # this should return: - # - 0 to enable the chart - # - 1 to disable the chart - - [ ${#ap_devs[@]} -gt 0 ] && return 0 - error "no devices found in AP mode, with 'iw dev'" - return 1 -} - -# _create is called once, to create the charts -ap_create() { - local ssid dev - - for dev in "${!ap_devs[@]}"; do - ssid="${ap_devs[${dev}]}" - - # create the chart with 3 dimensions - cat << EOF -CHART ap_clients.${dev} '' "Connected clients to ${ssid} on ${dev}" "clients" ${dev} ap.clients line $((ap_priority + 1)) $ap_update_every '' '' 'ap' -DIMENSION clients '' absolute 1 1 - -CHART ap_bandwidth.${dev} '' "Bandwidth for ${ssid} on ${dev}" "kilobits/s" ${dev} ap.net area $((ap_priority + 2)) $ap_update_every '' '' 'ap' -DIMENSION received '' incremental 8 1024 -DIMENSION sent '' incremental -8 1024 - -CHART ap_packets.${dev} '' "Packets for ${ssid} on ${dev}" "packets/s" ${dev} ap.packets line $((ap_priority + 3)) $ap_update_every '' '' 'ap' -DIMENSION received '' incremental 1 1 -DIMENSION sent '' incremental -1 1 - -CHART ap_issues.${dev} '' "Transmit Issues for ${ssid} on ${dev}" "issues/s" ${dev} ap.issues line $((ap_priority + 4)) $ap_update_every '' '' 'ap' -DIMENSION retries 'tx retries' incremental 1 1 -DIMENSION failures 'tx failures' incremental -1 1 - -CHART ap_signal.${dev} '' "Average Signal for ${ssid} on ${dev}" "dBm" ${dev} ap.signal line $((ap_priority + 5)) $ap_update_every '' '' 'ap' -DIMENSION signal 'average signal' absolute 1 1000 - -CHART ap_bitrate.${dev} '' "Bitrate for ${ssid} on ${dev}" "Mbps" ${dev} ap.bitrate line $((ap_priority + 6)) $ap_update_every '' '' 'ap' -DIMENSION receive '' absolute 1 1000 -DIMENSION transmit '' absolute -1 1000 -DIMENSION expected 'expected throughput' absolute 1 1000 -EOF - done - - return 0 -} - -# _update is called continuously, to collect the values -ap_update() { - # the first argument to this function is the microseconds since last update - # pass this parameter to the BEGIN statement (see below). 
- - # do all the work to collect / calculate the values - # for each dimension - # remember: KEEP IT SIMPLE AND SHORT - - for dev in "${!ap_devs[@]}"; do - echo - echo "DEVICE ${dev}" - iw "${dev}" station dump - done | awk ' - function zero_data() { - dev = ""; - c = 0; - rb = 0; - tb = 0; - rp = 0; - tp = 0; - tr = 0; - tf = 0; - tt = 0; - rt = 0; - s = 0; - g = 0; - e = 0; - } - function print_device() { - if(dev != "" && length(dev) > 0) { - print "BEGIN ap_clients." dev; - print "SET clients = " c; - print "END"; - print "BEGIN ap_bandwidth." dev; - print "SET received = " rb; - print "SET sent = " tb; - print "END"; - print "BEGIN ap_packets." dev; - print "SET received = " rp; - print "SET sent = " tp; - print "END"; - print "BEGIN ap_issues." dev; - print "SET retries = " tr; - print "SET failures = " tf; - print "END"; - - if( c == 0 ) c = 1; - print "BEGIN ap_signal." dev; - print "SET signal = " int(s / c); - print "END"; - print "BEGIN ap_bitrate." dev; - print "SET receive = " int(rt / c); - print "SET transmit = " int(tt / c); - print "SET expected = " int(e / c); - print "END"; - } - zero_data(); - } - BEGIN { - zero_data(); - } - /^DEVICE / { - print_device(); - dev = $2; - } - /^Station/ { c++; } - /^[ \t]+rx bytes:/ { rb += $3; } - /^[ \t]+tx bytes:/ { tb += $3; } - /^[ \t]+rx packets:/ { rp += $3; } - /^[ \t]+tx packets:/ { tp += $3; } - /^[ \t]+tx retries:/ { tr += $3; } - /^[ \t]+tx failed:/ { tf += $3; } - /^[ \t]+signal:/ { x = $2; s += x * 1000; } - /^[ \t]+rx bitrate:/ { x = $3; rt += x * 1000; } - /^[ \t]+tx bitrate:/ { x = $3; tt += x * 1000; } - /^[ \t]+expected throughput:(.*)Mbps/ { - x=$3; - sub(/Mbps/, "", x); - e += x * 1000; - } - END { - print_device(); - } - ' - - return 0 -} diff --git a/src/collectors/charts.d.plugin/ap/ap.conf b/src/collectors/charts.d.plugin/ap/ap.conf deleted file mode 100644 index 38fc157ce..000000000 --- a/src/collectors/charts.d.plugin/ap/ap.conf +++ /dev/null @@ -1,23 +0,0 @@ -# no need for shebang - this file is loaded from charts.d.plugin - -# netdata -# real-time performance and health monitoring, done right! -# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> -# GPL v3+ - -# nothing fancy to configure. 
-# this module will run -# iw dev - to find wireless devices in AP mode -# iw ${dev} station dump - to get connected clients -# based on the above, it generates several charts - -# the data collection frequency -# if unset, will inherit the netdata update frequency -#ap_update_every= - -# the charts priority on the dashboard -#ap_priority=6900 - -# the number of retries to do in case of failure -# before disabling the module -#ap_retries=10 diff --git a/src/collectors/charts.d.plugin/ap/integrations/access_points.md b/src/collectors/charts.d.plugin/ap/integrations/access_points.md deleted file mode 100644 index 7eea0f95a..000000000 --- a/src/collectors/charts.d.plugin/ap/integrations/access_points.md +++ /dev/null @@ -1,174 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/charts.d.plugin/ap/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/charts.d.plugin/ap/metadata.yaml" -sidebar_label: "Access Points" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Linux Systems/Network" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Access Points - - -<img src="https://netdata.cloud/img/network-wired.svg" width="150"/> - - -Plugin: charts.d.plugin -Module: ap - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -The ap collector visualizes data related to wireless access points. - -It uses the `iw` command line utility to detect access points. For each interface that is of `type AP`, it then runs `iw INTERFACE station dump` and collects statistics. - -This collector is only supported on the following platforms: - -- Linux - -This collector only supports collecting metrics from a single instance of this integration. - - -### Default Behavior - -#### Auto-Detection - -The plugin is able to auto-detect if you are running access points on your linux box. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per wireless device - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| ap.clients | clients | clients | -| ap.net | received, sent | kilobits/s | -| ap.packets | received, sent | packets/s | -| ap.issues | retries, failures | issues/s | -| ap.signal | average signal | dBm | -| ap.bitrate | receive, transmit, expected | Mbps | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Install charts.d plugin - -If [using our official native DEB/RPM packages](/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed. - - -#### `iw` utility. - -Make sure the `iw` utility is installed. - - -### Configuration - -#### File - -The configuration file name for this integration is `charts.d/ap.conf`. 
- - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config charts.d/ap.conf -``` -#### Options - -The config file is sourced by the charts.d plugin. It's a standard bash file. - -The following collapsed table contains all the options that can be configured for the ap collector. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| ap_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no | -| ap_priority | Controls the order of charts at the netdata dashboard. | 6900 | no | -| ap_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no | - -</details> - -#### Examples - -##### Change the collection frequency - -Specify a custom collection frequence (update_every) for this collector - -```yaml -# the data collection frequency -# if unset, will inherit the netdata update frequency -ap_update_every=10 - -# the charts priority on the dashboard -#ap_priority=6900 - -# the number of retries to do in case of failure -# before disabling the module -#ap_retries=10 - -``` - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `ap` collector, run the `charts.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `charts.d.plugin` to debug the collector: - - ```bash - ./charts.d.plugin debug 1 ap - ``` - - diff --git a/src/collectors/charts.d.plugin/ap/metadata.yaml b/src/collectors/charts.d.plugin/ap/metadata.yaml deleted file mode 100644 index 6556b42ec..000000000 --- a/src/collectors/charts.d.plugin/ap/metadata.yaml +++ /dev/null @@ -1,146 +0,0 @@ -plugin_name: charts.d.plugin -modules: - - meta: - plugin_name: charts.d.plugin - module_name: ap - monitored_instance: - name: Access Points - link: "" - categories: - - data-collection.linux-systems.network-metrics - icon_filename: "network-wired.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - ap - - access - - point - - wireless - - network - most_popular: false - overview: - data_collection: - metrics_description: "The ap collector visualizes data related to wireless access points." - method_description: "It uses the `iw` command line utility to detect access points. For each interface that is of `type AP`, it then runs `iw INTERFACE station dump` and collects statistics." - supported_platforms: - include: [Linux] - exclude: [] - multi_instance: false - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "The plugin is able to auto-detect if you are running access points on your linux box." 
- limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: "Install charts.d plugin" - description: | - If [using our official native DEB/RPM packages](/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed. - - title: "`iw` utility." - description: "Make sure the `iw` utility is installed." - configuration: - file: - name: charts.d/ap.conf - options: - description: | - The config file is sourced by the charts.d plugin. It's a standard bash file. - - The following collapsed table contains all the options that can be configured for the ap collector. - folding: - title: "Config options" - enabled: true - list: - - name: ap_update_every - description: The data collection frequency. If unset, will inherit the netdata update frequency. - default_value: 1 - required: false - - name: ap_priority - description: Controls the order of charts at the netdata dashboard. - default_value: 6900 - required: false - - name: ap_retries - description: The number of retries to do in case of failure before disabling the collector. - default_value: 10 - required: false - examples: - folding: - enabled: false - title: "Config" - list: - - name: Change the collection frequency - description: Specify a custom collection frequence (update_every) for this collector - config: | - # the data collection frequency - # if unset, will inherit the netdata update frequency - ap_update_every=10 - - # the charts priority on the dashboard - #ap_priority=6900 - - # the number of retries to do in case of failure - # before disabling the module - #ap_retries=10 - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: wireless device - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: ap.clients - description: Connected clients to ${ssid} on ${dev} - unit: "clients" - chart_type: line - dimensions: - - name: clients - - name: ap.net - description: Bandwidth for ${ssid} on ${dev} - unit: "kilobits/s" - chart_type: area - dimensions: - - name: received - - name: sent - - name: ap.packets - description: Packets for ${ssid} on ${dev} - unit: "packets/s" - chart_type: line - dimensions: - - name: received - - name: sent - - name: ap.issues - description: Transmit Issues for ${ssid} on ${dev} - unit: "issues/s" - chart_type: line - dimensions: - - name: retries - - name: failures - - name: ap.signal - description: Average Signal for ${ssid} on ${dev} - unit: "dBm" - chart_type: line - dimensions: - - name: average signal - - name: ap.bitrate - description: Bitrate for ${ssid} on ${dev} - unit: "Mbps" - chart_type: line - dimensions: - - name: receive - - name: transmit - - name: expected diff --git a/src/collectors/charts.d.plugin/apcupsd/integrations/apc_ups.md b/src/collectors/charts.d.plugin/apcupsd/integrations/apc_ups.md index 5e34aa7d1..fdf1ccc9e 100644 --- a/src/collectors/charts.d.plugin/apcupsd/integrations/apc_ups.md +++ b/src/collectors/charts.d.plugin/apcupsd/integrations/apc_ups.md @@ -178,6 +178,7 @@ apcupsd_update_every=5 ### Debug Mode + To troubleshoot issues with the `apcupsd` collector, run the `charts.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -200,4 +201,37 @@ should give you clues as to why the collector isn't working. 
./charts.d.plugin debug 1 apcupsd ``` +### Getting Logs + +If you're encountering problems with the `apcupsd` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep apcupsd +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep apcupsd /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep apcupsd +``` + diff --git a/src/collectors/charts.d.plugin/charts.d.conf b/src/collectors/charts.d.plugin/charts.d.conf index 4614f259e..b186b19e9 100644 --- a/src/collectors/charts.d.plugin/charts.d.conf +++ b/src/collectors/charts.d.plugin/charts.d.conf @@ -33,7 +33,6 @@ # enable_all_charts="yes" # BY DEFAULT ENABLED MODULES -# ap=yes # apcupsd=yes # libreswan=yes # opensips=yes diff --git a/src/collectors/charts.d.plugin/charts.d.plugin.in b/src/collectors/charts.d.plugin/charts.d.plugin.in index 4e64b7e23..e8018aaff 100755 --- a/src/collectors/charts.d.plugin/charts.d.plugin.in +++ b/src/collectors/charts.d.plugin/charts.d.plugin.in @@ -474,6 +474,7 @@ declare -A charts_enable_keyword=( ) declare -A obsolete_charts=( + ['ap']="go.d/ap" ['apache']="python.d.plugin module" ['cpu_apps']="apps.plugin" ['cpufreq']="proc plugin" diff --git a/src/collectors/charts.d.plugin/libreswan/integrations/libreswan.md b/src/collectors/charts.d.plugin/libreswan/integrations/libreswan.md index 01152ef91..fa8eb7a97 100644 --- a/src/collectors/charts.d.plugin/libreswan/integrations/libreswan.md +++ b/src/collectors/charts.d.plugin/libreswan/integrations/libreswan.md @@ -169,6 +169,7 @@ libreswan_sudo=0 ### Debug Mode + To troubleshoot issues with the `libreswan` collector, run the `charts.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -191,4 +192,37 @@ should give you clues as to why the collector isn't working. ./charts.d.plugin debug 1 libreswan ``` +### Getting Logs + +If you're encountering problems with the `libreswan` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. 
+ +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep libreswan +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep libreswan /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep libreswan +``` + diff --git a/src/collectors/charts.d.plugin/opensips/integrations/opensips.md b/src/collectors/charts.d.plugin/opensips/integrations/opensips.md index 9ee332ba1..7fa610eb4 100644 --- a/src/collectors/charts.d.plugin/opensips/integrations/opensips.md +++ b/src/collectors/charts.d.plugin/opensips/integrations/opensips.md @@ -167,6 +167,7 @@ opensips_cmd=/opt/opensips/bin/opensipsctl ### Debug Mode + To troubleshoot issues with the `opensips` collector, run the `charts.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -189,4 +190,37 @@ should give you clues as to why the collector isn't working. ./charts.d.plugin debug 1 opensips ``` +### Getting Logs + +If you're encountering problems with the `opensips` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep opensips +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep opensips /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep opensips +``` + diff --git a/src/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md b/src/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md index 14fcc2f97..f9221caa1 100644 --- a/src/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md +++ b/src/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md @@ -22,7 +22,7 @@ Module: sensors ## Overview Use this collector when `lm-sensors` doesn't work on your device (e.g. for RPi temperatures). -For all other cases use the [Go collector](/src/go/collectors/go.d.plugin/modules/sensors/README.md), which supports multiple jobs, is more efficient and performs calculations on top of the kernel provided values." 
+For all other cases use the [Go collector](/src/go/plugin/go.d/modules/sensors/README.md), which supports multiple jobs, is more efficient and performs calculations on top of the kernel provided values." It will provide charts for all configured system sensors, by reading sensors directly from the kernel. @@ -176,6 +176,7 @@ sensors_sys_depth=5 ### Debug Mode + To troubleshoot issues with the `sensors` collector, run the `charts.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -198,4 +199,37 @@ should give you clues as to why the collector isn't working. ./charts.d.plugin debug 1 sensors ``` +### Getting Logs + +If you're encountering problems with the `sensors` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep sensors +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep sensors /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep sensors +``` + diff --git a/src/collectors/charts.d.plugin/sensors/metadata.yaml b/src/collectors/charts.d.plugin/sensors/metadata.yaml index ffa9f43bb..9aacdd353 100644 --- a/src/collectors/charts.d.plugin/sensors/metadata.yaml +++ b/src/collectors/charts.d.plugin/sensors/metadata.yaml @@ -25,7 +25,7 @@ modules: data_collection: metrics_description: | Use this collector when `lm-sensors` doesn't work on your device (e.g. for RPi temperatures). - For all other cases use the [Go collector](/src/go/collectors/go.d.plugin/modules/sensors/README.md), which supports multiple jobs, is more efficient and performs calculations on top of the kernel provided values." + For all other cases use the [Go collector](/src/go/plugin/go.d/modules/sensors/README.md), which supports multiple jobs, is more efficient and performs calculations on top of the kernel provided values." method_description: | It will provide charts for all configured system sensors, by reading sensors directly from the kernel. The values graphed are the raw hardware values of the sensors. diff --git a/src/collectors/common-contexts/common-contexts.h b/src/collectors/common-contexts/common-contexts.h index 9d2d77147..1938230dc 100644 --- a/src/collectors/common-contexts/common-contexts.h +++ b/src/collectors/common-contexts/common-contexts.h @@ -20,7 +20,9 @@ typedef void (*instance_labels_cb_t)(RRDSET *st, void *data); #include "system.io.h"
#include "system.ram.h"
+#include "system.interrupts.h"
#include "system.processes.h"
+#include "system.ipc.h"
#include "mem.swap.h"
#include "mem.pgfaults.h"
#include "mem.available.h"
diff --git a/src/collectors/common-contexts/system.interrupts.h b/src/collectors/common-contexts/system.interrupts.h new file mode 100644 index 000000000..dffd70572 --- /dev/null +++ b/src/collectors/common-contexts/system.interrupts.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_SYSTEM_INTERRUPTS_H
+#define NETDATA_SYSTEM_INTERRUPTS_H
+
+#include "common-contexts.h"
+
+#define _
+
+static inline void common_interrupts(uint64_t interrupts, int update_every, char *ext_module) {
+ static RRDSET *st_intr = NULL;
+ static RRDDIM *rd_interrupts = NULL;
+
+ char *module = (!ext_module) ? _COMMON_PLUGIN_MODULE_NAME: ext_module;
+
+ if(unlikely(!st_intr)) {
+ st_intr = rrdset_create_localhost( "system"
+ , "intr"
+ , NULL
+ , "interrupts"
+ , NULL
+ , "CPU Interrupts"
+ , "interrupts/s"
+ , _COMMON_PLUGIN_NAME
+ , module
+ , NETDATA_CHART_PRIO_SYSTEM_INTR
+ , update_every
+ , RRDSET_TYPE_LINE);
+
+ rrdset_flag_set(st_intr, RRDSET_FLAG_DETAIL);
+
+ rd_interrupts = rrddim_add(st_intr, "interrupts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+
+ rrddim_set_by_pointer(st_intr, rd_interrupts, (collected_number)interrupts);
+ rrdset_done(st_intr);
+}
+
+#endif //NETDATA_SYSTEM_INTERRUPTS_H
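
The header above is self-contained apart from the `_COMMON_PLUGIN_NAME` / `_COMMON_PLUGIN_MODULE_NAME` macros it references. A minimal caller-side sketch follows — it assumes, as those macros suggest, that the including collector defines them before the include; the plugin/module strings and the function name here are illustrative only and are not part of this patch:

```c
// Hypothetical caller-side sketch (names and values are illustrative, not from this patch).
// The common-contexts helpers are static inline and reference these macros,
// so the including collector is assumed to define them before the include.
#define _COMMON_PLUGIN_NAME        "proc.plugin"   /* assumed value, for illustration */
#define _COMMON_PLUGIN_MODULE_NAME "/proc/stat"    /* assumed value, for illustration */
#include "common-contexts.h"

// Called once per iteration with the total interrupt count read from the kernel
// (for example, the "intr" line of /proc/stat). Passing NULL for ext_module makes
// common_interrupts() fall back to _COMMON_PLUGIN_MODULE_NAME, as shown above.
static void example_send_interrupts(uint64_t total_interrupts, int update_every) {
    common_interrupts(total_interrupts, update_every, NULL);
}
```
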
diff --git a/src/collectors/common-contexts/system.ipc.h b/src/collectors/common-contexts/system.ipc.h new file mode 100644 index 000000000..129ce6dfa --- /dev/null +++ b/src/collectors/common-contexts/system.ipc.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_SYSTEM_IPC_H
+#define NETDATA_SYSTEM_IPC_H
+
+#include "common-contexts.h"
+
+static inline void common_semaphore_ipc(uint64_t semaphore, NETDATA_DOUBLE red, char *module, int update_every) {
+ static RRDSET *st_semaphores = NULL;
+ static RRDDIM *rd_semaphores = NULL;
+ if(unlikely(!st_semaphores)) {
+ st_semaphores = rrdset_create_localhost("system"
+ , "ipc_semaphores"
+ , NULL
+ , "ipc semaphores"
+ , NULL
+ , "IPC Semaphores"
+ , "semaphores"
+ , _COMMON_PLUGIN_NAME
+ , module
+ , NETDATA_CHART_PRIO_SYSTEM_IPC_SEMAPHORES
+ , update_every
+ , RRDSET_TYPE_AREA
+ );
+ rd_semaphores = rrddim_add(st_semaphores, "semaphores", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(st_semaphores, rd_semaphores, semaphore);
+ rrdset_done(st_semaphores);
+ if (!strcmp(module, "ipc"))
+ st_semaphores->red = red;
+}
+
+#endif //NETDATA_SYSTEM_IPC_H
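
Similarly, a minimal caller-side sketch for the IPC helper above — the function name, plugin/module strings and threshold variable are assumptions for illustration; only the `common_semaphore_ipc()` signature comes from this patch. Note that the chart's red (alarm) line is applied only when the module string is exactly `"ipc"`:

```c
// Hypothetical caller-side sketch (names and values are illustrative, not from this patch).
#define _COMMON_PLUGIN_NAME        "proc.plugin"   /* assumed value, for illustration */
#define _COMMON_PLUGIN_MODULE_NAME "ipc"           /* assumed value, for illustration */
#include "common-contexts.h"

// Called once per iteration with the number of allocated IPC semaphores and the
// configured alarm threshold. common_semaphore_ipc() sets the chart's red line
// only when the module argument compares equal to "ipc".
static void example_send_ipc_semaphores(uint64_t semaphores, NETDATA_DOUBLE red_limit, int update_every) {
    common_semaphore_ipc(semaphores, red_limit, "ipc", update_every);
}
```
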
diff --git a/src/collectors/cups.plugin/cups_plugin.c b/src/collectors/cups.plugin/cups_plugin.c index 4e452f096..20b155e14 100644 --- a/src/collectors/cups.plugin/cups_plugin.c +++ b/src/collectors/cups.plugin/cups_plugin.c @@ -231,7 +231,7 @@ int main(int argc, char **argv) { parse_command_line(argc, argv); - errno = 0; + errno_clear(); dict_dest_job_metrics = dictionary_create(DICT_OPTION_SINGLE_THREADED); diff --git a/src/collectors/diskspace.plugin/plugin_diskspace.c b/src/collectors/diskspace.plugin/plugin_diskspace.c index 10e07586c..f1d8909b2 100644 --- a/src/collectors/diskspace.plugin/plugin_diskspace.c +++ b/src/collectors/diskspace.plugin/plugin_diskspace.c @@ -4,8 +4,8 @@ #define PLUGIN_DISKSPACE_NAME "diskspace.plugin" -#define DEFAULT_EXCLUDED_PATHS "/proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/*" -#define DEFAULT_EXCLUDED_FILESYSTEMS "*gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs" +#define DEFAULT_EXCLUDED_PATHS "/dev /dev/shm /proc/* /sys/* /var/run/user/* /run/lock /run/user/* /snap/* /var/lib/docker/* /var/lib/containers/storage/* /run/credentials/* /run/containerd/* /rpool /rpool/*" +#define DEFAULT_EXCLUDED_FILESYSTEMS "*gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs cgroup cgroup2 hugetlbfs devtmpfs fuse.lxcfs" #define DEFAULT_EXCLUDED_FILESYSTEMS_INODES "msdosfs msdos vfat overlayfs aufs* *unionfs" #define CONFIG_SECTION_DISKSPACE "plugin:proc:diskspace" diff --git a/src/collectors/ebpf.plugin/ebpf.c b/src/collectors/ebpf.plugin/ebpf.c index de2b6e144..5424ea8f0 100644 --- a/src/collectors/ebpf.plugin/ebpf.c +++ b/src/collectors/ebpf.plugin/ebpf.c @@ -30,6 +30,7 @@ int ebpf_nprocs; int isrh = 0; int main_thread_id = 0; int process_pid_fd = -1; +uint64_t collect_pids = 0; static size_t global_iterations_counter = 1; bool publish_internal_metrics = true; @@ -996,7 +997,7 @@ static inline void ebpf_create_apps_for_module(ebpf_module_t *em, struct ebpf_ta */ static void ebpf_create_apps_charts(struct ebpf_target *root) { - if (unlikely(!ebpf_all_pids)) + if (unlikely(!ebpf_pids)) return; struct ebpf_target *w; @@ -1028,21 +1029,15 @@ static void ebpf_create_apps_charts(struct ebpf_target *root) } } - int i; - if (!newly_added) { + if (newly_added) { + int i; for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { - ebpf_module_t *current = &ebpf_modules[i]; - if (current->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) + if (!(collect_pids & (1<<i))) continue; + ebpf_module_t *current = &ebpf_modules[i]; ebpf_create_apps_for_module(current, root); } - return; - } - - for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { - ebpf_module_t *current = &ebpf_modules[i]; - ebpf_create_apps_for_module(current, root); } } @@ -2680,7 +2675,7 @@ static void ebpf_allocate_common_vectors() { ebpf_judy_pid.pid_table = ebpf_allocate_pid_aral(NETDATA_EBPF_PID_SOCKET_ARAL_TABLE_NAME, sizeof(netdata_ebpf_judy_pid_stats_t)); - ebpf_all_pids = callocz((size_t)pid_max, sizeof(struct ebpf_pid_stat *)); + ebpf_pids = callocz((size_t)pid_max, sizeof(ebpf_pid_data_t)); ebpf_aral_init(); } @@ -3014,7 +3009,7 @@ static int ebpf_load_collector_config(char *path, int *disable_cgroups, int upda /** * Set global variables reading environment variables */ -void set_global_variables() +static void ebpf_set_global_variables() { // Get environment variables ebpf_plugin_dir = getenv("NETDATA_PLUGINS_DIR"); @@ -3042,6 +3037,7 @@ void set_global_variables() isrh = get_redhat_release(); pid_max = os_get_system_pid_max(); 
running_on_kernel = ebpf_get_kernel_version(); + memset(pids_fd, -1, sizeof(pids_fd)); } /** @@ -3418,6 +3414,11 @@ void ebpf_send_statistic_data() } ebpf_write_end_chart(); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, "monitoring_pid", ""); + write_chart_dimension("user", ebpf_all_pids_count); + write_chart_dimension("kernel", ebpf_hash_table_pids_count); + ebpf_write_end_chart(); + ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LIFE_TIME, ""); for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) { ebpf_module_t *wem = &ebpf_modules[i]; @@ -3490,6 +3491,37 @@ static void update_internal_metric_variable() } /** + * Create PIDS Chart + * + * Write to standard output current values for PIDSs charts. + * + * @param order order to display chart + * @param update_every time used to update charts + */ +static void ebpf_create_pids_chart(int order, int update_every) +{ + ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY, + "monitoring_pid", + "", + "Total number of monitored PIDs", + "pids", + NETDATA_EBPF_FAMILY, + NETDATA_EBPF_CHART_TYPE_LINE, + "netdata.ebpf_pids", + order, + update_every, + "main"); + + ebpf_write_global_dimension("user", + "user", + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); + + ebpf_write_global_dimension("kernel", + "kernel", + ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); +} + +/** * Create Thread Chart * * Write to standard output current values for threads charts. @@ -3538,7 +3570,7 @@ static void ebpf_create_thread_chart(char *name, (char *)em->info.thread_name, ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]); } -} + } /** * Create chart for Load Thread @@ -3741,6 +3773,8 @@ static void ebpf_create_statistic_charts(int update_every) update_every, NULL); + ebpf_create_pids_chart(NETDATA_EBPF_ORDER_PIDS, update_every); + ebpf_create_thread_chart(NETDATA_EBPF_LIFE_TIME, "Time remaining for thread.", "seconds", @@ -3974,18 +4008,18 @@ int main(int argc, char **argv) clocks_init(); nd_log_initialize_for_external_plugins(NETDATA_EBPF_PLUGIN_NAME); - main_thread_id = gettid_cached(); - - set_global_variables(); - ebpf_parse_args(argc, argv); - ebpf_manage_pid(getpid()); - + ebpf_set_global_variables(); if (ebpf_can_plugin_load_code(running_on_kernel, NETDATA_EBPF_PLUGIN_NAME)) return 2; if (ebpf_adjust_memory_limit()) return 3; + main_thread_id = gettid_cached(); + + ebpf_parse_args(argc, argv); + ebpf_manage_pid(getpid()); + signal(SIGINT, ebpf_stop_threads); signal(SIGQUIT, ebpf_stop_threads); signal(SIGTERM, ebpf_stop_threads); @@ -4018,7 +4052,7 @@ int main(int argc, char **argv) ebpf_cgroup_integration, NULL); - int i; + uint32_t i; for (i = 0; ebpf_threads[i].name != NULL; i++) { struct netdata_static_thread *st = &ebpf_threads[i]; @@ -4028,6 +4062,10 @@ int main(int argc, char **argv) if (em->enabled != NETDATA_THREAD_EBPF_NOT_RUNNING) { em->enabled = NETDATA_THREAD_EBPF_RUNNING; em->lifetime = EBPF_NON_FUNCTION_LIFE_TIME; + + if (em->functions.apps_routine && (em->apps_charts || em->cgroup_charts)) { + collect_pids |= 1<<i; + } st->thread = nd_thread_create(st->name, NETDATA_THREAD_OPTION_JOINABLE, st->start_routine, em); } else { em->lifetime = EBPF_DEFAULT_LIFETIME; @@ -4038,7 +4076,7 @@ int main(int argc, char **argv) heartbeat_t hb; heartbeat_init(&hb); int update_apps_every = (int) EBPF_CFG_UPDATE_APPS_EVERY_DEFAULT; - int max_period = update_apps_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; int update_apps_list = update_apps_every - 1; int process_maps_per_core = ebpf_modules[EBPF_MODULE_PROCESS_IDX].maps_per_core; 
//Plugin will be killed when it receives a signal @@ -4050,19 +4088,23 @@ int main(int argc, char **argv) ebpf_create_statistic_charts(EBPF_DEFAULT_UPDATE_EVERY); ebpf_send_statistic_data(); - pthread_mutex_unlock(&lock); fflush(stdout); + pthread_mutex_unlock(&lock); } if (++update_apps_list == update_apps_every) { update_apps_list = 0; pthread_mutex_lock(&lock); - pthread_mutex_lock(&collect_data_mutex); - ebpf_cleanup_exited_pids(max_period); - collect_data_for_all_processes(process_pid_fd, process_maps_per_core); - - ebpf_create_apps_charts(apps_groups_root_target); - pthread_mutex_unlock(&collect_data_mutex); + if (collect_pids) { + pthread_mutex_lock(&collect_data_mutex); + ebpf_parse_proc_files(); + if (collect_pids & (1<<EBPF_MODULE_PROCESS_IDX)) { + collect_data_for_all_processes(process_pid_fd, process_maps_per_core, max_period); + } + + ebpf_create_apps_charts(apps_groups_root_target); + pthread_mutex_unlock(&collect_data_mutex); + } pthread_mutex_unlock(&lock); } } diff --git a/src/collectors/ebpf.plugin/ebpf.d/cachestat.conf b/src/collectors/ebpf.plugin/ebpf.d/cachestat.conf index c378e82e8..9c51b2c52 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/cachestat.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/cachestat.conf @@ -37,6 +37,6 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline - collect pid = all + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/dcstat.conf b/src/collectors/ebpf.plugin/ebpf.d/dcstat.conf index 2d54bce97..614d814e6 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/dcstat.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/dcstat.conf @@ -35,6 +35,6 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline - collect pid = all + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/fd.conf b/src/collectors/ebpf.plugin/ebpf.d/fd.conf index d48230323..4d0d2ac05 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/fd.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/fd.conf @@ -23,5 +23,6 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/oomkill.conf b/src/collectors/ebpf.plugin/ebpf.d/oomkill.conf index ea97ebe85..a137b945b 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/oomkill.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/oomkill.conf @@ -3,9 +3,21 @@ # `return : In the `return` mode, the eBPF collector monitors the same kernel functions as `entry`, but also creates # new charts for the return of these functions, such as errors. # +# The eBPF collector also creates charts for each running application through an integration with the `apps.plugin` +# or `cgroups.plugin`. +# If you want to disable the integration with `apps.plugin` or `cgroups.plugin` along with the above charts, change +# the setting `apps` and `cgroups` to 'no'. +# +# The `pid table size` defines the maximum number of PIDs stored inside the hash table. +# +# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6. +# # The `lifetime` defines the time length a thread will run when it is enabled by a function. 
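The comment block just added to `oomkill.conf`, together with the `collect pid = all` to `real parent` switches in the other thread files, documents the per-thread `[global]` knobs. An illustrative section with every documented key spelled out (values below are examples drawn from the files in this patch, not necessarily the shipped defaults for each thread):

```conf
[global]
    ebpf load mode = entry
    update every = 1
    apps = yes
    cgroups = no
    pid table size = 32768
    ebpf type format = auto
    ebpf co-re tracing = trampoline
    collect pid = real parent
    maps per core = yes
    lifetime = 300
```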
# [global] # ebpf load mode = entry # update every = 1 + ebpf type format = auto + ebpf co-re tracing = trampoline + collect pid = real parent lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/process.conf b/src/collectors/ebpf.plugin/ebpf.d/process.conf index 6f6477003..150c57920 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/process.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/process.conf @@ -26,6 +26,6 @@ # cgroups = no # update every = 10 # pid table size = 32768 - collect pid = all + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/shm.conf b/src/collectors/ebpf.plugin/ebpf.d/shm.conf index 0314bdc95..4769c52ee 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/shm.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/shm.conf @@ -31,7 +31,7 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline - collect pid = all + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/swap.conf b/src/collectors/ebpf.plugin/ebpf.d/swap.conf index 6d76b9880..7d4c5f7d3 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/swap.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/swap.conf @@ -30,6 +30,6 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline - collect pid = all + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.d/vfs.conf b/src/collectors/ebpf.plugin/ebpf.d/vfs.conf index f511581b8..941ac1407 100644 --- a/src/collectors/ebpf.plugin/ebpf.d/vfs.conf +++ b/src/collectors/ebpf.plugin/ebpf.d/vfs.conf @@ -31,5 +31,6 @@ # pid table size = 32768 ebpf type format = auto ebpf co-re tracing = trampoline + collect pid = real parent # maps per core = yes lifetime = 300 diff --git a/src/collectors/ebpf.plugin/ebpf.h b/src/collectors/ebpf.plugin/ebpf.h index c54b5900d..6fc42b3e4 100644 --- a/src/collectors/ebpf.plugin/ebpf.h +++ b/src/collectors/ebpf.plugin/ebpf.h @@ -37,6 +37,7 @@ #define NETDATA_EBPF_OLD_CONFIG_FILE "ebpf.conf" #define NETDATA_EBPF_CONFIG_FILE "ebpf.d.conf" +extern size_t ebpf_hash_table_pids_count; #ifdef LIBBPF_MAJOR_VERSION // BTF code #include "cachestat.skel.h" #include "dc.skel.h" @@ -122,34 +123,6 @@ typedef struct netdata_ebpf_judy_pid_stats { } netdata_ebpf_judy_pid_stats_t; extern ebpf_module_t ebpf_modules[]; -enum ebpf_main_index { - EBPF_MODULE_PROCESS_IDX, - EBPF_MODULE_SOCKET_IDX, - EBPF_MODULE_CACHESTAT_IDX, - EBPF_MODULE_SYNC_IDX, - EBPF_MODULE_DCSTAT_IDX, - EBPF_MODULE_SWAP_IDX, - EBPF_MODULE_VFS_IDX, - EBPF_MODULE_FILESYSTEM_IDX, - EBPF_MODULE_DISK_IDX, - EBPF_MODULE_MOUNT_IDX, - EBPF_MODULE_FD_IDX, - EBPF_MODULE_HARDIRQ_IDX, - EBPF_MODULE_SOFTIRQ_IDX, - EBPF_MODULE_OOMKILL_IDX, - EBPF_MODULE_SHM_IDX, - EBPF_MODULE_MDFLUSH_IDX, - EBPF_MODULE_FUNCTION_IDX, - /* THREADS MUST BE INCLUDED BEFORE THIS COMMENT */ - EBPF_OPTION_ALL_CHARTS, - EBPF_OPTION_VERSION, - EBPF_OPTION_HELP, - EBPF_OPTION_GLOBAL_CHART, - EBPF_OPTION_RETURN_MODE, - EBPF_OPTION_LEGACY, - EBPF_OPTION_CORE, - EBPF_OPTION_UNITTEST -}; typedef struct ebpf_tracepoint { bool enabled; @@ -380,6 +353,7 @@ void ebpf_read_local_addresses_unsafe(); extern ebpf_filesystem_partitions_t localfs[]; extern ebpf_sync_syscalls_t local_syscalls[]; extern bool ebpf_plugin_exit; +extern uint64_t collect_pids; static inline bool ebpf_plugin_stop(void) { return ebpf_plugin_exit || nd_thread_signaled_to_cancel(); diff --git a/src/collectors/ebpf.plugin/ebpf_apps.c b/src/collectors/ebpf.plugin/ebpf_apps.c 
index a17cdb33d..d90c5f128 100644 --- a/src/collectors/ebpf.plugin/ebpf_apps.c +++ b/src/collectors/ebpf.plugin/ebpf_apps.c @@ -21,37 +21,11 @@ void ebpf_aral_init(void) max_elements = NETDATA_EBPF_ALLOC_MIN_ELEMENTS; } - ebpf_aral_apps_pid_stat = ebpf_allocate_pid_aral("ebpf_pid_stat", sizeof(struct ebpf_pid_stat)); - #ifdef NETDATA_DEV_MODE netdata_log_info("Plugin is using ARAL with values %d", NETDATA_EBPF_ALLOC_MAX_PID); #endif } -/** - * eBPF pid stat get - * - * Get a ebpf_pid_stat entry to be used with a specific PID. - * - * @return it returns the address on success. - */ -struct ebpf_pid_stat *ebpf_pid_stat_get(void) -{ - struct ebpf_pid_stat *target = aral_mallocz(ebpf_aral_apps_pid_stat); - memset(target, 0, sizeof(struct ebpf_pid_stat)); - return target; -} - -/** - * eBPF target release - * - * @param stat Release a target after usage. - */ -void ebpf_pid_stat_release(struct ebpf_pid_stat *stat) -{ - aral_freez(ebpf_aral_apps_pid_stat, stat); -} - // ---------------------------------------------------------------------------- // internal flags // handled in code (automatically set) @@ -332,11 +306,11 @@ int ebpf_read_apps_groups_conf(struct ebpf_target **agdt, struct ebpf_target **a #define MAX_CMDLINE 16384 -struct ebpf_pid_stat **ebpf_all_pids = NULL; // to avoid allocations, we pre-allocate the - // the entire pid space. -struct ebpf_pid_stat *ebpf_root_of_pids = NULL; // global list of all processes running +ebpf_pid_data_t *ebpf_pids = NULL; // to avoid allocations, we pre-allocate the entire pid space. +ebpf_pid_data_t *ebpf_pids_link_list = NULL; // global list of all processes running -size_t ebpf_all_pids_count = 0; // the number of processes running +size_t ebpf_all_pids_count = 0; // the number of processes running read from /proc +size_t ebpf_hash_table_pids_count = 0; // the number of tasks in our hash tables struct ebpf_target *apps_groups_default_target = NULL, // the default target @@ -346,6 +320,8 @@ struct ebpf_target size_t apps_groups_targets_count = 0; // # of apps_groups.conf targets +int pids_fd[EBPF_PIDS_END_IDX]; + // ---------------------------------------------------------------------------- // internal counters @@ -389,109 +365,11 @@ static inline void debug_log_dummy(void) #endif /** - * Managed log - * - * Store log information if it is necessary. - * - * @param p the pid stat structure - * @param log the log id - * @param status the return from a function. - * - * @return It returns the status value. 
- */ -static inline int managed_log(struct ebpf_pid_stat *p, uint32_t log, int status) -{ - if (unlikely(!status)) { - // netdata_log_error("command failed log %u, errno %d", log, errno); - - if (unlikely(debug_enabled || errno != ENOENT)) { - if (unlikely(debug_enabled || !(p->log_thrown & log))) { - p->log_thrown |= log; - switch (log) { - case PID_LOG_IO: - netdata_log_error( - "Cannot process %s/proc/%d/io (command '%s')", netdata_configured_host_prefix, p->pid, - p->comm); - break; - - case PID_LOG_STATUS: - netdata_log_error( - "Cannot process %s/proc/%d/status (command '%s')", netdata_configured_host_prefix, p->pid, - p->comm); - break; - - case PID_LOG_CMDLINE: - netdata_log_error( - "Cannot process %s/proc/%d/cmdline (command '%s')", netdata_configured_host_prefix, p->pid, - p->comm); - break; - - case PID_LOG_FDS: - netdata_log_error( - "Cannot process entries in %s/proc/%d/fd (command '%s')", netdata_configured_host_prefix, - p->pid, p->comm); - break; - - case PID_LOG_STAT: - break; - - default: - netdata_log_error("unhandled error for pid %d, command '%s'", p->pid, p->comm); - break; - } - } - } - errno = 0; - } else if (unlikely(p->log_thrown & log)) { - // netdata_log_error("unsetting log %u on pid %d", log, p->pid); - p->log_thrown &= ~log; - } - - return status; -} - -/** - * Get PID entry - * - * Get or allocate the PID entry for the specified pid. - * - * @param pid the pid to search the data. - * @param tgid the task group id - * - * @return It returns the pid entry structure - */ -ebpf_pid_stat_t *ebpf_get_pid_entry(pid_t pid, pid_t tgid) -{ - ebpf_pid_stat_t *ptr = ebpf_all_pids[pid]; - if (unlikely(ptr)) { - if (!ptr->ppid && tgid) - ptr->ppid = tgid; - return ebpf_all_pids[pid]; - } - - struct ebpf_pid_stat *p = ebpf_pid_stat_get(); - - if (likely(ebpf_root_of_pids)) - ebpf_root_of_pids->prev = p; - - p->next = ebpf_root_of_pids; - ebpf_root_of_pids = p; - - p->pid = pid; - p->ppid = tgid; - - ebpf_all_pids[pid] = p; - ebpf_all_pids_count++; - - return p; -} - -/** * Assign the PID to a target. * * @param p the pid_stat structure to assign for a target. */ -static inline void assign_target_to_pid(struct ebpf_pid_stat *p) +static inline void assign_target_to_pid(ebpf_pid_data_t *p) { targets_assignment_counter++; @@ -499,6 +377,7 @@ static inline void assign_target_to_pid(struct ebpf_pid_stat *p) size_t pclen = strlen(p->comm); struct ebpf_target *w; + bool assigned = false; for (w = apps_groups_root_target; w; w = w->next) { // if(debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int("\t\tcomparing '%s' with '%s'", w->compare, p->comm); @@ -521,9 +400,17 @@ static inline void assign_target_to_pid(struct ebpf_pid_stat *p) if (debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int("%s linked to target %s", p->comm, p->target->name); + w->processes++; + assigned = true; + break; } } + + if (!assigned) { + apps_groups_default_target->processes++; + p->target = apps_groups_default_target; + } } // ---------------------------------------------------------------------------- @@ -532,22 +419,18 @@ static inline void assign_target_to_pid(struct ebpf_pid_stat *p) /** * Read cmd line from /proc/PID/cmdline * - * @param p the ebpf_pid_stat_structure. + * @param p the ebpf_pid_data structure. * * @return It returns 1 on success and 0 otherwise. 
*/ -static inline int read_proc_pid_cmdline(struct ebpf_pid_stat *p) +static inline int read_proc_pid_cmdline(ebpf_pid_data_t *p, char *cmdline) { - static char cmdline[MAX_CMDLINE + 1]; + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", netdata_configured_host_prefix, p->pid); int ret = 0; - if (unlikely(!p->cmdline_filename)) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", netdata_configured_host_prefix, p->pid); - p->cmdline_filename = strdupz(filename); - } - int fd = open(p->cmdline_filename, procfile_open_flags, 0666); + int fd = open(filename, procfile_open_flags, 0666); if (unlikely(fd == -1)) goto cleanup; @@ -563,21 +446,12 @@ static inline int read_proc_pid_cmdline(struct ebpf_pid_stat *p) cmdline[i] = ' '; } - debug_log("Read file '%s' contents: %s", p->cmdline_filename, p->cmdline); + debug_log("Read file '%s' contents: %s", filename, p->cmdline); ret = 1; cleanup: - // copy the command to the command line - if (p->cmdline) - freez(p->cmdline); - p->cmdline = strdupz(p->comm); - - rw_spinlock_write_lock(&ebpf_judy_pid.index.rw_spinlock); - netdata_ebpf_judy_pid_stats_t *pid_ptr = ebpf_get_pid_from_judy_unsafe(&ebpf_judy_pid.index.JudyLArray, p->pid); - if (pid_ptr) - pid_ptr->cmdline = p->cmdline; - rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock); + p->cmdline[0] = '\0'; return ret; } @@ -587,44 +461,43 @@ cleanup: * Assign target to pid * * @param p the pid stat structure to store the data. - * @param ptr an useless argument. */ -static inline int read_proc_pid_stat(struct ebpf_pid_stat *p, void *ptr) +static inline int read_proc_pid_stat(ebpf_pid_data_t *p) { - UNUSED(ptr); + procfile *ff; - static procfile *ff = NULL; - - if (unlikely(!p->stat_filename)) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/%d/stat", netdata_configured_host_prefix, p->pid); - p->stat_filename = strdupz(filename); - } - - int set_quotes = (!ff) ? 
1 : 0; + char filename[FILENAME_MAX + 1]; + int ret = 0; + snprintfz(filename, FILENAME_MAX, "%s/proc/%u/stat", netdata_configured_host_prefix, p->pid); struct stat statbuf; - if (stat(p->stat_filename, &statbuf)) + if (stat(filename, &statbuf)) { + // PID ended before we stat the file + p->has_proc_file = 0; return 0; + } - ff = procfile_reopen(ff, p->stat_filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + ff = procfile_open(filename, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); if (unlikely(!ff)) - return 0; + goto cleanup_pid_stat; - if (unlikely(set_quotes)) - procfile_set_open_close(ff, "(", ")"); + procfile_set_open_close(ff, "(", ")"); ff = procfile_readall(ff); if (unlikely(!ff)) - return 0; - - p->last_stat_collected_usec = p->stat_collected_usec; - p->stat_collected_usec = now_monotonic_usec(); - calls_counter++; + goto cleanup_pid_stat; char *comm = procfile_lineword(ff, 0, 1); - p->ppid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3)); + int32_t ppid = (int32_t)str2pid_t(procfile_lineword(ff, 0, 3)); + if (p->ppid == ppid && p->target) + goto without_cmdline_target; + + p->ppid = ppid; + + char cmdline[MAX_CMDLINE + 1]; + p->cmdline = cmdline; + read_proc_pid_cmdline(p, cmdline); if (strcmp(p->comm, comm) != 0) { if (unlikely(debug_enabled)) { if (p->comm[0]) @@ -634,58 +507,50 @@ static inline int read_proc_pid_stat(struct ebpf_pid_stat *p, void *ptr) } strncpyz(p->comm, comm, EBPF_MAX_COMPARE_NAME); - - // /proc/<pid>/cmdline - if (likely(proc_pid_cmdline_is_needed)) - managed_log(p, PID_LOG_CMDLINE, read_proc_pid_cmdline(p)); - - assign_target_to_pid(p); } + if (!p->target) + assign_target_to_pid(p); + + p->cmdline = NULL; if (unlikely(debug_enabled || (p->target && p->target->debug_enabled))) debug_log_int( - "READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s' (dt=%llu)", - netdata_configured_host_prefix, p->pid, p->comm, (p->target) ? p->target->name : "UNSET", - p->stat_collected_usec - p->last_stat_collected_usec); + "READ PROC/PID/STAT: %s/proc/%d/stat, process: '%s' on target '%s'", + netdata_configured_host_prefix, p->pid, p->comm, (p->target) ? p->target->name : "UNSET"); - return 1; +without_cmdline_target: + p->has_proc_file = 1; + p->not_updated = 0; + ret = 1; +cleanup_pid_stat: + procfile_close(ff); + + return ret; } /** * Collect data for PID * * @param pid the current pid that we are working - * @param ptr a NULL value * * @return It returns 1 on success and 0 otherwise */ -static inline int ebpf_collect_data_for_pid(pid_t pid, void *ptr) +static inline int ebpf_collect_data_for_pid(pid_t pid) { if (unlikely(pid < 0 || pid > pid_max)) { netdata_log_error("Invalid pid %d read (expected %d to %d). Ignoring process.", pid, 0, pid_max); return 0; } - ebpf_pid_stat_t *p = ebpf_get_pid_entry(pid, 0); - if (unlikely(!p || p->read)) - return 0; - p->read = 1; - - if (unlikely(!managed_log(p, PID_LOG_STAT, read_proc_pid_stat(p, ptr)))) - // there is no reason to proceed if we cannot get its status - return 0; + ebpf_pid_data_t *p = ebpf_get_pid_data((uint32_t)pid, 0, NULL, EBPF_PIDS_PROC_FILE); + read_proc_pid_stat(p); // check its parent pid - if (unlikely(p->ppid < 0 || p->ppid > pid_max)) { - netdata_log_error("Pid %d (command '%s') states invalid parent pid %d. Using 0.", pid, p->comm, p->ppid); + if (unlikely( p->ppid > pid_max)) { + netdata_log_error("Pid %d (command '%s') states invalid parent pid %u. 
Using 0.", pid, p->comm, p->ppid); p->ppid = 0; } - // mark it as updated - p->updated = 1; - p->keep = 0; - p->keeploops = 0; - return 1; } @@ -694,14 +559,13 @@ static inline int ebpf_collect_data_for_pid(pid_t pid, void *ptr) */ static inline void link_all_processes_to_their_parents(void) { - struct ebpf_pid_stat *p, *pp; + ebpf_pid_data_t *p, *pp; // link all children to their parents // and update children count on parents - for (p = ebpf_root_of_pids; p; p = p->next) { + for (p = ebpf_pids_link_list; p; p = p->next) { // for each process found - p->sortlist = 0; p->parent = NULL; if (unlikely(!p->ppid)) { @@ -709,16 +573,15 @@ static inline void link_all_processes_to_their_parents(void) continue; } - pp = ebpf_all_pids[p->ppid]; - if (likely(pp)) { + pp = &ebpf_pids[p->ppid]; + if (likely(pp->pid)) { p->parent = pp; pp->children_count++; if (unlikely(debug_enabled || (p->target && p->target->debug_enabled))) debug_log_int( - "child %d (%s, %s) on target '%s' has parent %d (%s, %s).", p->pid, p->comm, - p->updated ? "running" : "exited", (p->target) ? p->target->name : "UNSET", pp->pid, pp->comm, - pp->updated ? "running" : "exited"); + "child %d (%s) on target '%s' has parent %d (%s).", p->pid, p->comm, + (p->target) ? p->target->name : "UNSET", pp->pid, pp->comm); } else { p->parent = NULL; debug_log("pid %d %s states parent %d, but the later does not exist.", p->pid, p->comm, p->ppid); @@ -731,7 +594,7 @@ static inline void link_all_processes_to_their_parents(void) */ static void apply_apps_groups_targets_inheritance(void) { - struct ebpf_pid_stat *p = NULL; + struct ebpf_pid_data *p = NULL; // children that do not have a target // inherit their target from their parent @@ -740,7 +603,7 @@ static void apply_apps_groups_targets_inheritance(void) if (unlikely(debug_enabled)) loops++; found = 0; - for (p = ebpf_root_of_pids; p; p = p->next) { + for (p = ebpf_pids_link_list; p; p = p->next) { // if this process does not have a target // and it has a parent // and its parent has a target @@ -751,7 +614,7 @@ static void apply_apps_groups_targets_inheritance(void) if (debug_enabled || (p->target && p->target->debug_enabled)) debug_log_int( - "TARGET INHERITANCE: %s is inherited by %d (%s) from its parent %d (%s).", p->target->name, + "TARGET INHERITANCE: %s is inherited by %u (%s) from its parent %d (%s).", p->target->name, p->pid, p->comm, p->parent->pid, p->parent->comm); } } @@ -766,7 +629,7 @@ static void apply_apps_groups_targets_inheritance(void) loops++; found = 0; - for (p = ebpf_root_of_pids; p; p = p->next) { + for (p = ebpf_pids_link_list; p; p = p->next) { if (unlikely(!p->sortlist && !p->children_count)) p->sortlist = sortlist++; @@ -802,17 +665,15 @@ static void apply_apps_groups_targets_inheritance(void) } // init goes always to default target - if (ebpf_all_pids[INIT_PID]) - ebpf_all_pids[INIT_PID]->target = apps_groups_default_target; + ebpf_pids[INIT_PID].target = apps_groups_default_target; // pid 0 goes always to default target - if (ebpf_all_pids[0]) - ebpf_all_pids[0]->target = apps_groups_default_target; + ebpf_pids[0].target = apps_groups_default_target; // give a default target on all top level processes if (unlikely(debug_enabled)) loops++; - for (p = ebpf_root_of_pids; p; p = p->next) { + for (p = ebpf_pids_link_list; p; p = p->next) { // if the process is not merged itself // then is is a top level process if (unlikely(!p->merged && !p->target)) @@ -823,8 +684,7 @@ static void apply_apps_groups_targets_inheritance(void) p->sortlist = sortlist++; } - if 
(ebpf_all_pids[1]) - ebpf_all_pids[1]->sortlist = sortlist++; + ebpf_pids[1].sortlist = sortlist++; // give a target to all merged child processes found = 1; @@ -832,7 +692,7 @@ static void apply_apps_groups_targets_inheritance(void) if (unlikely(debug_enabled)) loops++; found = 0; - for (p = ebpf_root_of_pids; p; p = p->next) { + for (p = ebpf_pids_link_list; p; p = p->next) { if (unlikely(!p->target && p->merged && p->parent && p->parent->target)) { p->target = p->parent->target; found++; @@ -872,29 +732,23 @@ static inline void post_aggregate_targets(struct ebpf_target *root) * * @param pid the PID that will be removed. */ -static inline void ebpf_del_pid_entry(pid_t pid) +void ebpf_del_pid_entry(pid_t pid) { - struct ebpf_pid_stat *p = ebpf_all_pids[pid]; - - if (unlikely(!p)) { - netdata_log_error("attempted to free pid %d that is not allocated.", pid); - return; - } + ebpf_pid_data_t *p = &ebpf_pids[pid]; debug_log("process %d %s exited, deleting it.", pid, p->comm); - if (ebpf_root_of_pids == p) - ebpf_root_of_pids = p->next; + if (ebpf_pids_link_list == p) + ebpf_pids_link_list = p->next; if (p->next) p->next->prev = p->prev; if (p->prev) p->prev->next = p->next; - freez(p->stat_filename); - freez(p->status_filename); - freez(p->io_filename); - freez(p->cmdline_filename); + + if ((p->thread_collecting & EBPF_PIDS_PROC_FILE) || p->has_proc_file) + ebpf_all_pids_count--; rw_spinlock_write_lock(&ebpf_judy_pid.index.rw_spinlock); netdata_ebpf_judy_pid_stats_t *pid_ptr = ebpf_get_pid_from_judy_unsafe(&ebpf_judy_pid.index.JudyLArray, p->pid); @@ -914,58 +768,19 @@ static inline void ebpf_del_pid_entry(pid_t pid) } rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock); - freez(p->cmdline); - ebpf_pid_stat_release(p); - - ebpf_all_pids[pid] = NULL; - ebpf_all_pids_count--; -} - -/** - * Get command string associated with a PID. - * This can only safely be used when holding the `collect_data_mutex` lock. - * - * @param pid the pid to search the data. - * @param n the maximum amount of bytes to copy into dest. - * if this is greater than the size of the command, it is clipped. - * @param dest the target memory buffer to write the command into. - * @return -1 if the PID hasn't been scraped yet, 0 otherwise. - */ -int get_pid_comm(pid_t pid, size_t n, char *dest) -{ - struct ebpf_pid_stat *stat; - - stat = ebpf_all_pids[pid]; - if (unlikely(stat == NULL)) { - return -1; - } - - if (unlikely(n > sizeof(stat->comm))) { - n = sizeof(stat->comm); - } - - strncpyz(dest, stat->comm, n); - return 0; + memset(p, 0, sizeof(ebpf_pid_data_t)); } /** * Remove PIDs when they are not running more. */ -void ebpf_cleanup_exited_pids(int max) +static void ebpf_cleanup_exited_pids() { - struct ebpf_pid_stat *p = NULL; - - for (p = ebpf_root_of_pids; p;) { - if (p->not_updated > max) { - if (unlikely(debug_enabled && (p->keep || p->keeploops))) - debug_log(" > CLEANUP cannot keep exited process %d (%s) anymore - removing it.", p->pid, p->comm); - - pid_t r = p->pid; - p = p->next; - - ebpf_del_pid_entry(r); + ebpf_pid_data_t *p = NULL; + for (p = ebpf_pids_link_list; p; p = p->next) { + if (!p->has_proc_file) { + ebpf_reset_specific_pid_data(p); } - p = p->next; } } @@ -974,14 +789,14 @@ void ebpf_cleanup_exited_pids(int max) * * @return It returns 0 on success and -1 otherwise. 
*/ -static inline void read_proc_filesystem() +static int ebpf_read_proc_filesystem() { char dirname[FILENAME_MAX + 1]; snprintfz(dirname, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix); DIR *dir = opendir(dirname); if (!dir) - return; + return -1; struct dirent *de = NULL; @@ -997,9 +812,11 @@ static inline void read_proc_filesystem() if (unlikely(endptr == de->d_name || *endptr != '\0')) continue; - ebpf_collect_data_for_pid(pid, NULL); + ebpf_collect_data_for_pid(pid); } closedir(dir); + + return 0; } /** @@ -1009,17 +826,17 @@ static inline void read_proc_filesystem() * @param p the pid with information to update * @param o never used */ -static inline void aggregate_pid_on_target(struct ebpf_target *w, struct ebpf_pid_stat *p, struct ebpf_target *o) +static inline void aggregate_pid_on_target(struct ebpf_target *w, ebpf_pid_data_t *p, struct ebpf_target *o) { UNUSED(o); - if (unlikely(!p->updated)) { + if (unlikely(!p->has_proc_file)) { // the process is not running return; } if (unlikely(!w)) { - netdata_log_error("pid %d %s was left without a target!", p->pid, p->comm); + netdata_log_error("pid %u %s was left without a target!", p->pid, p->comm); return; } @@ -1042,6 +859,7 @@ void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core) { int i, end = (maps_per_core) ? ebpf_nprocs : 1; ebpf_process_stat_t *total = &out[0]; + uint64_t ct = total->ct; for (i = 1; i < end; i++) { ebpf_process_stat_t *w = &out[i]; total->exit_call += w->exit_call; @@ -1049,7 +867,11 @@ void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core) total->create_thread += w->create_thread; total->create_process += w->create_process; total->release_call += w->release_call; + + if (w->ct > ct) + ct = w->ct; } + total->ct = ct; } /** @@ -1061,19 +883,18 @@ void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core) void ebpf_process_sum_values_for_pids(ebpf_process_stat_t *process, struct ebpf_pid_on_target *root) { memset(process, 0, sizeof(ebpf_process_stat_t)); - while (root) { + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - ebpf_process_stat_t *in = &local_pid->process; - process->task_err += in->task_err; - process->release_call += in->release_call; - process->exit_call += in->exit_call; - process->create_thread += in->create_thread; - process->create_process += in->create_process; - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_PROCESS_IDX); + ebpf_publish_process_t *in = local_pid->process; + if (!in) + continue; - root = root->next; + process->task_err += in->task_err; + process->release_call += in->release_call; + process->exit_call += in->exit_call; + process->create_thread += in->create_thread; + process->create_process += in->create_process; } } @@ -1085,51 +906,50 @@ void ebpf_process_sum_values_for_pids(ebpf_process_stat_t *process, struct ebpf_ * * @param tbl_pid_stats_fd The mapped file descriptor for the hash table. * @param maps_per_core do I have hash maps per core? + * @param max_period max period to wait before remove from hash table. 
*/ -void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core) +void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core, uint32_t max_period) { - if (unlikely(!ebpf_all_pids)) + if (tbl_pid_stats_fd == -1) return; - struct ebpf_pid_stat *pids = ebpf_root_of_pids; // global list of all processes running - while (pids) { - if (pids->updated_twice) { - pids->read = 0; // mark it as not read, so that collect_data_for_pid() will read it - pids->updated = 0; - pids->merged = 0; - pids->children_count = 0; - pids->parent = NULL; - } else { - if (pids->updated) - pids->updated_twice = 1; - } - - pids = pids->next; - } - - read_proc_filesystem(); - - pids = ebpf_root_of_pids; // global list of all processes running + pids_fd[EBPF_PIDS_PROCESS_IDX] = tbl_pid_stats_fd; + size_t length = sizeof(ebpf_process_stat_t); + if (maps_per_core) + length *= ebpf_nprocs; if (tbl_pid_stats_fd != -1) { - size_t length = sizeof(ebpf_process_stat_t); - if (maps_per_core) - length *= ebpf_nprocs; uint32_t key = 0, next_key = 0; while (bpf_map_get_next_key(tbl_pid_stats_fd, &key, &next_key) == 0) { - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(key, 0); - if (!local_pid) - goto end_process_loop; - - ebpf_process_stat_t *w = &local_pid->process; if (bpf_map_lookup_elem(tbl_pid_stats_fd, &key, process_stat_vector)) { goto end_process_loop; } ebpf_process_apps_accumulator(process_stat_vector, maps_per_core); - memcpy(w, process_stat_vector, sizeof(ebpf_process_stat_t)); + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key, 0, NULL, EBPF_PIDS_PROCESS_IDX); + ebpf_publish_process_t *w = local_pid->process; + if (!w) + local_pid->process = w = ebpf_process_allocate_publish(); + + if (!w->ct || w->ct != process_stat_vector[0].ct) { + w->ct = process_stat_vector[0].ct; + w->create_thread = process_stat_vector[0].create_thread; + w->exit_call = process_stat_vector[0].exit_call; + w->create_thread = process_stat_vector[0].create_thread; + w->create_process = process_stat_vector[0].create_process; + w->release_call = process_stat_vector[0].release_call; + w->task_err = process_stat_vector[0].task_err; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(local_pid); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(local_pid, tbl_pid_stats_fd, key, EBPF_PIDS_PROCESS_IDX); + ebpf_process_release_publish(w); + local_pid->process = NULL; + } + } end_process_loop: memset(process_stat_vector, 0, length); @@ -1137,24 +957,47 @@ end_process_loop: } } + struct ebpf_target *w; + for (w = apps_groups_root_target; w; w = w->next) { + if (unlikely(!(w->processes))) + continue; + + ebpf_process_sum_values_for_pids(&w->process, w->root_pid); + } + +} + +/** + * + */ +void ebpf_parse_proc_files() +{ + ebpf_pid_data_t *pids; + for (pids = ebpf_pids_link_list; pids;) { + if (kill(pids->pid, 0)) { // No PID found + ebpf_pid_data_t *next = pids->next; + ebpf_reset_specific_pid_data(pids); + pids = next; + continue; + } + + pids->not_updated = EBPF_CLEANUP_FACTOR; + pids->merged = 0; + pids->children_count = 0; + pids = pids->next; + } + + if (ebpf_read_proc_filesystem()) + return; + link_all_processes_to_their_parents(); apply_apps_groups_targets_inheritance(); apps_groups_targets_count = zero_all_targets(apps_groups_root_target); - // this has to be done, before the cleanup - // // concentrate everything on the targets - for (pids = ebpf_root_of_pids; pids; pids = pids->next) + for (pids = ebpf_pids_link_list; pids; pids = pids->next) 
aggregate_pid_on_target(pids->target, pids, NULL); - post_aggregate_targets(apps_groups_root_target); - - struct ebpf_target *w; - for (w = apps_groups_root_target; w; w = w->next) { - if (unlikely(!(w->processes))) - continue; - - ebpf_process_sum_values_for_pids(&w->process, w->root_pid); - } + ebpf_cleanup_exited_pids(); } diff --git a/src/collectors/ebpf.plugin/ebpf_apps.h b/src/collectors/ebpf.plugin/ebpf_apps.h index a2cbaf3b7..98c9995da 100644 --- a/src/collectors/ebpf.plugin/ebpf_apps.h +++ b/src/collectors/ebpf.plugin/ebpf_apps.h @@ -39,10 +39,55 @@ #include "ebpf_swap.h" #include "ebpf_vfs.h" -#define EBPF_MAX_COMPARE_NAME 100 +#define EBPF_MAX_COMPARE_NAME 95 #define EBPF_MAX_NAME 100 -#define EBPF_CLEANUP_FACTOR 10 +#define EBPF_CLEANUP_FACTOR 2 + +enum ebpf_pids_index { + EBPF_PIDS_PROCESS_IDX, + EBPF_PIDS_SOCKET_IDX, + EBPF_PIDS_CACHESTAT_IDX, + EBPF_PIDS_DCSTAT_IDX, + EBPF_PIDS_SWAP_IDX, + EBPF_PIDS_VFS_IDX, + EBPF_PIDS_FD_IDX, + EBPF_PIDS_SHM_IDX, + + EBPF_PIDS_PROC_FILE, + EBPF_PIDS_END_IDX +}; + +extern int pids_fd[EBPF_PIDS_END_IDX]; + +enum ebpf_main_index { + EBPF_MODULE_PROCESS_IDX, + EBPF_MODULE_SOCKET_IDX, + EBPF_MODULE_CACHESTAT_IDX, + EBPF_MODULE_SYNC_IDX, + EBPF_MODULE_DCSTAT_IDX, + EBPF_MODULE_SWAP_IDX, + EBPF_MODULE_VFS_IDX, + EBPF_MODULE_FILESYSTEM_IDX, + EBPF_MODULE_DISK_IDX, + EBPF_MODULE_MOUNT_IDX, + EBPF_MODULE_FD_IDX, + EBPF_MODULE_HARDIRQ_IDX, + EBPF_MODULE_SOFTIRQ_IDX, + EBPF_MODULE_OOMKILL_IDX, + EBPF_MODULE_SHM_IDX, + EBPF_MODULE_MDFLUSH_IDX, + EBPF_MODULE_FUNCTION_IDX, + /* THREADS MUST BE INCLUDED BEFORE THIS COMMENT */ + EBPF_OPTION_ALL_CHARTS, + EBPF_OPTION_VERSION, + EBPF_OPTION_HELP, + EBPF_OPTION_GLOBAL_CHART, + EBPF_OPTION_RETURN_MODE, + EBPF_OPTION_LEGACY, + EBPF_OPTION_CORE, + EBPF_OPTION_UNITTEST +}; // ---------------------------------------------------------------------------- // Structures used to read information from kernel ring @@ -63,10 +108,21 @@ typedef struct ebpf_process_stat { //Counter uint32_t task_err; - - uint8_t removeme; } ebpf_process_stat_t; +typedef struct __attribute__((packed)) ebpf_publish_process { + uint64_t ct; + + //Counter + uint32_t exit_call; + uint32_t release_call; + uint32_t create_process; + uint32_t create_thread; + + //Counter + uint32_t task_err; +} ebpf_publish_process_t; + // ---------------------------------------------------------------------------- // pid_stat // @@ -108,21 +164,246 @@ struct ebpf_target { struct ebpf_target *target; // the one that will be reported to netdata struct ebpf_target *next; }; - extern struct ebpf_target *apps_groups_default_target; extern struct ebpf_target *apps_groups_root_target; extern struct ebpf_target *users_root_target; extern struct ebpf_target *groups_root_target; +extern uint64_t collect_pids; + +// ebpf_pid_data +typedef struct __attribute__((packed)) ebpf_pid_data { + uint32_t pid; + uint32_t ppid; + uint64_t thread_collecting; + + char comm[EBPF_MAX_COMPARE_NAME + 1]; + char *cmdline; + + uint32_t has_proc_file; + uint32_t not_updated; + int children_count; // number of processes directly referencing this + int merged; + int sortlist; // higher numbers = top on the process tree + + struct ebpf_target *target; // the one that will be reported to netdata + struct ebpf_pid_data *parent; + struct ebpf_pid_data *prev; + struct ebpf_pid_data *next; + + netdata_publish_fd_stat_t *fd; + netdata_publish_swap_t *swap; + netdata_publish_shm_t *shm; // this has a leak issue + netdata_publish_dcstat_t *dc; + netdata_publish_vfs_t *vfs; + netdata_publish_cachestat_t 
*cachestat; + ebpf_publish_process_t *process; + ebpf_socket_publish_apps_t *socket; + +} ebpf_pid_data_t; + +extern ebpf_pid_data_t *ebpf_pids; +extern ebpf_pid_data_t *ebpf_pids_link_list; +extern size_t ebpf_all_pids_count; +extern size_t ebpf_hash_table_pids_count; +void ebpf_del_pid_entry(pid_t pid); + +static inline void *ebpf_cachestat_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_cachestat_t)); +} + +static inline void ebpf_cachestat_release_publish(netdata_publish_cachestat_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_dcallocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_dcstat_t)); +} + +static inline void ebpf_dc_release_publish(netdata_publish_dcstat_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_fd_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_fd_stat_t)); +} + +static inline void ebpf_fd_release_publish(netdata_publish_fd_stat_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_shm_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_shm_t)); +} + +static inline void ebpf_shm_release_publish(netdata_publish_shm_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_socket_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(ebpf_socket_publish_apps_t)); +} + +static inline void ebpf_socket_release_publish(ebpf_socket_publish_apps_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_swap_allocate_publish_swap() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_swap_t)); +} + +static inline void ebpf_swap_release_publish(netdata_publish_swap_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_vfs_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(netdata_publish_vfs_t)); +} + +static inline void ebpf_vfs_release_publish(netdata_publish_vfs_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline void *ebpf_process_allocate_publish() +{ + ebpf_hash_table_pids_count++; + return callocz(1, sizeof(ebpf_publish_process_t)); +} + +static inline void ebpf_process_release_publish(ebpf_publish_process_t *ptr) +{ + ebpf_hash_table_pids_count--; + freez(ptr); +} + +static inline ebpf_pid_data_t *ebpf_get_pid_data(uint32_t pid, uint32_t tgid, char *name, uint32_t idx) { + ebpf_pid_data_t *ptr = &ebpf_pids[pid]; + ptr->thread_collecting |= 1<<idx; + // The caller is getting data to work. 
+ if (!name && idx != EBPF_PIDS_PROC_FILE) + return ptr; + + if (ptr->pid == pid) { + return ptr; + } + + ptr->pid = pid; + ptr->ppid = tgid; + + if (name) + strncpyz(ptr->comm, name, EBPF_MAX_COMPARE_NAME); + + if (likely(ebpf_pids_link_list)) + ebpf_pids_link_list->prev = ptr; + + ptr->next = ebpf_pids_link_list; + ebpf_pids_link_list = ptr; + if (idx == EBPF_PIDS_PROC_FILE) { + ebpf_all_pids_count++; + } + + return ptr; +} + +static inline void ebpf_release_pid_data(ebpf_pid_data_t *eps, int fd, uint32_t key, uint32_t idx) +{ + if (fd) { + bpf_map_delete_elem(fd, &key); + } + eps->thread_collecting &= ~(1<<idx); + if (!eps->thread_collecting && !eps->has_proc_file) { + ebpf_del_pid_entry((pid_t)key); + } +} + +static inline void ebpf_reset_specific_pid_data(ebpf_pid_data_t *ptr) +{ + int idx; + uint32_t pid = ptr->pid; + for (idx = EBPF_PIDS_PROCESS_IDX; idx < EBPF_PIDS_PROC_FILE; idx++) { + if (!(ptr->thread_collecting & (1<<idx))) { + continue; + } + // Check if we still have the map loaded + int fd = pids_fd[idx]; + if (fd <= STDERR_FILENO) + continue; + + bpf_map_delete_elem(fd, &pid); + ebpf_hash_table_pids_count--; + void *clean; + switch (idx) { + case EBPF_PIDS_PROCESS_IDX: + clean = ptr->process; + break; + case EBPF_PIDS_SOCKET_IDX: + clean = ptr->socket; + break; + case EBPF_PIDS_CACHESTAT_IDX: + clean = ptr->cachestat; + break; + case EBPF_PIDS_DCSTAT_IDX: + clean = ptr->dc; + break; + case EBPF_PIDS_SWAP_IDX: + clean = ptr->swap; + break; + case EBPF_PIDS_VFS_IDX: + clean = ptr->vfs; + break; + case EBPF_PIDS_FD_IDX: + clean = ptr->fd; + break; + case EBPF_PIDS_SHM_IDX: + clean = ptr->shm; + break; + default: + clean = NULL; + } + freez(clean); + } + + ebpf_del_pid_entry(pid); +} + typedef struct ebpf_pid_stat { - int32_t pid; + uint32_t pid; + uint64_t thread_collecting; char comm[EBPF_MAX_COMPARE_NAME + 1]; char *cmdline; uint32_t log_thrown; // char state; - int32_t ppid; + uint32_t ppid; int children_count; // number of processes directly referencing this unsigned char keep : 1; // 1 when we need to keep this process in memory even after it exited @@ -199,8 +480,6 @@ static inline void debug_log_int(const char *fmt, ...) 
// ---------------------------------------------------------------------------- // Exported variabled and functions // -extern struct ebpf_pid_stat **ebpf_all_pids; - int ebpf_read_apps_groups_conf(struct ebpf_target **apps_groups_default_target, struct ebpf_target **apps_groups_root_target, const char *path, @@ -216,7 +495,7 @@ int ebpf_read_hash_table(void *ep, int fd, uint32_t pid); int get_pid_comm(pid_t pid, size_t n, char *dest); -void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core); +void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core, uint32_t max_period); void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core); // The default value is at least 32 times smaller than maximum number of PIDs allowed on system, @@ -227,8 +506,7 @@ void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core); #define NETDATA_EBPF_ALLOC_MIN_ELEMENTS 256 // ARAL Sectiion -extern void ebpf_aral_init(void); -extern ebpf_pid_stat_t *ebpf_get_pid_entry(pid_t pid, pid_t tgid); +void ebpf_aral_init(void); extern ebpf_process_stat_t *process_stat_vector; extern ARAL *ebpf_aral_vfs_pid; @@ -240,7 +518,7 @@ extern ARAL *ebpf_aral_shm_pid; void ebpf_shm_aral_init(); netdata_publish_shm_t *ebpf_shm_stat_get(void); void ebpf_shm_release(netdata_publish_shm_t *stat); -void ebpf_cleanup_exited_pids(int max); +void ebpf_parse_proc_files(); // ARAL Section end diff --git a/src/collectors/ebpf.plugin/ebpf_cachestat.c b/src/collectors/ebpf.plugin/ebpf_cachestat.c index 379ff05bb..8c0260d51 100644 --- a/src/collectors/ebpf.plugin/ebpf_cachestat.c +++ b/src/collectors/ebpf.plugin/ebpf_cachestat.c @@ -330,9 +330,9 @@ static void ebpf_obsolete_specific_cachestat_charts(char *type, int update_every */ static void ebpf_obsolete_cachestat_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_CACHESTAT_HIT_RATIO_CHART, + "", "Hit ratio", EBPF_COMMON_UNITS_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU, @@ -341,9 +341,9 @@ static void ebpf_obsolete_cachestat_services(ebpf_module_t *em, char *id) 21100, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_CACHESTAT_DIRTY_CHART, + "", "Number of dirty pages", EBPF_CACHESTAT_UNITS_PAGE, NETDATA_CACHESTAT_SUBMENU, @@ -352,9 +352,9 @@ static void ebpf_obsolete_cachestat_services(ebpf_module_t *em, char *id) 21101, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_CACHESTAT_HIT_CHART, + "", "Number of accessed files", EBPF_CACHESTAT_UNITS_HITS, NETDATA_CACHESTAT_SUBMENU, @@ -363,9 +363,9 @@ static void ebpf_obsolete_cachestat_services(ebpf_module_t *em, char *id) 21102, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_CACHESTAT_MISSES_CHART, + "", "Files out of page cache", EBPF_CACHESTAT_UNITS_MISSES, NETDATA_CACHESTAT_SUBMENU, @@ -525,9 +525,14 @@ void ebpf_obsolete_cachestat_apps_charts(struct ebpf_module *em) */ static void ebpf_cachestat_exit(void *pptr) { + pids_fd[EBPF_PIDS_CACHESTAT_IDX] = -1; ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_CACHESTAT_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_cachestat.thread) nd_thread_signal_cancel(ebpf_read_cachestat.thread); @@ -677,6 +682,9 @@ static void 
cachestat_apps_accumulator(netdata_cachestat_pid_t *out, int maps_pe total->mark_page_accessed += w->mark_page_accessed; if (w->ct > ct) ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } total->ct = ct; } @@ -692,13 +700,14 @@ static void cachestat_apps_accumulator(netdata_cachestat_pid_t *out, int maps_pe static inline void cachestat_save_pid_values(netdata_publish_cachestat_t *out, netdata_cachestat_pid_t *in) { out->ct = in->ct; - if (!out->current.mark_page_accessed) { - memcpy(&out->current, &in[0], sizeof(netdata_cachestat_pid_t)); - return; + if (out->current.mark_page_accessed) { + memcpy(&out->prev, &out->current, sizeof(netdata_cachestat_t)); } - memcpy(&out->prev, &out->current, sizeof(netdata_cachestat_pid_t)); - memcpy(&out->current, &in[0], sizeof(netdata_cachestat_pid_t)); + out->current.account_page_dirtied = in[0].account_page_dirtied; + out->current.add_to_page_cache_lru = in[0].add_to_page_cache_lru; + out->current.mark_buffer_dirty = in[0].mark_buffer_dirty; + out->current.mark_page_accessed = in[0].mark_page_accessed; } /** @@ -707,8 +716,9 @@ static inline void cachestat_save_pid_values(netdata_publish_cachestat_t *out, n * Read the apps table and store data inside the structure. * * @param maps_per_core do I need to read all cores? + * @param max_period limit of iterations without updates before remove data from hash table */ -static void ebpf_read_cachestat_apps_table(int maps_per_core, int max_period) +static void ebpf_read_cachestat_apps_table(int maps_per_core, uint32_t max_period) { netdata_cachestat_pid_t *cv = cachestat_vector; int fd = cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd; @@ -724,17 +734,22 @@ static void ebpf_read_cachestat_apps_table(int maps_per_core, int max_period) cachestat_apps_accumulator(cv, maps_per_core); - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(key, cv->tgid); - if (!local_pid) - goto end_cachestat_loop; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key, cv->tgid, cv->name, EBPF_PIDS_CACHESTAT_IDX); + netdata_publish_cachestat_t *publish = local_pid->cachestat; + if (!publish) + local_pid->cachestat = publish = ebpf_cachestat_allocate_publish(); - netdata_publish_cachestat_t *publish = &local_pid->cachestat; if (!publish->ct || publish->ct != cv->ct){ cachestat_save_pid_values(publish, cv); local_pid->not_updated = 0; - } else if (++local_pid->not_updated >= max_period) { - bpf_map_delete_elem(fd, &key); - local_pid->not_updated = 0; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(local_pid); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(local_pid, fd, key, EBPF_PIDS_CACHESTAT_IDX); + ebpf_cachestat_release_publish(publish); + local_pid->cachestat = NULL; + } } end_cachestat_loop: @@ -759,13 +774,14 @@ static void ebpf_update_cachestat_cgroup() struct pid_on_target2 *pids; for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; - netdata_cachestat_pid_t *out = &pids->cachestat; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_cachestat_t *in = &local_pid->cachestat; + netdata_publish_cachestat_t *out = &pids->cachestat; - memcpy(out, &in->current, sizeof(netdata_cachestat_pid_t)); - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_CACHESTAT_IDX); + netdata_publish_cachestat_t *in = local_pid->cachestat; + if (!in) + continue; + + memcpy(&out->current, &in->current, sizeof(netdata_cachestat_t)); } } 
pthread_mutex_unlock(&mutex_cgroup_shm); @@ -784,20 +800,19 @@ void ebpf_cachestat_sum_pids(netdata_publish_cachestat_t *publish, struct ebpf_p memcpy(&publish->prev, &publish->current,sizeof(publish->current)); memset(&publish->current, 0, sizeof(publish->current)); - netdata_cachestat_pid_t *dst = &publish->current; - while (root) { + netdata_cachestat_t *dst = &publish->current; + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_cachestat_t *w = &local_pid->cachestat; - netdata_cachestat_pid_t *src = &w->current; - dst->account_page_dirtied += src->account_page_dirtied; - dst->add_to_page_cache_lru += src->add_to_page_cache_lru; - dst->mark_buffer_dirty += src->mark_buffer_dirty; - dst->mark_page_accessed += src->mark_page_accessed; - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_CACHESTAT_IDX); + netdata_publish_cachestat_t *w = local_pid->cachestat; + if (!w) + continue; - root = root->next; + netdata_cachestat_t *src = &w->current; + dst->account_page_dirtied += src->account_page_dirtied; + dst->add_to_page_cache_lru += src->add_to_page_cache_lru; + dst->mark_buffer_dirty += src->mark_buffer_dirty; + dst->mark_page_accessed += src->mark_page_accessed; } } @@ -834,13 +849,14 @@ void *ebpf_read_cachestat_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; usec_t period = update_every * USEC_PER_SEC; + pids_fd[EBPF_PIDS_CACHESTAT_IDX] = cachestat_maps[NETDATA_CACHESTAT_PID_STATS].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); if (ebpf_plugin_stop() || ++counter != update_every) @@ -1020,8 +1036,8 @@ void ebpf_cache_send_apps_data(struct ebpf_target *root) if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_CACHESTAT_IDX)))) continue; - netdata_cachestat_pid_t *current = &w->cachestat.current; - netdata_cachestat_pid_t *prev = &w->cachestat.prev; + netdata_cachestat_t *current = &w->cachestat.current; + netdata_cachestat_t *prev = &w->cachestat.prev; uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed; uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty; @@ -1067,16 +1083,14 @@ void ebpf_cachestat_sum_cgroup_pids(netdata_publish_cachestat_t *publish, struct memcpy(&publish->prev, &publish->current,sizeof(publish->current)); memset(&publish->current, 0, sizeof(publish->current)); - netdata_cachestat_pid_t *dst = &publish->current; - while (root) { - netdata_cachestat_pid_t *src = &root->cachestat; + netdata_cachestat_t *dst = &publish->current; + for (; root; root = root->next) { + netdata_cachestat_t *src = &root->cachestat.current; dst->account_page_dirtied += src->account_page_dirtied; dst->add_to_page_cache_lru += src->add_to_page_cache_lru; dst->mark_buffer_dirty += src->mark_buffer_dirty; dst->mark_page_accessed += src->mark_page_accessed; - - root = root->next; } } @@ -1091,8 +1105,8 @@ void ebpf_cachestat_calc_chart_values() for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) { ebpf_cachestat_sum_cgroup_pids(&ect->publish_cachestat, ect->pids); - netdata_cachestat_pid_t *current = &ect->publish_cachestat.current; - netdata_cachestat_pid_t *prev = &ect->publish_cachestat.prev; + netdata_cachestat_t *current = 
&ect->publish_cachestat.current; + netdata_cachestat_t *prev = &ect->publish_cachestat.prev; uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed; uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty; @@ -1205,19 +1219,19 @@ static void ebpf_send_systemd_cachestat_charts() continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_CACHESTAT_HIT_RATIO_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_CACHESTAT_HIT_RATIO_CHART, ""); write_chart_dimension("percentage", (long long)ect->publish_cachestat.ratio); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_CACHESTAT_DIRTY_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_CACHESTAT_DIRTY_CHART, ""); write_chart_dimension("pages", (long long)ect->publish_cachestat.dirty); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_CACHESTAT_HIT_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_CACHESTAT_HIT_CHART, ""); write_chart_dimension("hits", (long long)ect->publish_cachestat.hit); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_CACHESTAT_MISSES_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_CACHESTAT_MISSES_CHART, ""); write_chart_dimension("misses", (long long)ect->publish_cachestat.miss); ebpf_write_end_chart(); } diff --git a/src/collectors/ebpf.plugin/ebpf_cachestat.h b/src/collectors/ebpf.plugin/ebpf_cachestat.h index 79d22b43d..6bb91b641 100644 --- a/src/collectors/ebpf.plugin/ebpf_cachestat.h +++ b/src/collectors/ebpf.plugin/ebpf_cachestat.h @@ -33,10 +33,10 @@ #define NETDATA_CGROUP_CACHESTAT_HIT_FILES_CONTEXT "cgroup.cachestat_hits" #define NETDATA_CGROUP_CACHESTAT_MISS_FILES_CONTEXT "cgroup.cachestat_misses" -#define NETDATA_SYSTEMD_CACHESTAT_HIT_RATIO_CONTEXT "systemd.services.cachestat_ratio" -#define NETDATA_SYSTEMD_CACHESTAT_MODIFIED_CACHE_CONTEXT "systemd.services.cachestat_dirties" -#define NETDATA_SYSTEMD_CACHESTAT_HIT_FILE_CONTEXT "systemd.services.cachestat_hits" -#define NETDATA_SYSTEMD_CACHESTAT_MISS_FILES_CONTEXT "systemd.services.cachestat_misses" +#define NETDATA_SYSTEMD_CACHESTAT_HIT_RATIO_CONTEXT "systemd.service.cachestat_ratio" +#define NETDATA_SYSTEMD_CACHESTAT_MODIFIED_CACHE_CONTEXT "systemd.service.cachestat_dirties" +#define NETDATA_SYSTEMD_CACHESTAT_HIT_FILE_CONTEXT "systemd.service.cachestat_hits" +#define NETDATA_SYSTEMD_CACHESTAT_MISS_FILES_CONTEXT "systemd.service.cachestat_misses" // variables enum cachestat_counters { @@ -69,20 +69,27 @@ enum cachestat_tables { NETDATA_CACHESTAT_CTRL }; -typedef struct netdata_publish_cachestat_pid { +typedef struct netdata_cachestat_pid { uint64_t ct; uint32_t tgid; uint32_t uid; uint32_t gid; char name[TASK_COMM_LEN]; - uint64_t add_to_page_cache_lru; - uint64_t mark_page_accessed; - uint64_t account_page_dirtied; - uint64_t mark_buffer_dirty; + uint32_t add_to_page_cache_lru; + uint32_t mark_page_accessed; + uint32_t account_page_dirtied; + uint32_t mark_buffer_dirty; } netdata_cachestat_pid_t; -typedef struct netdata_publish_cachestat { +typedef struct __attribute__((packed)) netdata_cachestat { + uint32_t add_to_page_cache_lru; + uint32_t mark_page_accessed; + uint32_t account_page_dirtied; + uint32_t mark_buffer_dirty; +} netdata_cachestat_t; + +typedef struct __attribute__((packed)) netdata_publish_cachestat { uint64_t ct; long long ratio; @@ -90,8 +97,8 @@ typedef struct netdata_publish_cachestat { long long hit; long long miss; - netdata_cachestat_pid_t current; - 
netdata_cachestat_pid_t prev; + netdata_cachestat_t current; + netdata_cachestat_t prev; } netdata_publish_cachestat_t; void *ebpf_cachestat_thread(void *ptr); diff --git a/src/collectors/ebpf.plugin/ebpf_cgroup.c b/src/collectors/ebpf.plugin/ebpf_cgroup.c index ae3bf3f8a..9e1fa8231 100644 --- a/src/collectors/ebpf.plugin/ebpf_cgroup.c +++ b/src/collectors/ebpf.plugin/ebpf_cgroup.c @@ -329,9 +329,9 @@ void ebpf_parse_cgroup_shm_data() */ void ebpf_create_charts_on_systemd(ebpf_systemd_args_t *chart) { - ebpf_write_chart_cmd(NETDATA_SERVICE_FAMILY, - chart->id, + ebpf_write_chart_cmd(chart->id, chart->suffix, + "", chart->title, chart->units, chart->family, @@ -340,9 +340,23 @@ void ebpf_create_charts_on_systemd(ebpf_systemd_args_t *chart) chart->order, chart->update_every, chart->module); - ebpf_create_chart_labels("service_name", chart->id, RRDLABEL_SRC_AUTO); + char service_name[512]; + snprintfz(service_name, 511, "%s", (!strstr(chart->id, "systemd_")) ? chart->id : (chart->id + 8)); + ebpf_create_chart_labels("service_name", service_name, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - fprintf(stdout, "DIMENSION %s '' %s 1 1\n", chart->dimension, chart->algorithm); + // Let us keep original string that can be used in another place. Chart creation does not happen frequently. + char *move = strdupz(chart->dimension); + while (move) { + char *next_dim = strchr(move, ','); + if (next_dim) { + *next_dim = '\0'; + next_dim++; + } + + fprintf(stdout, "DIMENSION %s '' %s 1 1\n", move, chart->algorithm); + move = next_dim; + } + freez(move); } // -------------------------------------------------------------------------------------------------------------------- diff --git a/src/collectors/ebpf.plugin/ebpf_cgroup.h b/src/collectors/ebpf.plugin/ebpf_cgroup.h index 87df7bed2..65c8212bb 100644 --- a/src/collectors/ebpf.plugin/ebpf_cgroup.h +++ b/src/collectors/ebpf.plugin/ebpf_cgroup.h @@ -9,20 +9,18 @@ #include "ebpf.h" #include "ebpf_apps.h" -#define NETDATA_SERVICE_FAMILY "systemd" - struct pid_on_target2 { int32_t pid; int updated; netdata_publish_swap_t swap; - netdata_fd_stat_t fd; + netdata_publish_fd_stat_t fd; netdata_publish_vfs_t vfs; - ebpf_process_stat_t ps; + ebpf_publish_process_t ps; netdata_dcstat_pid_t dc; netdata_publish_shm_t shm; netdata_socket_t socket; - netdata_cachestat_pid_t cachestat; + netdata_publish_cachestat_t cachestat; struct pid_on_target2 *next; }; @@ -57,9 +55,9 @@ typedef struct ebpf_cgroup_target { uint32_t updated; netdata_publish_swap_t publish_systemd_swap; - netdata_fd_stat_t publish_systemd_fd; + netdata_publish_fd_stat_t publish_systemd_fd; netdata_publish_vfs_t publish_systemd_vfs; - ebpf_process_stat_t publish_systemd_ps; + ebpf_publish_process_t publish_systemd_ps; netdata_publish_dcstat_t publish_dc; int oomkill; netdata_publish_shm_t publish_shm; diff --git a/src/collectors/ebpf.plugin/ebpf_dcstat.c b/src/collectors/ebpf.plugin/ebpf_dcstat.c index d9455ed9c..e6053cb4a 100644 --- a/src/collectors/ebpf.plugin/ebpf_dcstat.c +++ b/src/collectors/ebpf.plugin/ebpf_dcstat.c @@ -279,9 +279,9 @@ static void ebpf_obsolete_specific_dc_charts(char *type, int update_every); */ static void ebpf_obsolete_dc_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_DC_HIT_CHART, + "", "Percentage of files inside directory cache", EBPF_COMMON_UNITS_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -290,9 +290,9 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em, char *id) 21200, 
em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_DC_REFERENCE_CHART, + "", "Count file access", EBPF_COMMON_UNITS_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -301,9 +301,9 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em, char *id) 21201, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_DC_REQUEST_NOT_CACHE_CHART, + "", "Files not present inside directory cache", EBPF_COMMON_UNITS_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -312,9 +312,9 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em, char *id) 21202, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_DC_REQUEST_NOT_FOUND_CHART, + "", "Files not found", EBPF_COMMON_UNITS_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU, @@ -453,9 +453,14 @@ static void ebpf_obsolete_dc_global(ebpf_module_t *em) */ static void ebpf_dcstat_exit(void *pptr) { + pids_fd[EBPF_PIDS_DCSTAT_IDX] = -1; ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_DCSTAT_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_dcstat.thread) nd_thread_signal_cancel(ebpf_read_dcstat.thread); @@ -524,6 +529,9 @@ static void ebpf_dcstat_apps_accumulator(netdata_dcstat_pid_t *out, int maps_per if (w->ct > ct) ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } total->ct = ct; } @@ -534,8 +542,9 @@ static void ebpf_dcstat_apps_accumulator(netdata_dcstat_pid_t *out, int maps_per * Read the apps table and store data inside the structure. * * @param maps_per_core do I need to read all cores? + * @param max_period limit of iterations without updates before remove data from hash table */ -static void ebpf_read_dc_apps_table(int maps_per_core, int max_period) +static void ebpf_read_dc_apps_table(int maps_per_core, uint32_t max_period) { netdata_dcstat_pid_t *cv = dcstat_vector; int fd = dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd; @@ -551,15 +560,25 @@ static void ebpf_read_dc_apps_table(int maps_per_core, int max_period) ebpf_dcstat_apps_accumulator(cv, maps_per_core); - ebpf_pid_stat_t *pid_stat = ebpf_get_pid_entry(key, cv->tgid); - if (pid_stat) { - netdata_publish_dcstat_t *publish = &pid_stat->dc; - if (!publish->ct || publish->ct != cv->ct) { - memcpy(&publish->curr, &cv[0], sizeof(netdata_dcstat_pid_t)); - pid_stat->not_updated = 0; - } else if (++pid_stat->not_updated >= max_period) { - bpf_map_delete_elem(fd, &key); - pid_stat->not_updated = 0; + ebpf_pid_data_t *pid_stat = ebpf_get_pid_data(key, cv->tgid, cv->name, EBPF_PIDS_DCSTAT_IDX); + netdata_publish_dcstat_t *publish = pid_stat->dc; + if (!publish) + pid_stat->dc = publish = ebpf_dcallocate_publish(); + + if (!publish->ct || publish->ct != cv->ct) { + publish->ct = cv->ct; + publish->curr.not_found = cv[0].not_found; + publish->curr.file_system = cv[0].file_system; + publish->curr.cache_access = cv[0].cache_access; + + pid_stat->not_updated = 0; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(pid_stat); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(pid_stat, fd, key, EBPF_PIDS_DCSTAT_IDX); + ebpf_dc_release_publish(publish); + pid_stat->dc = NULL; } } @@ -580,20 +599,17 @@ end_dc_loop: */ void ebpf_dcstat_sum_pids(netdata_publish_dcstat_t *publish, struct ebpf_pid_on_target *root) { - memset(&publish->curr, 0, 
sizeof(netdata_dcstat_pid_t)); - netdata_dcstat_pid_t *dst = &publish->curr; - while (root) { + memset(&publish->curr, 0, sizeof(netdata_publish_dcstat_pid_t)); + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *pid_stat = ebpf_get_pid_entry(pid, 0); - if (pid_stat) { - netdata_publish_dcstat_t *w = &pid_stat->dc; - netdata_dcstat_pid_t *src = &w->curr; - dst->cache_access += src->cache_access; - dst->file_system += src->file_system; - dst->not_found += src->not_found; - } + ebpf_pid_data_t *pid_stat = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_DCSTAT_IDX); + netdata_publish_dcstat_t *w = pid_stat->dc; + if (!w) + continue; - root = root->next; + publish->curr.cache_access += w->curr.cache_access; + publish->curr.file_system += w->curr.file_system; + publish->curr.not_found += w->curr.not_found; } } @@ -635,13 +651,17 @@ void *ebpf_read_dcstat_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; usec_t period = update_every * USEC_PER_SEC; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; + pids_fd[EBPF_PIDS_DCSTAT_IDX] = dcstat_maps[NETDATA_DCSTAT_PID_STATS].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); if (ebpf_plugin_stop() || ++counter != update_every) @@ -771,12 +791,12 @@ static void ebpf_update_dc_cgroup() for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; netdata_dcstat_pid_t *out = &pids->dc; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_dcstat_t *in = &local_pid->dc; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_DCSTAT_IDX); + netdata_publish_dcstat_t *in = local_pid->dc; + if (!in) + continue; - memcpy(out, &in->curr, sizeof(netdata_dcstat_pid_t)); - } + memcpy(out, &in->curr, sizeof(netdata_publish_dcstat_pid_t)); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -1001,13 +1021,12 @@ static void ebpf_obsolete_specific_dc_charts(char *type, int update_every) void ebpf_dc_sum_cgroup_pids(netdata_publish_dcstat_t *publish, struct pid_on_target2 *root) { memset(&publish->curr, 0, sizeof(netdata_dcstat_pid_t)); - netdata_dcstat_pid_t *dst = &publish->curr; while (root) { netdata_dcstat_pid_t *src = &root->dc; - dst->cache_access += src->cache_access; - dst->file_system += src->file_system; - dst->not_found += src->not_found; + publish->curr.cache_access += src->cache_access; + publish->curr.file_system += src->file_system; + publish->curr.not_found += src->not_found; root = root->next; } @@ -1139,22 +1158,22 @@ static void ebpf_send_systemd_dc_charts() continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_DC_HIT_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_DC_HIT_CHART, ""); write_chart_dimension("percentage", (long long) ect->publish_dc.ratio); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_DC_REFERENCE_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_DC_REFERENCE_CHART, ""); write_chart_dimension("files", (long long) ect->publish_dc.cache_access); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_DC_REQUEST_NOT_CACHE_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_DC_REQUEST_NOT_CACHE_CHART, ""); value = 
(collected_number) (!ect->publish_dc.cache_access) ? 0 : (long long )ect->publish_dc.curr.file_system - (long long)ect->publish_dc.prev.file_system; ect->publish_dc.prev.file_system = ect->publish_dc.curr.file_system; write_chart_dimension("files", (long long) value); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_DC_REQUEST_NOT_FOUND_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_DC_REQUEST_NOT_FOUND_CHART, ""); value = (collected_number) (!ect->publish_dc.cache_access) ? 0 : (long long)ect->publish_dc.curr.not_found - (long long)ect->publish_dc.prev.not_found; diff --git a/src/collectors/ebpf.plugin/ebpf_dcstat.h b/src/collectors/ebpf.plugin/ebpf_dcstat.h index 82f21f48c..a7e9f82b7 100644 --- a/src/collectors/ebpf.plugin/ebpf_dcstat.h +++ b/src/collectors/ebpf.plugin/ebpf_dcstat.h @@ -3,6 +3,8 @@ #ifndef NETDATA_EBPF_DCSTAT_H #define NETDATA_EBPF_DCSTAT_H 1 +#include "ebpf.h" + // Module name & description #define NETDATA_EBPF_MODULE_NAME_DCSTAT "dcstat" #define NETDATA_EBPF_DC_MODULE_DESC "Monitor file access using directory cache. This thread is integrated with apps and cgroup." @@ -27,10 +29,10 @@ #define NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT "cgroup.dc_not_cache" #define NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT "cgroup.dc_not_found" -#define NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT "systemd.services.dc_ratio" -#define NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT "systemd.services.dc_reference" -#define NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT "systemd.services.dc_not_cache" -#define NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT "systemd.services.dc_not_found" +#define NETDATA_SYSTEMD_DC_HIT_RATIO_CONTEXT "systemd.service.dc_ratio" +#define NETDATA_SYSTEMD_DC_REFERENCE_CONTEXT "systemd.service.dc_reference" +#define NETDATA_SYSTEMD_DC_NOT_CACHE_CONTEXT "systemd.service.dc_not_cache" +#define NETDATA_SYSTEMD_DC_NOT_FOUND_CONTEXT "systemd.service.dc_not_found" // ARAL name #define NETDATA_EBPF_DCSTAT_ARAL_NAME "ebpf_dcstat" @@ -69,26 +71,32 @@ enum directory_cache_targets { NETDATA_DC_TARGET_D_LOOKUP }; -typedef struct netdata_publish_dcstat_pid { +typedef struct __attribute__((packed)) netdata_publish_dcstat_pid { + uint64_t cache_access; + uint32_t file_system; + uint32_t not_found; +} netdata_publish_dcstat_pid_t; + +typedef struct netdata_dcstat_pid { uint64_t ct; uint32_t tgid; uint32_t uid; uint32_t gid; char name[TASK_COMM_LEN]; - uint64_t cache_access; - uint64_t file_system; - uint64_t not_found; + uint32_t cache_access; + uint32_t file_system; + uint32_t not_found; } netdata_dcstat_pid_t; -typedef struct netdata_publish_dcstat { +typedef struct __attribute__((packed)) netdata_publish_dcstat { uint64_t ct; long long ratio; long long cache_access; - netdata_dcstat_pid_t curr; - netdata_dcstat_pid_t prev; + netdata_publish_dcstat_pid_t curr; + netdata_publish_dcstat_pid_t prev; } netdata_publish_dcstat_t; void *ebpf_dcstat_thread(void *ptr); diff --git a/src/collectors/ebpf.plugin/ebpf_fd.c b/src/collectors/ebpf.plugin/ebpf_fd.c index 4025931f7..61a9595cc 100644 --- a/src/collectors/ebpf.plugin/ebpf_fd.c +++ b/src/collectors/ebpf.plugin/ebpf_fd.c @@ -365,9 +365,9 @@ static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em); */ static void ebpf_obsolete_fd_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_FILE_OPEN, + "", "Number of open files", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_FILE_GROUP, @@ -377,9 +377,9 @@ static void 
ebpf_obsolete_fd_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, + "", "Fails to open files", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_FILE_GROUP, @@ -389,9 +389,9 @@ static void ebpf_obsolete_fd_services(ebpf_module_t *em, char *id) em->update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_FILE_CLOSED, + "", "Files closed", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_FILE_GROUP, @@ -401,9 +401,9 @@ static void ebpf_obsolete_fd_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, + "", "Fails to close files", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_FILE_GROUP, @@ -548,9 +548,14 @@ static void ebpf_obsolete_fd_global(ebpf_module_t *em) */ static void ebpf_fd_exit(void *pptr) { + pids_fd[EBPF_PIDS_FD_IDX] = -1; ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_FD_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_fd.thread) nd_thread_signal_cancel(ebpf_read_fd.thread); @@ -656,12 +661,19 @@ static void fd_apps_accumulator(netdata_fd_stat_t *out, int maps_per_core) { int i, end = (maps_per_core) ? ebpf_nprocs : 1; netdata_fd_stat_t *total = &out[0]; + uint64_t ct = total->ct; for (i = 1; i < end; i++) { netdata_fd_stat_t *w = &out[i]; total->open_call += w->open_call; total->close_call += w->close_call; total->open_err += w->open_err; total->close_err += w->close_err; + + if (w->ct > ct) + ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } } @@ -671,8 +683,9 @@ static void fd_apps_accumulator(netdata_fd_stat_t *out, int maps_per_core) * Read the apps table and store data inside the structure. * * @param maps_per_core do I need to read all cores? 
+ * @param max_period limit of iterations without updates before remove data from hash table */ -static void ebpf_read_fd_apps_table(int maps_per_core, int max_period) +static void ebpf_read_fd_apps_table(int maps_per_core, uint32_t max_period) { netdata_fd_stat_t *fv = fd_vector; int fd = fd_maps[NETDATA_FD_PID_STATS].map_fd; @@ -688,15 +701,26 @@ static void ebpf_read_fd_apps_table(int maps_per_core, int max_period) fd_apps_accumulator(fv, maps_per_core); - ebpf_pid_stat_t *pid_stat = ebpf_get_pid_entry(key, fv->tgid); - if (pid_stat) { - netdata_fd_stat_t *publish_fd = &pid_stat->fd; - if (!publish_fd->ct || publish_fd->ct != fv->ct) { - memcpy(publish_fd, &fv[0], sizeof(netdata_fd_stat_t)); - pid_stat->not_updated = 0; - } else if (++pid_stat->not_updated >= max_period) { - bpf_map_delete_elem(fd, &key); - pid_stat->not_updated = 0; + ebpf_pid_data_t *pid_stat = ebpf_get_pid_data(key, fv->tgid, fv->name, EBPF_PIDS_FD_IDX); + netdata_publish_fd_stat_t *publish_fd = pid_stat->fd; + if (!publish_fd) + pid_stat->fd = publish_fd = ebpf_fd_allocate_publish(); + + if (!publish_fd->ct || publish_fd->ct != fv->ct) { + publish_fd->ct = fv->ct; + publish_fd->open_call = fv->open_call; + publish_fd->close_call = fv->close_call; + publish_fd->open_err = fv->open_err; + publish_fd->close_err = fv->close_err; + + pid_stat->not_updated = 0; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(pid_stat); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(pid_stat, fd, key, EBPF_PIDS_FD_IDX); + ebpf_fd_release_publish(publish_fd); + pid_stat->fd = NULL; } } @@ -719,18 +743,17 @@ static void ebpf_fd_sum_pids(netdata_fd_stat_t *fd, struct ebpf_pid_on_target *r { memset(fd, 0, sizeof(netdata_fd_stat_t)); - while (root) { + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *pid_stat = ebpf_get_pid_entry(pid, 0); - if (pid_stat) { - netdata_fd_stat_t *w = &pid_stat->fd; - fd->open_call += w->open_call; - fd->close_call += w->close_call; - fd->open_err += w->open_err; - fd->close_err += w->close_err; - } + ebpf_pid_data_t *pid_stat = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_FD_IDX); + netdata_publish_fd_stat_t *w = pid_stat->fd; + if (!w) + continue; - root = root->next; + fd->open_call += w->open_call; + fd->close_call += w->close_call; + fd->open_err += w->open_err; + fd->close_err += w->close_err; } } @@ -767,13 +790,17 @@ void *ebpf_read_fd_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; - usec_t period = update_every * USEC_PER_SEC; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + int period = USEC_PER_SEC; + uint32_t max_period = EBPF_CLEANUP_FACTOR; + pids_fd[EBPF_PIDS_FD_IDX] = fd_maps[NETDATA_FD_PID_STATS].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); if (ebpf_plugin_stop() || ++counter != update_every) @@ -815,13 +842,12 @@ static void ebpf_update_fd_cgroup() struct pid_on_target2 *pids; for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; - netdata_fd_stat_t *out = &pids->fd; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_fd_stat_t *in = &local_pid->fd; - - memcpy(out, in, sizeof(netdata_fd_stat_t)); - } + netdata_publish_fd_stat_t *out = &pids->fd; + 
ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_FD_IDX); + netdata_publish_fd_stat_t *in = local_pid->fd; + if (!in) + continue; + memcpy(out, in, sizeof(netdata_publish_fd_stat_t)); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -872,13 +898,13 @@ void ebpf_fd_send_apps_data(ebpf_module_t *em, struct ebpf_target *root) * @param fd structure used to store data * @param pids input data */ -static void ebpf_fd_sum_cgroup_pids(netdata_fd_stat_t *fd, struct pid_on_target2 *pids) +static void ebpf_fd_sum_cgroup_pids(netdata_publish_fd_stat_t *fd, struct pid_on_target2 *pids) { netdata_fd_stat_t accumulator; memset(&accumulator, 0, sizeof(accumulator)); while (pids) { - netdata_fd_stat_t *w = &pids->fd; + netdata_publish_fd_stat_t *w = &pids->fd; accumulator.open_err += w->open_err; accumulator.open_call += w->open_call; @@ -995,7 +1021,7 @@ static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em) * @param type chart type * @param values structure with values that will be sent to netdata */ -static void ebpf_send_specific_fd_data(char *type, netdata_fd_stat_t *values, ebpf_module_t *em) +static void ebpf_send_specific_fd_data(char *type, netdata_publish_fd_stat_t *values, ebpf_module_t *em) { ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN, ""); write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_call); @@ -1120,22 +1146,22 @@ static void ebpf_send_systemd_fd_charts(ebpf_module_t *em) continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_FILE_OPEN); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_FILE_OPEN, ""); write_chart_dimension("calls", ect->publish_systemd_fd.open_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_fd.open_err); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_FILE_CLOSED); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_FILE_CLOSED, ""); write_chart_dimension("calls", ect->publish_systemd_fd.close_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_fd.close_err); ebpf_write_end_chart(); } @@ -1463,7 +1489,8 @@ void *ebpf_fd_thread(void *ptr) pthread_mutex_unlock(&lock); - ebpf_read_fd.thread = nd_thread_create(ebpf_read_fd.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_read_fd_thread, em); + ebpf_read_fd.thread = nd_thread_create(ebpf_read_fd.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_read_fd_thread, em); fd_collector(em); diff --git a/src/collectors/ebpf.plugin/ebpf_fd.h b/src/collectors/ebpf.plugin/ebpf_fd.h index d4975940e..90ecdb13e 100644 --- a/src/collectors/ebpf.plugin/ebpf_fd.h +++ b/src/collectors/ebpf.plugin/ebpf_fd.h @@ -32,14 +32,25 @@ #define NETDATA_CGROUP_FD_CLOSE_CONTEXT "cgroup.fd_close" #define NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT "cgroup.fd_close_error" -#define NETDATA_SYSTEMD_FD_OPEN_CONTEXT "systemd.services.fd_open" -#define NETDATA_SYSTEMD_FD_OPEN_ERR_CONTEXT "systemd.services.fd_open_error" -#define NETDATA_SYSTEMD_FD_CLOSE_CONTEXT "systemd.services.fd_close" -#define 
NETDATA_SYSTEMD_FD_CLOSE_ERR_CONTEXT "systemd.services.fd_close_error" +#define NETDATA_SYSTEMD_FD_OPEN_CONTEXT "systemd.service.fd_open" +#define NETDATA_SYSTEMD_FD_OPEN_ERR_CONTEXT "systemd.service.fd_open_error" +#define NETDATA_SYSTEMD_FD_CLOSE_CONTEXT "systemd.service.fd_close" +#define NETDATA_SYSTEMD_FD_CLOSE_ERR_CONTEXT "systemd.service.fd_close_error" // ARAL name #define NETDATA_EBPF_FD_ARAL_NAME "ebpf_fd" +typedef struct __attribute__((packed)) netdata_publish_fd_stat { + uint64_t ct; + + uint32_t open_call; // Open syscalls (open and openat) + uint32_t close_call; // Close syscall (close) + + // Errors + uint32_t open_err; + uint32_t close_err; +} netdata_publish_fd_stat_t; + typedef struct netdata_fd_stat { uint64_t ct; uint32_t tgid; diff --git a/src/collectors/ebpf.plugin/ebpf_filesystem.c b/src/collectors/ebpf.plugin/ebpf_filesystem.c index c56dea4b1..1187b03e9 100644 --- a/src/collectors/ebpf.plugin/ebpf_filesystem.c +++ b/src/collectors/ebpf.plugin/ebpf_filesystem.c @@ -334,6 +334,46 @@ static inline int ebpf_fs_load_and_attach(ebpf_local_maps_t *map, struct filesys *****************************************************************/ /** + * Obsolete Cleanup Struct + * + * Clean allocatged data durinc obsolete steps + * + * @param efp + */ +static void ebpf_obsolete_cleanup_struct(ebpf_filesystem_partitions_t *efp) { + freez(efp->hread.name); + efp->hread.name = NULL; + freez(efp->hread.title); + efp->hread.title = NULL; + freez(efp->hread.ctx); + efp->hread.ctx = NULL; + + freez(efp->hwrite.name); + efp->hwrite.name = NULL; + freez(efp->hwrite.title); + efp->hwrite.title = NULL; + freez(efp->hwrite.ctx); + efp->hwrite.ctx = NULL; + + freez(efp->hopen.name); + efp->hopen.name = NULL; + freez(efp->hopen.title); + efp->hopen.title = NULL; + freez(efp->hopen.ctx); + efp->hopen.ctx = NULL; + + freez(efp->hadditional.name); + efp->hadditional.name = NULL; + freez(efp->hadditional.title); + efp->hadditional.title = NULL; + freez(efp->hadditional.ctx); + efp->hadditional.ctx = NULL; + + freez(efp->family_name); + efp->family_name = NULL; +} + +/** * Create Filesystem chart * * Create latency charts @@ -348,7 +388,7 @@ static void ebpf_obsolete_fs_charts(int update_every) ebpf_filesystem_partitions_t *efp = &localfs[i]; uint32_t flags = efp->flags; if ((flags & test) == test) { - flags &= ~NETDATA_FILESYSTEM_FLAG_CHART_CREATED; + flags &= ~test; ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hread.name, "", @@ -370,6 +410,8 @@ static void ebpf_obsolete_fs_charts(int update_every) EBPF_COMMON_UNITS_CALLS_PER_SEC, efp->family_name, NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hadditional.order, update_every); + + ebpf_obsolete_cleanup_struct(efp); } efp->flags = flags; } @@ -395,9 +437,10 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(title, sizeof(title) - 1, "%s latency for each read request.", efp->filesystem); snprintfz(family, sizeof(family) - 1, "%s_latency", efp->family); snprintfz(chart_name, sizeof(chart_name) - 1, "%s_read_latency", efp->filesystem); + snprintfz(ctx, sizeof(ctx) - 1, "filesystem.read_latency"); efp->hread.name = strdupz(chart_name); efp->hread.title = strdupz(title); - efp->hread.ctx = NULL; + efp->hread.ctx = strdupz(ctx); efp->hread.order = order; efp->family_name = strdupz(family); @@ -412,9 +455,10 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(title, sizeof(title) - 1, "%s latency for each write request.", efp->filesystem); snprintfz(chart_name, sizeof(chart_name) - 1, "%s_write_latency", efp->filesystem); 
+ snprintfz(ctx, sizeof(ctx) - 1, "filesystem.write_latency"); efp->hwrite.name = strdupz(chart_name); efp->hwrite.title = strdupz(title); - efp->hwrite.ctx = NULL; + efp->hwrite.ctx = strdupz(ctx); efp->hwrite.order = order; ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name, efp->hwrite.title, @@ -427,9 +471,10 @@ static void ebpf_create_fs_charts(int update_every) snprintfz(title, sizeof(title) - 1, "%s latency for each open request.", efp->filesystem); snprintfz(chart_name, sizeof(chart_name) - 1, "%s_open_latency", efp->filesystem); + snprintfz(ctx, sizeof(ctx) - 1, "filesystem.open_latency"); efp->hopen.name = strdupz(chart_name); efp->hopen.title = strdupz(title); - efp->hopen.ctx = NULL; + efp->hopen.ctx = strdupz(ctx); efp->hopen.order = order; ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, efp->hopen.title, @@ -443,7 +488,7 @@ static void ebpf_create_fs_charts(int update_every) char *type = (efp->flags & NETDATA_FILESYSTEM_ATTR_CHARTS) ? "attribute" : "sync"; snprintfz(title, sizeof(title) - 1, "%s latency for each %s request.", efp->filesystem, type); snprintfz(chart_name, sizeof(chart_name) - 1, "%s_%s_latency", efp->filesystem, type); - snprintfz(ctx, sizeof(ctx) - 1, "filesystem.%s_latency", type); + snprintfz(ctx, sizeof(ctx) - 1, "filesystem.%s_latency", efp->filesystem); efp->hadditional.name = strdupz(chart_name); efp->hadditional.title = strdupz(title); efp->hadditional.ctx = strdupz(ctx); @@ -499,11 +544,14 @@ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em) if (!efp->fs_obj) { em->info.thread_name = saved_name; em->kernels = kernels; + pthread_mutex_unlock(&lock); + return -1; + } else if (ebpf_fs_load_and_attach(em->maps, efp->fs_obj, + efp->functions, NULL)) { + em->info.thread_name = saved_name; + em->kernels = kernels; + pthread_mutex_unlock(&lock); return -1; - } else { - if (ebpf_fs_load_and_attach(em->maps, efp->fs_obj, - efp->functions, NULL)) - return -1; } } #endif @@ -572,7 +620,9 @@ static int ebpf_read_local_partitions() ebpf_filesystem_partitions_t *w = &localfs[i]; if (w->enabled && (!strcmp(fs, w->filesystem) || (w->optional_filesystem && !strcmp(fs, w->optional_filesystem)))) { - localfs[i].flags |= NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM; + if (!(localfs[i].flags & NETDATA_FILESYSTEM_FLAG_CHART_CREATED)) + localfs[i].flags |= NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM; + localfs[i].flags &= ~NETDATA_FILESYSTEM_REMOVE_CHARTS; count++; break; @@ -756,8 +806,8 @@ static void ebpf_filesystem_exit(void *pptr) pthread_mutex_lock(&lock); ebpf_obsolete_filesystem_global(em); - pthread_mutex_unlock(&lock); fflush(stdout); + pthread_mutex_unlock(&lock); } ebpf_filesystem_cleanup_ebpf_data(); @@ -889,10 +939,10 @@ static void read_filesystem_tables(int maps_per_core) */ void ebpf_filesystem_read_hash(ebpf_module_t *em) { - ebpf_obsolete_fs_charts(em->update_every); - (void) ebpf_update_partitions(em); + ebpf_obsolete_fs_charts(em->update_every); + if (em->optional) return; diff --git a/src/collectors/ebpf.plugin/ebpf_functions.c b/src/collectors/ebpf.plugin/ebpf_functions.c index 4a43bf434..8e9fb01ed 100644 --- a/src/collectors/ebpf.plugin/ebpf_functions.c +++ b/src/collectors/ebpf.plugin/ebpf_functions.c @@ -331,7 +331,7 @@ static void ebpf_function_socket_manipulation(const char *transaction, "Filters can be combined. Each filter can be given only one time. 
Default all ports\n" }; -for (int i = 1; i < PLUGINSD_MAX_WORDS; i++) { + for (int i = 1; i < PLUGINSD_MAX_WORDS; i++) { const char *keyword = get_word(words, num_words, i); if (!keyword) break; @@ -428,6 +428,7 @@ for (int i = 1; i < PLUGINSD_MAX_WORDS; i++) { ebpf_socket_clean_judy_array_unsafe(); rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock); + collect_pids |= 1<<EBPF_MODULE_SOCKET_IDX; pthread_mutex_lock(&ebpf_exit_cleanup); if (ebpf_function_start_thread(em, period)) { ebpf_function_error(transaction, diff --git a/src/collectors/ebpf.plugin/ebpf_oomkill.c b/src/collectors/ebpf.plugin/ebpf_oomkill.c index 8ecd0883c..34361550b 100644 --- a/src/collectors/ebpf.plugin/ebpf_oomkill.c +++ b/src/collectors/ebpf.plugin/ebpf_oomkill.c @@ -55,9 +55,9 @@ static void ebpf_obsolete_specific_oomkill_charts(char *type, int update_every); */ static void ebpf_obsolete_oomkill_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_OOMKILL_CHART, + "", "Systemd service OOM kills.", EBPF_OOMKILL_UNIT_KILLS, NETDATA_EBPF_MEMORY_GROUP, @@ -133,6 +133,10 @@ static void oomkill_cleanup(void *pptr) ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_OOMKILL_IDX); + pthread_mutex_unlock(&lock); + if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { pthread_mutex_lock(&lock); @@ -242,7 +246,7 @@ static void ebpf_create_systemd_oomkill_charts(int update_every) .charttype = NETDATA_EBPF_CHART_TYPE_STACKED, .order = 20191, .algorithm = EBPF_CHART_ALGORITHM_INCREMENTAL, - .context = NETDATA_CGROUP_OOMKILLS_CONTEXT, + .context = NETDATA_SYSTEMD_OOMKILLS_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_OOMKILL, .update_every = 0, .suffix = NETDATA_OOMKILL_CHART, @@ -276,7 +280,7 @@ static void ebpf_send_systemd_oomkill_charts() if (unlikely(!(ect->flags & NETDATA_EBPF_SERVICES_HAS_OOMKILL_CHART)) ) { continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_OOMKILL_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_OOMKILL_CHART, ""); write_chart_dimension(oomkill_publish_aggregated.dimension, (long long) ect->oomkill); ect->oomkill = 0; ebpf_write_end_chart(); @@ -549,7 +553,7 @@ void *ebpf_oomkill_thread(void *ptr) em->maps = oomkill_maps; #define NETDATA_DEFAULT_OOM_DISABLED_MSG "Disabling OOMKILL thread, because" - if (unlikely(!ebpf_all_pids || !em->apps_charts)) { + if (unlikely(!em->apps_charts)) { // When we are not running integration with apps, we won't fill necessary variables for this thread to run, so // we need to disable it. 
pthread_mutex_lock(&ebpf_exit_cleanup); diff --git a/src/collectors/ebpf.plugin/ebpf_oomkill.h b/src/collectors/ebpf.plugin/ebpf_oomkill.h index 0d02da9d3..0504181c2 100644 --- a/src/collectors/ebpf.plugin/ebpf_oomkill.h +++ b/src/collectors/ebpf.plugin/ebpf_oomkill.h @@ -28,6 +28,7 @@ typedef uint8_t oomkill_ebpf_val_t; // Contexts #define NETDATA_CGROUP_OOMKILLS_CONTEXT "cgroup.oomkills" +#define NETDATA_SYSTEMD_OOMKILLS_CONTEXT "systemd.oomkills" extern struct config oomkill_config; void *ebpf_oomkill_thread(void *ptr); diff --git a/src/collectors/ebpf.plugin/ebpf_process.c b/src/collectors/ebpf.plugin/ebpf_process.c index e5756fa3c..d2810f899 100644 --- a/src/collectors/ebpf.plugin/ebpf_process.c +++ b/src/collectors/ebpf.plugin/ebpf_process.c @@ -229,13 +229,13 @@ static void ebpf_update_process_cgroup() struct pid_on_target2 *pids; for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; - ebpf_process_stat_t *out = &pids->ps; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - ebpf_process_stat_t *in = &local_pid->process; + ebpf_publish_process_t *out = &pids->ps; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_PROCESS_IDX); + ebpf_publish_process_t *in = local_pid->process; + if (!in) + continue; - memcpy(out, in, sizeof(ebpf_process_stat_t)); - } + memcpy(out, in, sizeof(ebpf_publish_process_t)); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -445,9 +445,9 @@ static void ebpf_obsolete_specific_process_charts(char *type, ebpf_module_t *em) */ static void ebpf_obsolete_process_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_TASK_PROCESS, + "", "Process started", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_PROCESS_GROUP, @@ -456,9 +456,9 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em, char *id) 20065, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_TASK_THREAD, + "", "Threads started", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_PROCESS_GROUP, @@ -467,9 +467,9 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em, char *id) 20066, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_TASK_CLOSE, + "", "Tasks starts exit process.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_PROCESS_GROUP, @@ -478,9 +478,9 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em, char *id) 20067, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_TASK_EXIT, + "", "Tasks closed", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_PROCESS_GROUP, @@ -490,9 +490,9 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_TASK_ERROR, + "", "Errors to create process or threads.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_PROCESS_GROUP, @@ -691,9 +691,14 @@ static void ebpf_process_disable_tracepoints() */ static void ebpf_process_exit(void *pptr) { + pids_fd[EBPF_PIDS_PROCESS_IDX] = -1; ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_PROCESS_IDX); + pthread_mutex_unlock(&lock); + if (em->enabled == 
NETDATA_THREAD_EBPF_FUNCTION_RUNNING) { pthread_mutex_lock(&lock); if (em->cgroup_charts) { @@ -746,13 +751,13 @@ static void ebpf_process_exit(void *pptr) * @param ps structure used to store data * @param pids input data */ -static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_target2 *pids) +static void ebpf_process_sum_cgroup_pids(ebpf_publish_process_t *ps, struct pid_on_target2 *pids) { - ebpf_process_stat_t accumulator; + ebpf_publish_process_t accumulator; memset(&accumulator, 0, sizeof(accumulator)); while (pids) { - ebpf_process_stat_t *pps = &pids->ps; + ebpf_publish_process_t *pps = &pids->ps; accumulator.exit_call += pps->exit_call; accumulator.release_call += pps->release_call; @@ -781,7 +786,7 @@ static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_ * @param values structure with values that will be sent to netdata * @param em the structure with thread information */ -static void ebpf_send_specific_process_data(char *type, ebpf_process_stat_t *values, ebpf_module_t *em) +static void ebpf_send_specific_process_data(char *type, ebpf_publish_process_t *values, ebpf_module_t *em) { ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, ""); write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK].name, @@ -1031,24 +1036,24 @@ static void ebpf_send_systemd_process_charts(ebpf_module_t *em) continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_TASK_PROCESS); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_TASK_PROCESS, ""); write_chart_dimension("calls", ect->publish_systemd_ps.create_process); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_TASK_THREAD); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_TASK_THREAD, ""); write_chart_dimension("calls", ect->publish_systemd_ps.create_thread); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_TASK_EXIT); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_TASK_EXIT, ""); write_chart_dimension("calls", ect->publish_systemd_ps.exit_call); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_TASK_CLOSE); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_TASK_CLOSE, ""); write_chart_dimension("calls", ect->publish_systemd_ps.release_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_TASK_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_TASK_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_ps.task_err); ebpf_write_end_chart(); } diff --git a/src/collectors/ebpf.plugin/ebpf_process.h b/src/collectors/ebpf.plugin/ebpf_process.h index 18ffec1ff..d2990cea6 100644 --- a/src/collectors/ebpf.plugin/ebpf_process.h +++ b/src/collectors/ebpf.plugin/ebpf_process.h @@ -33,16 +33,17 @@ #define NETDATA_CGROUP_PROCESS_EXIT_CONTEXT "cgroup.task_exit" #define NETDATA_CGROUP_PROCESS_ERROR_CONTEXT "cgroup.task_error" -#define NETDATA_SYSTEMD_PROCESS_CREATE_CONTEXT "systemd.services.process_create" -#define NETDATA_SYSTEMD_THREAD_CREATE_CONTEXT "systemd.services.thread_create" -#define NETDATA_SYSTEMD_PROCESS_CLOSE_CONTEXT "systemd.services.task_close" -#define NETDATA_SYSTEMD_PROCESS_EXIT_CONTEXT "systemd.services.task_exit" -#define NETDATA_SYSTEMD_PROCESS_ERROR_CONTEXT "systemd.services.task_error" +#define 
NETDATA_SYSTEMD_PROCESS_CREATE_CONTEXT "systemd.service.process_create" +#define NETDATA_SYSTEMD_THREAD_CREATE_CONTEXT "systemd.service.thread_create" +#define NETDATA_SYSTEMD_PROCESS_CLOSE_CONTEXT "systemd.service.task_close" +#define NETDATA_SYSTEMD_PROCESS_EXIT_CONTEXT "systemd.service.task_exit" +#define NETDATA_SYSTEMD_PROCESS_ERROR_CONTEXT "systemd.service.task_error" #define NETDATA_EBPF_CGROUP_UPDATE 30 enum netdata_ebpf_stats_order { NETDATA_EBPF_ORDER_STAT_THREADS = 140000, + NETDATA_EBPF_ORDER_PIDS, NETDATA_EBPF_ORDER_STAT_LIFE_TIME, NETDATA_EBPF_ORDER_STAT_LOAD_METHOD, NETDATA_EBPF_ORDER_STAT_KERNEL_MEMORY, diff --git a/src/collectors/ebpf.plugin/ebpf_shm.c b/src/collectors/ebpf.plugin/ebpf_shm.c index 8e1999526..ac44549b2 100644 --- a/src/collectors/ebpf.plugin/ebpf_shm.c +++ b/src/collectors/ebpf.plugin/ebpf_shm.c @@ -7,7 +7,7 @@ static char *shm_dimension_name[NETDATA_SHM_END] = { "get", "at", "dt", "ctl" }; static netdata_syscall_stat_t shm_aggregated_data[NETDATA_SHM_END]; static netdata_publish_syscall_t shm_publish_aggregated[NETDATA_SHM_END]; -netdata_publish_shm_t *shm_vector = NULL; +netdata_ebpf_shm_t *shm_vector = NULL; static netdata_idx_t shm_hash_values[NETDATA_SHM_END]; static netdata_idx_t *shm_values = NULL; @@ -287,9 +287,9 @@ static void ebpf_obsolete_specific_shm_charts(char *type, int update_every); */ static void ebpf_obsolete_shm_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SHMGET_CHART, + "", "Calls to syscall shmget(2).", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_IPC_SHM_GROUP, @@ -298,9 +298,9 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em, char *id) 20191, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SHMAT_CHART, + "", "Calls to syscall shmat(2).", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_IPC_SHM_GROUP, @@ -309,9 +309,9 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em, char *id) 20192, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SHMDT_CHART, + "", "Calls to syscall shmdt(2).", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_IPC_SHM_GROUP, @@ -320,9 +320,9 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em, char *id) 20193, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SHMCTL_CHART, + "", "Calls to syscall shmctl(2).", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_IPC_SHM_GROUP, @@ -453,6 +453,10 @@ static void ebpf_shm_exit(void *pptr) ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_SHM_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_shm.thread) nd_thread_signal_cancel(ebpf_read_shm.thread); @@ -506,16 +510,23 @@ static void ebpf_shm_exit(void *pptr) * @param out the vector with read values. * @param maps_per_core do I need to read all cores? */ -static void shm_apps_accumulator(netdata_publish_shm_t *out, int maps_per_core) +static void shm_apps_accumulator(netdata_ebpf_shm_t *out, int maps_per_core) { int i, end = (maps_per_core) ? 
ebpf_nprocs : 1; - netdata_publish_shm_t *total = &out[0]; + netdata_ebpf_shm_t *total = &out[0]; + uint64_t ct = total->ct; for (i = 1; i < end; i++) { - netdata_publish_shm_t *w = &out[i]; + netdata_ebpf_shm_t *w = &out[i]; total->get += w->get; total->at += w->at; total->dt += w->dt; total->ctl += w->ctl; + + if (w->ct > ct) + ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } } @@ -528,7 +539,7 @@ static void shm_apps_accumulator(netdata_publish_shm_t *out, int maps_per_core) */ static void ebpf_update_shm_cgroup() { - netdata_publish_shm_t *cv = shm_vector; + netdata_ebpf_shm_t *cv = shm_vector; size_t length = sizeof(netdata_publish_shm_t); ebpf_cgroup_target_t *ect; @@ -541,12 +552,12 @@ static void ebpf_update_shm_cgroup() for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; netdata_publish_shm_t *out = &pids->shm; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_shm_t *in = &local_pid->shm; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_SHM_IDX); + netdata_publish_shm_t *in = local_pid->shm; + if (!in) + continue; - memcpy(out, in, sizeof(netdata_publish_shm_t)); - } + memcpy(out, in, sizeof(netdata_publish_shm_t)); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -558,12 +569,13 @@ static void ebpf_update_shm_cgroup() * Read the apps table and store data inside the structure. * * @param maps_per_core do I need to read all cores? + * @param max_period limit of iterations without updates before remove data from hash table */ -static void ebpf_read_shm_apps_table(int maps_per_core, int max_period) +static void ebpf_read_shm_apps_table(int maps_per_core, uint32_t max_period) { - netdata_publish_shm_t *cv = shm_vector; + netdata_ebpf_shm_t *cv = shm_vector; int fd = shm_maps[NETDATA_PID_SHM_TABLE].map_fd; - size_t length = sizeof(netdata_publish_shm_t); + size_t length = sizeof(netdata_ebpf_shm_t); if (maps_per_core) length *= ebpf_nprocs; @@ -575,18 +587,22 @@ static void ebpf_read_shm_apps_table(int maps_per_core, int max_period) shm_apps_accumulator(cv, maps_per_core); - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(key, 0); - if (!local_pid) - goto end_shm_loop; - + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key, cv->tgid, cv->name, EBPF_PIDS_SHM_IDX); + netdata_publish_shm_t *publish = local_pid->shm; + if (!publish) + local_pid->shm = publish = ebpf_shm_allocate_publish(); - netdata_publish_shm_t *publish = &local_pid->shm; if (!publish->ct || publish->ct != cv->ct) { memcpy(publish, &cv[0], sizeof(netdata_publish_shm_t)); local_pid->not_updated = 0; - } else if (++local_pid->not_updated >= max_period){ - bpf_map_delete_elem(fd, &key); - local_pid->not_updated = 0; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(local_pid); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(local_pid, fd, key, EBPF_PIDS_SHM_IDX); + ebpf_shm_release_publish(publish); + local_pid->shm = NULL; + } } end_shm_loop: @@ -654,23 +670,17 @@ static void ebpf_shm_read_global_table(netdata_idx_t *stats, int maps_per_core) static void ebpf_shm_sum_pids(netdata_publish_shm_t *shm, struct ebpf_pid_on_target *root) { memset(shm, 0, sizeof(netdata_publish_shm_t)); - while (root) { + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *pid_stat = ebpf_get_pid_entry(pid, 0); - if (pid_stat) { - netdata_publish_shm_t *w = &pid_stat->shm; - shm->get += w->get; - shm->at += 
w->at; - shm->dt += w->dt; - shm->ctl += w->ctl; - - // reset for next collection. - w->get = 0; - w->at = 0; - w->dt = 0; - w->ctl = 0; - } - root = root->next; + ebpf_pid_data_t *pid_stat = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_SHM_IDX); + netdata_publish_shm_t *w = pid_stat->shm; + if (!w) + continue; + + shm->get += w->get; + shm->at += w->at; + shm->dt += w->dt; + shm->ctl += w->ctl; } } @@ -941,19 +951,19 @@ static void ebpf_send_systemd_shm_charts() continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SHMGET_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_SHMGET_CHART, ""); write_chart_dimension("calls", (long long)ect->publish_shm.get); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SHMAT_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_SHMAT_CHART, ""); write_chart_dimension("calls", (long long)ect->publish_shm.at); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SHMDT_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_SHMDT_CHART, ""); write_chart_dimension("calls", (long long)ect->publish_shm.dt); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SHMCTL_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_SHMCTL_CHART, ""); write_chart_dimension("calls", (long long)ect->publish_shm.ctl); ebpf_write_end_chart(); } @@ -1060,13 +1070,17 @@ void *ebpf_read_shm_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; usec_t period = update_every * USEC_PER_SEC; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; + pids_fd[EBPF_PIDS_SHM_IDX] = shm_maps[NETDATA_PID_SHM_TABLE].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); if (ebpf_plugin_stop() || ++counter != update_every) @@ -1325,6 +1339,7 @@ static int ebpf_shm_load_bpf(ebpf_module_t *em) */ void *ebpf_shm_thread(void *ptr) { + pids_fd[EBPF_PIDS_SHM_IDX] = -1; ebpf_module_t *em = (ebpf_module_t *)ptr; CLEANUP_FUNCTION_REGISTER(ebpf_shm_exit) cleanup_ptr = em; @@ -1363,7 +1378,8 @@ void *ebpf_shm_thread(void *ptr) ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD); pthread_mutex_unlock(&lock); - ebpf_read_shm.thread = nd_thread_create(ebpf_read_shm.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_read_shm_thread, em); + ebpf_read_shm.thread = nd_thread_create(ebpf_read_shm.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_read_shm_thread, em); shm_collector(em); diff --git a/src/collectors/ebpf.plugin/ebpf_shm.h b/src/collectors/ebpf.plugin/ebpf_shm.h index 5a670b1b5..6f89faa9e 100644 --- a/src/collectors/ebpf.plugin/ebpf_shm.h +++ b/src/collectors/ebpf.plugin/ebpf_shm.h @@ -23,21 +23,33 @@ #define NETDATA_CGROUP_SHM_DT_CONTEXT "cgroup.shmdt" #define NETDATA_CGROUP_SHM_CTL_CONTEXT "cgroup.shmctl" -#define NETDATA_SYSTEMD_SHM_GET_CONTEXT "systemd.services.shmget" -#define NETDATA_SYSTEMD_SHM_AT_CONTEXT "systemd.services.shmat" -#define NETDATA_SYSTEMD_SHM_DT_CONTEXT "systemd.services.shmdt" -#define NETDATA_SYSTEMD_SHM_CTL_CONTEXT "systemd.services.shmctl" +#define NETDATA_SYSTEMD_SHM_GET_CONTEXT "systemd.service.shmget" +#define NETDATA_SYSTEMD_SHM_AT_CONTEXT "systemd.service.shmat" +#define NETDATA_SYSTEMD_SHM_DT_CONTEXT 
"systemd.service.shmdt" +#define NETDATA_SYSTEMD_SHM_CTL_CONTEXT "systemd.service.shmctl" -typedef struct netdata_publish_shm { +typedef struct __attribute__((packed)) netdata_publish_shm { uint64_t ct; - char name[TASK_COMM_LEN]; - uint64_t get; - uint64_t at; - uint64_t dt; - uint64_t ctl; + uint32_t get; + uint32_t at; + uint32_t dt; + uint32_t ctl; } netdata_publish_shm_t; +typedef struct netdata_ebpf_shm { + uint64_t ct; + uint32_t tgid; + uint32_t uid; + uint32_t gid; + char name[TASK_COMM_LEN]; + + uint32_t get; + uint32_t at; + uint32_t dt; + uint32_t ctl; +} netdata_ebpf_shm_t; + enum shm_tables { NETDATA_PID_SHM_TABLE, NETDATA_SHM_CONTROLLER, diff --git a/src/collectors/ebpf.plugin/ebpf_socket.c b/src/collectors/ebpf.plugin/ebpf_socket.c index 9a55f7be4..5b87a3256 100644 --- a/src/collectors/ebpf.plugin/ebpf_socket.c +++ b/src/collectors/ebpf.plugin/ebpf_socket.c @@ -497,6 +497,10 @@ static void ebpf_socket_free(ebpf_module_t *em ) ebpf_update_stats(&plugin_statistics, em); ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE); pthread_mutex_unlock(&ebpf_exit_cleanup); + + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_SOCKET_IDX); + pthread_mutex_unlock(&lock); } /** @@ -509,9 +513,9 @@ static void ebpf_socket_free(ebpf_module_t *em ) static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) { int order = 20080; - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_CONNECTION_TCP_V4, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, + "", "Calls to tcp_v4_connection", EBPF_COMMON_UNITS_CONNECTIONS, NETDATA_APPS_NET_GROUP, @@ -521,9 +525,9 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) update_every); if (tcp_v6_connect_address.type == 'T') { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_CONNECTION_TCP_V6, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, + "", "Calls to tcp_v6_connection", EBPF_COMMON_UNITS_CONNECTIONS, NETDATA_APPS_NET_GROUP, @@ -533,31 +537,20 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_RECV, - "Bits received", - EBPF_COMMON_UNITS_KILOBITS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT, - order++, - update_every); - - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_SENT, - "Bits sent", + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, + "", + "Bandwidth.", EBPF_COMMON_UNITS_KILOBITS, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT, + NETDATA_SERVICES_SOCKET_TCP_BANDWIDTH_CONTEXT, order++, update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, + "", "Calls to tcp_cleanup_rbuf.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, @@ -566,9 +559,9 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) order++, update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, + "", "Calls to tcp_sendmsg.", 
EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, @@ -577,9 +570,9 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) order++, update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, + "", "Calls to tcp_retransmit", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, @@ -588,9 +581,9 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) order++, update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, + "", "Calls to udp_sendmsg", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, @@ -599,9 +592,9 @@ static void ebpf_obsolete_systemd_socket_charts(int update_every, char *id) order++, update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, - NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, + ebpf_write_chart_obsolete(id, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, + "", "Calls to udp_recvmsg", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, @@ -678,23 +671,12 @@ void ebpf_socket_obsolete_apps_charts(struct ebpf_module *em) ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, w->clean_name, - "_ebpf_sock_bytes_sent", - "Bits sent.", + "_ebpf_sock_bandwidth", + "Bandwidth.", EBPF_COMMON_UNITS_KILOBITS, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - "app.ebpf_sock_bytes_sent", - order++, - update_every); - - ebpf_write_chart_obsolete(NETDATA_APP_FAMILY, - w->clean_name, - "_ebpf_sock_bytes_received", - "Bits received.", - EBPF_COMMON_UNITS_KILOBITS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - "app.ebpf_sock_bytes_received", + "app.ebpf_sock_total_bandwidth", order++, update_every); @@ -1056,18 +1038,14 @@ void ebpf_socket_send_apps_data() if (tcp_v6_connect_address.type == 'T') { ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_call_tcp_v6_connection"); - write_chart_dimension("calls", (collected_number) values->call_tcp_v6_connection); + write_chart_dimension("connections", (collected_number) values->call_tcp_v6_connection); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_sock_bytes_sent"); - // We multiply by 0.008, because we read bytes, but we display bits - write_chart_dimension("bandwidth", ebpf_socket_bytes2bits(values->bytes_sent)); - ebpf_write_end_chart(); - - ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_sock_bytes_received"); + ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_sock_bandwidth"); // We multiply by 0.008, because we read bytes, but we display bits - write_chart_dimension("bandwidth", ebpf_socket_bytes2bits(values->bytes_received)); + write_chart_dimension("received", ebpf_socket_bytes2bits(values->bytes_received)); + write_chart_dimension("sent", ebpf_socket_bytes2bits(values->bytes_sent)); ebpf_write_end_chart(); ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_tcp_sendmsg"); @@ -1273,33 +1251,19 @@ void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr) ebpf_write_chart_cmd(NETDATA_APP_FAMILY, w->clean_name, - "_ebpf_sock_bytes_sent", - "Bits sent.", - EBPF_COMMON_UNITS_KILOBITS, - NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_STACKED, - "app.ebpf_sock_bytes_sent", - order++, - update_every, - NETDATA_EBPF_MODULE_NAME_SOCKET); 
- ebpf_create_chart_labels("app_group", w->name, RRDLABEL_SRC_AUTO); - ebpf_commit_label(); - fprintf(stdout, "DIMENSION bandwidth '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); - - ebpf_write_chart_cmd(NETDATA_APP_FAMILY, - w->clean_name, - "_ebpf_sock_bytes_received", - "Bits received.", + "_ebpf_sock_bandwidth", + "Bandwidth.", EBPF_COMMON_UNITS_KILOBITS, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_STACKED, - "app.ebpf_sock_bytes_received", + "app.ebpf_sock_total_bandwidth", order++, update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("app_group", w->name, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - fprintf(stdout, "DIMENSION bandwidth '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + fprintf(stdout, "DIMENSION received '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + fprintf(stdout, "DIMENSION sent '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); ebpf_write_chart_cmd(NETDATA_APP_FAMILY, w->clean_name, @@ -1714,6 +1678,7 @@ static void ebpf_update_array_vectors(ebpf_module_t *em) time_t update_time = time(NULL); while (bpf_map_get_next_key(fd, &key, &next_key) == 0) { test = bpf_map_lookup_elem(fd, &key, values); + bool deleted = true; if (test < 0) { goto end_socket_loop; } @@ -1723,7 +1688,6 @@ static void ebpf_update_array_vectors(ebpf_module_t *em) } ebpf_hash_socket_accumulator(values, end); - ebpf_socket_fill_publish_apps(key.pid, values); // We update UDP to show info with charts, but we do not show them with functions /* @@ -1767,14 +1731,17 @@ static void ebpf_update_array_vectors(ebpf_module_t *em) } uint64_t prev_period = socket_ptr->data.current_timestamp; memcpy(&socket_ptr->data, &values[0], sizeof(netdata_socket_t)); - if (translate) + if (translate) { ebpf_socket_translate(socket_ptr, &key); - else { // Check socket was updated + deleted = false; + } else { // Check socket was updated + deleted = false; if (prev_period) { if (values[0].current_timestamp > prev_period) // Socket updated socket_ptr->last_update = update_time; else if ((update_time - socket_ptr->last_update) > em->update_every) { // Socket was not updated since last read + deleted = true; JudyLDel(&pid_ptr->socket_stats.JudyLArray, values[0].first_timestamp, PJE0); aral_freez(aral_socket_table, socket_ptr); } @@ -1785,7 +1752,19 @@ static void ebpf_update_array_vectors(ebpf_module_t *em) rw_spinlock_write_unlock(&pid_ptr->socket_stats.rw_spinlock); rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock); -end_socket_loop: +end_socket_loop: ; // the empty statement is here to allow code to be compiled by old compilers + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key.pid, 0, values[0].name, EBPF_MODULE_SOCKET_IDX); + ebpf_socket_publish_apps_t *curr = local_pid->socket; + if (!curr) + local_pid->socket = curr = ebpf_socket_allocate_publish(); + + if (!deleted) + ebpf_socket_fill_publish_apps(curr, values); + else { + ebpf_release_pid_data(local_pid, fd, key.pid, EBPF_MODULE_SOCKET_IDX); + ebpf_socket_release_publish(curr); + local_pid->socket = NULL; + } memset(values, 0, length); memcpy(&key, &next_key, sizeof(key)); } @@ -1805,23 +1784,22 @@ void ebpf_socket_resume_apps_data() ebpf_socket_publish_apps_t *values = &w->socket; memset(&w->socket, 0, sizeof(ebpf_socket_publish_apps_t)); - while (move) { + for (; move; move = move->next) { int32_t pid = move->pid; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - ebpf_socket_publish_apps_t *ws = &local_pid->socket; - 
values->call_tcp_v4_connection = ws->call_tcp_v4_connection; - values->call_tcp_v6_connection = ws->call_tcp_v6_connection; - values->bytes_sent = ws->bytes_sent; - values->bytes_received = ws->bytes_received; - values->call_tcp_sent = ws->call_tcp_sent; - values->call_tcp_received = ws->call_tcp_received; - values->retransmit = ws->retransmit; - values->call_udp_sent = ws->call_udp_sent; - values->call_udp_received = ws->call_udp_received; - } - - move = move->next; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_MODULE_SOCKET_IDX); + ebpf_socket_publish_apps_t *ws = local_pid->socket; + if (!ws) + continue; + + values->call_tcp_v4_connection = ws->call_tcp_v4_connection; + values->call_tcp_v6_connection = ws->call_tcp_v6_connection; + values->bytes_sent = ws->bytes_sent; + values->bytes_received = ws->bytes_received; + values->call_tcp_sent = ws->call_tcp_sent; + values->call_tcp_received = ws->call_tcp_received; + values->retransmit = ws->retransmit; + values->call_udp_sent = ws->call_udp_sent; + values->call_udp_received = ws->call_udp_received; } } } @@ -1846,6 +1824,9 @@ void *ebpf_read_socket_thread(void *ptr) int update_every = em->update_every; int counter = update_every - 1; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; uint32_t running_time = 0; uint32_t lifetime = em->lifetime; @@ -2009,14 +1990,8 @@ static void ebpf_socket_read_hash_global_tables(netdata_idx_t *stats, int maps_p * @param current_pid the PID that I am updating * @param ns the structure with data read from memory. */ -void ebpf_socket_fill_publish_apps(uint32_t current_pid, netdata_socket_t *ns) +void ebpf_socket_fill_publish_apps(ebpf_socket_publish_apps_t *curr, netdata_socket_t *ns) { - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(current_pid, 0); - if (!local_pid) - return; - - ebpf_socket_publish_apps_t *curr = &local_pid->socket; - curr->bytes_sent = ns->tcp.tcp_bytes_sent; curr->bytes_received = ns->tcp.tcp_bytes_received; curr->call_tcp_sent = ns->tcp.call_tcp_sent; @@ -2045,21 +2020,21 @@ static void ebpf_update_socket_cgroup() for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; ebpf_socket_publish_apps_t *publish = &ect->publish_socket; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - ebpf_socket_publish_apps_t *in = &local_pid->socket; - - publish->bytes_sent = in->bytes_sent; - publish->bytes_received = in->bytes_received; - publish->call_tcp_sent = in->call_tcp_sent; - publish->call_tcp_received = in->call_tcp_received; - publish->retransmit = in->retransmit; - publish->call_udp_sent = in->call_udp_sent; - publish->call_udp_received = in->call_udp_received; - publish->call_close = in->call_close; - publish->call_tcp_v4_connection = in->call_tcp_v4_connection; - publish->call_tcp_v6_connection = in->call_tcp_v6_connection; - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_MODULE_SOCKET_IDX); + ebpf_socket_publish_apps_t *in = local_pid->socket; + if (!in) + continue; + + publish->bytes_sent = in->bytes_sent; + publish->bytes_received = in->bytes_received; + publish->call_tcp_sent = in->call_tcp_sent; + publish->call_tcp_received = in->call_tcp_received; + publish->retransmit = in->retransmit; + publish->call_udp_sent = in->call_udp_sent; + publish->call_udp_received = in->call_udp_received; + publish->call_close = in->call_close; + publish->call_tcp_v4_connection = in->call_tcp_v4_connection; + publish->call_tcp_v6_connection = in->call_tcp_v6_connection; } } 
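/* Editor's note: schematic only, with invented stand-in names. The loops above
 * (ebpf_socket_resume_apps_data and ebpf_update_socket_cgroup) no longer read an embedded
 * per-module struct from every PID entry; they follow a pointer that exists only for PIDs
 * that actually produced socket data, so every consumer must tolerate a NULL pointer. */
#include <stdint.h>

typedef struct { uint64_t bytes_sent, bytes_received; /* ...more counters... */ } publish_t;

struct old_pid_entry { uint32_t pid; publish_t socket;  };  /* old layout: counters always present */
struct new_pid_entry { uint32_t pid; publish_t *socket; };  /* new layout: NULL until first sample */

static void sum_target(publish_t *out, const struct new_pid_entry *e)
{
    if (!e->socket)  /* PID never produced socket data for this module */
        return;
    out->bytes_sent     += e->socket->bytes_sent;
    out->bytes_received += e->socket->bytes_received;
}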
pthread_mutex_unlock(&mutex_cgroup_shm); @@ -2121,119 +2096,128 @@ static void ebpf_create_specific_socket_charts(char *type, int update_every) { int order_basis = 5300; char *label = (!strncmp(type, "cgroup_", 7)) ? &type[7] : type; - ebpf_create_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, - "Calls to tcp_v4_connection", - EBPF_COMMON_UNITS_CONNECTIONS, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_TCP_V4_CONN_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, + "", + "Calls to tcp_v4_connection", + EBPF_COMMON_UNITS_CONNECTIONS, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_TCP_V4_CONN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); + fprintf(stdout, "DIMENSION connections '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); if (tcp_v6_connect_address.type == 'T') { - ebpf_create_chart(type, - NETDATA_NET_APPS_CONNECTION_TCP_V6, - "Calls to tcp_v6_connection", - EBPF_COMMON_UNITS_CONNECTIONS, - NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_TCP_V6_CONN_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6], - 1, - update_every, - NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, + "", + "Calls to tcp_v6_connection", + EBPF_COMMON_UNITS_CONNECTIONS, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_TCP_V6_CONN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); + fprintf(stdout, "DIMENSION connections '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); } - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV, - "Bits received", - EBPF_COMMON_UNITS_KILOBITS, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_BYTES_RECV_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, + "", + "Bandwidth.", + EBPF_COMMON_UNITS_KILOBITS, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_TCP_BANDWIDTH_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT, - "Bits sent", - EBPF_COMMON_UNITS_KILOBITS, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_BYTES_SEND_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - socket_publish_aggregated, 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + fprintf(stdout, "DIMENSION received '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + 
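/* Editor's note: not part of the patch. The rewrite above replaces ebpf_create_chart(),
 * which registered dimensions through a callback, with ebpf_write_chart_cmd() followed by
 * explicit DIMENSION lines on stdout. The dimension id written here ("connections",
 * "received", "sent", "calls") has to match the id later passed to write_chart_dimension()
 * when samples are sent. Minimal sketch for one chart; the cgroup name is hypothetical and
 * the fixed 5300 stands in for the order_basis counter used above. */
static void example_create_tcp_v4_chart(int update_every)
{
    ebpf_write_chart_cmd("cgroup_mygroup",
                         NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4,
                         "",
                         "Calls to tcp_v4_connection",
                         EBPF_COMMON_UNITS_CONNECTIONS,
                         NETDATA_CGROUP_NET_GROUP,
                         NETDATA_EBPF_CHART_TYPE_LINE,
                         NETDATA_CGROUP_TCP_V4_CONN_CONTEXT,
                         NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5300,
                         update_every,
                         NETDATA_EBPF_MODULE_NAME_SOCKET);
    ebpf_create_chart_labels("cgroup_name", "mygroup", RRDLABEL_SRC_AUTO);
    ebpf_commit_label();
    fprintf(stdout, "DIMENSION connections '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
}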
fprintf(stdout, "DIMENSION sent '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, + "", + "Calls to tcp_cleanup_rbuf.", + EBPF_COMMON_UNITS_CALLS_PER_SEC, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, - "Calls to tcp_cleanup_rbuf.", - EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, + "", + "Calls to tcp_sendmsg.", + EBPF_COMMON_UNITS_CALLS_PER_SEC, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, - "Calls to tcp_sendmsg.", - EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - socket_publish_aggregated, 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, + "", + "Calls to tcp_retransmit.", + EBPF_COMMON_UNITS_CALLS_PER_SEC, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, - "Calls to tcp_retransmit.", - EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, + "", + "Calls to udp_sendmsg.", + EBPF_COMMON_UNITS_CALLS_PER_SEC, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); - - ebpf_create_chart(type, 
NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, - "Calls to udp_sendmsg", - EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_UDP_SENDMSG], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); - ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); - ebpf_commit_label(); - - ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, - "Calls to udp_recvmsg", - EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_CGROUP_NET_GROUP, - NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT, - NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, - ebpf_create_global_dimension, - &socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF], 1, - update_every, NETDATA_EBPF_MODULE_NAME_SOCKET); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); + + ebpf_write_chart_cmd(type, + NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, + "", + "Calls to udp_recvmsg.", + EBPF_COMMON_UNITS_CALLS_PER_SEC, + NETDATA_CGROUP_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every, + NETDATA_EBPF_MODULE_NAME_SOCKET); ebpf_create_chart_labels("cgroup_name", label, RRDLABEL_SRC_AUTO); ebpf_commit_label(); + fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]); } /** @@ -2247,57 +2231,65 @@ static void ebpf_create_specific_socket_charts(char *type, int update_every) static void ebpf_obsolete_specific_socket_charts(char *type, int update_every) { int order_basis = 5300; - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, "", "Calls to tcp_v4_connection", - EBPF_COMMON_UNITS_CONNECTIONS, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); + ebpf_write_chart_obsolete(type, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, + "", + "Calls to tcp_v4_connection", + EBPF_COMMON_UNITS_CONNECTIONS, + NETDATA_APPS_NET_GROUP, + NETDATA_EBPF_CHART_TYPE_LINE, + NETDATA_CGROUP_TCP_V4_CONN_CONTEXT, + NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, + update_every); if (tcp_v6_connect_address.type == 'T') { ebpf_write_chart_obsolete(type, - NETDATA_NET_APPS_CONNECTION_TCP_V6, + NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, "", "Calls to tcp_v6_connection", EBPF_COMMON_UNITS_CONNECTIONS, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, + NETDATA_CGROUP_TCP_V6_CONN_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); } - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_RECV, "", "Bits received", - EBPF_COMMON_UNITS_KILOBITS, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT, - NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_SENT, "","Bits sent", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, + "", + "Bandwidth.", EBPF_COMMON_UNITS_KILOBITS, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SOCKET_TCP_BANDWIDTH_CONTEXT, 
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, "", "Calls to tcp_cleanup_rbuf.", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, "", + "Calls to tcp_cleanup_rbuf.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, "", "Calls to tcp_sendmsg.", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, "", + "Calls to tcp_sendmsg.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, "", "Calls to tcp_retransmit.", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, "", + "Calls to tcp_retransmit.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, "", "Calls to udp_sendmsg", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, "", + "Calls to udp_sendmsg.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, - NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT, + NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); - ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, "", "Calls to udp_recvmsg", + ebpf_write_chart_obsolete(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, "", + "Calls to udp_recvmsg.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE, - NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT, + NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every); } @@ -2311,51 +2303,39 @@ static void ebpf_obsolete_specific_socket_charts(char *type, int update_every) */ static void ebpf_send_specific_socket_data(char *type, ebpf_socket_publish_apps_t *values) { - ebpf_write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4].name, - (long long) values->call_tcp_v4_connection); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, ""); + write_chart_dimension("connections", (long long) values->call_tcp_v4_connection); ebpf_write_end_chart(); if (tcp_v6_connect_address.type == 'T') { - ebpf_write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V6, ""); - write_chart_dimension( - socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6].name, (long long)values->call_tcp_v6_connection); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, ""); + write_chart_dimension("connections", (long long)values->call_tcp_v6_connection); 
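/* Editor's note: not part of the patch. With fixed dimension ids the runtime path becomes
 * symmetric with chart creation: the same id string appears in the DIMENSION line, in
 * write_chart_dimension() at collection time, and, when a cgroup disappears, the obsolete
 * message has to carry the same id and (as corrected in the hunk above) the same
 * NETDATA_CGROUP_* context the chart was created with. Hypothetical sketch for one sample: */
static void example_send_tcp_v4_sample(const char *type, long long connections)
{
    ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, "");
    write_chart_dimension("connections", connections);  /* id matches the DIMENSION line declared at creation */
    ebpf_write_end_chart();
}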
ebpf_write_end_chart(); } - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name, - (long long) ebpf_socket_bytes2bits(values->bytes_sent)); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, ""); + write_chart_dimension("received", (long long) ebpf_socket_bytes2bits(values->bytes_received)); + write_chart_dimension("sent", (long long) ebpf_socket_bytes2bits(values->bytes_sent)); ebpf_write_end_chart(); - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name, - (long long) ebpf_socket_bytes2bits(values->bytes_received)); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, ""); + write_chart_dimension("calls", (long long) values->call_tcp_received); ebpf_write_end_chart(); - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name, - (long long) values->call_tcp_sent); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, ""); + write_chart_dimension("calls", (long long) values->call_tcp_sent); ebpf_write_end_chart(); - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name, - (long long) values->call_tcp_received); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, ""); + write_chart_dimension("calls", (long long) values->retransmit); ebpf_write_end_chart(); - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT].name, - (long long) values->retransmit); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, ""); + write_chart_dimension("calls", (long long) values->call_udp_sent); ebpf_write_end_chart(); - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_SENDMSG].name, - (long long) values->call_udp_sent); - ebpf_write_end_chart(); - - ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, ""); - write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF].name, - (long long) values->call_udp_received); + ebpf_write_begin_chart(type, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, ""); + write_chart_dimension("calls", (long long) values->call_udp_received); ebpf_write_end_chart(); } @@ -2378,8 +2358,8 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_CONNECTION_TCP_V4, - .dimension = EBPF_COMMON_UNITS_CONNECTIONS + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, + .dimension = "connections" }; static ebpf_systemd_args_t data_tcp_v6 = { @@ -2392,36 +2372,22 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_CONNECTION_TCP_V6, - .dimension = "connection" + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, + .dimension = "connections" }; - static ebpf_systemd_args_t data_bandwith_recv = { - .title = "Bits 
received", + static ebpf_systemd_args_t data_bandwidth = { + .title = "Bandwidth.", .units = EBPF_COMMON_UNITS_KILOBITS, .family = NETDATA_APPS_NET_GROUP, .charttype = NETDATA_EBPF_CHART_TYPE_STACKED, .order = 20082, .algorithm = EBPF_CHART_ALGORITHM_INCREMENTAL, - .context = NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT, + .context = NETDATA_SERVICES_SOCKET_TCP_BANDWIDTH_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_RECV, - .dimension = "connection" - }; - - static ebpf_systemd_args_t data_bandwith_sent = { - .title = "Bits sent", - .units = EBPF_COMMON_UNITS_KILOBITS, - .family = NETDATA_APPS_NET_GROUP, - .charttype = NETDATA_EBPF_CHART_TYPE_STACKED, - .order = 20083, - .algorithm = EBPF_CHART_ALGORITHM_INCREMENTAL, - .context = NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT, - .module = NETDATA_EBPF_MODULE_NAME_SOCKET, - .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_SENT, - .dimension = EBPF_COMMON_UNITS_KILOBITS + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, + .dimension = "received,sent" }; static ebpf_systemd_args_t data_tcp_cleanup = { @@ -2434,7 +2400,7 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, .dimension = "calls" }; @@ -2448,7 +2414,7 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, .dimension = "calls" }; @@ -2462,7 +2428,7 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, .dimension = "calls" }; @@ -2476,7 +2442,7 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, .dimension = "calls" }; @@ -2490,13 +2456,13 @@ static void ebpf_create_systemd_socket_charts(int update_every) .context = NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT, .module = NETDATA_EBPF_MODULE_NAME_SOCKET, .update_every = 0, - .suffix = NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, + .suffix = NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, .dimension = "calls" }; if (!data_tcp_v4.update_every) - data_tcp_v4.update_every = data_tcp_v6.update_every = data_bandwith_recv.update_every = - data_bandwith_sent.update_every = data_tcp_cleanup.update_every = data_tcp_sendmsg.update_every = + data_tcp_v4.update_every = data_tcp_v6.update_every = data_bandwidth.update_every = + data_tcp_cleanup.update_every = data_tcp_sendmsg.update_every = data_tcp_retransmit.update_every = data_udp_send.update_every = data_udp_recv.update_every = update_every; ebpf_cgroup_target_t *w; @@ -2504,8 +2470,8 @@ static void ebpf_create_systemd_socket_charts(int update_every) if (unlikely(!w->systemd || w->flags & NETDATA_EBPF_SERVICES_HAS_SOCKET_CHART)) continue; - 
data_tcp_v4.id = data_tcp_v6.id = data_bandwith_recv.id = - data_bandwith_sent.id = data_tcp_cleanup.id = data_tcp_sendmsg.id = + data_tcp_v4.id = data_tcp_v6.id = data_bandwidth.id = + data_tcp_cleanup.id = data_tcp_sendmsg.id = data_tcp_retransmit.id = data_udp_send.id = data_udp_recv.id = w->name; ebpf_create_charts_on_systemd(&data_tcp_v4); @@ -2513,8 +2479,7 @@ static void ebpf_create_systemd_socket_charts(int update_every) ebpf_create_charts_on_systemd(&data_tcp_v6); } - ebpf_create_charts_on_systemd(&data_bandwith_recv); - ebpf_create_charts_on_systemd(&data_bandwith_sent); + ebpf_create_charts_on_systemd(&data_bandwidth); ebpf_create_charts_on_systemd(&data_tcp_cleanup); @@ -2543,41 +2508,38 @@ static void ebpf_send_systemd_socket_charts() continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_CONNECTION_TCP_V4); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4, ""); write_chart_dimension("connections", (long long)ect->publish_socket.call_tcp_v4_connection); ebpf_write_end_chart(); if (tcp_v6_connect_address.type == 'T') { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_CONNECTION_TCP_V6); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6, ""); write_chart_dimension("connections", (long long)ect->publish_socket.call_tcp_v6_connection); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_SENT); - write_chart_dimension("bits", (long long)ect->publish_socket.bytes_sent); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH, ""); + write_chart_dimension("received", (long long)ect->publish_socket.bytes_received); + write_chart_dimension("sent", (long long)ect->publish_socket.bytes_sent); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_RECV); - write_chart_dimension("bits", (long long)ect->publish_socket.bytes_received); - ebpf_write_end_chart(); - - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS, ""); write_chart_dimension("calls", (long long)ect->publish_socket.call_tcp_sent); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS, ""); write_chart_dimension("calls", (long long)ect->publish_socket.call_tcp_received); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT, ""); write_chart_dimension("calls", (long long)ect->publish_socket.retransmit); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS, ""); write_chart_dimension("calls", (long long)ect->publish_socket.call_udp_sent); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS); + ebpf_write_begin_chart(ect->name, NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS, ""); write_chart_dimension("calls", (long long)ect->publish_socket.call_udp_received); ebpf_write_end_chart(); } @@ 
-2888,6 +2850,7 @@ static int ebpf_socket_load_bpf(ebpf_module_t *em)
  */
 void *ebpf_socket_thread(void *ptr)
 {
+    pids_fd[EBPF_PIDS_SOCKET_IDX] = -1;
     ebpf_module_t *em = (ebpf_module_t *)ptr;
 
     CLEANUP_FUNCTION_REGISTER(ebpf_socket_exit) cleanup_ptr = em;
@@ -2917,7 +2880,6 @@ void *ebpf_socket_thread(void *ptr)
     ebpf_adjust_thread_load(em, default_btf);
 #endif
     if (ebpf_socket_load_bpf(em)) {
-        pthread_mutex_unlock(&lock);
         goto endsocket;
     }
 
diff --git a/src/collectors/ebpf.plugin/ebpf_socket.h b/src/collectors/ebpf.plugin/ebpf_socket.h
index b36ed064c..e01126035 100644
--- a/src/collectors/ebpf.plugin/ebpf_socket.h
+++ b/src/collectors/ebpf.plugin/ebpf_socket.h
@@ -112,16 +112,15 @@ typedef enum ebpf_socket_idx {
 #define NETDATA_UDP_FUNCTION_BITS "total_udp_bandwidth"
 #define NETDATA_UDP_FUNCTION_ERROR "udp_error"
 
-// Charts created on Apps submenu
-#define NETDATA_NET_APPS_CONNECTION_TCP_V4 "outbound_conn_v4"
-#define NETDATA_NET_APPS_CONNECTION_TCP_V6 "outbound_conn_v6"
-#define NETDATA_NET_APPS_BANDWIDTH_SENT "total_bandwidth_sent"
-#define NETDATA_NET_APPS_BANDWIDTH_RECV "total_bandwidth_recv"
-#define NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS "bandwidth_tcp_send"
-#define NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS "bandwidth_tcp_recv"
-#define NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT "bandwidth_tcp_retransmit"
-#define NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS "bandwidth_udp_send"
-#define NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS "bandwidth_udp_recv"
+// Charts created (id or suffix)
+#define NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V4 "outbound_conn_v4"
+#define NETDATA_SOCK_ID_OR_SUFFIX_CONNECTION_TCP_V6 "outbound_conn_v6"
+#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH "total_bandwidth"
+#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_SEND_CALLS "bandwidth_tcp_send"
+#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RECV_CALLS "bandwidth_tcp_recv"
+#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_TCP_RETRANSMIT "bandwidth_tcp_retransmit"
+#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_SEND_CALLS "bandwidth_udp_send"
+#define NETDATA_SOCK_ID_OR_SUFFIX_BANDWIDTH_UDP_RECV_CALLS "bandwidth_udp_recv"
 
 // Port range
 #define NETDATA_MINIMUM_PORT_VALUE 1
@@ -137,30 +136,28 @@ typedef enum ebpf_socket_idx {
 // Contexts
 #define NETDATA_CGROUP_TCP_V4_CONN_CONTEXT "cgroup.net_conn_ipv4"
 #define NETDATA_CGROUP_TCP_V6_CONN_CONTEXT "cgroup.net_conn_ipv6"
-#define NETDATA_CGROUP_SOCKET_BYTES_RECV_CONTEXT "cgroup.net_bytes_recv"
-#define NETDATA_CGROUP_SOCKET_BYTES_SEND_CONTEXT "cgroup.net_bytes_send"
+#define NETDATA_CGROUP_SOCKET_TCP_BANDWIDTH_CONTEXT "cgroup.net_total_bandwidth"
 #define NETDATA_CGROUP_SOCKET_TCP_RECV_CONTEXT "cgroup.net_tcp_recv"
 #define NETDATA_CGROUP_SOCKET_TCP_SEND_CONTEXT "cgroup.net_tcp_send"
 #define NETDATA_CGROUP_SOCKET_TCP_RETRANSMIT_CONTEXT "cgroup.net_retransmit"
 #define NETDATA_CGROUP_SOCKET_UDP_RECV_CONTEXT "cgroup.net_udp_recv"
 #define NETDATA_CGROUP_SOCKET_UDP_SEND_CONTEXT "cgroup.net_udp_send"
-#define NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT "systemd.services.net_conn_ipv4"
-#define NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT "systemd.services.net_conn_ipv6"
-#define NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT "systemd.services.net_bytes_recv"
-#define NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT "systemd.services.net_bytes_send"
-#define NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT "systemd.services.net_tcp_recv"
-#define NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT "systemd.services.net_tcp_send"
-#define NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT
"systemd.services.net_retransmit" -#define NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT "systemd.services.net_udp_recv" -#define NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT "systemd.services.net_udp_send" +#define NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT "systemd.service.net_conn_ipv4" +#define NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT "systemd.service.net_conn_ipv6" +#define NETDATA_SERVICES_SOCKET_TCP_BANDWIDTH_CONTEXT "systemd.service.net_total_bandwidth" +#define NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT "systemd.service.net_tcp_recv" +#define NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT "systemd.service.net_tcp_send" +#define NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT "systemd.service.net_retransmit" +#define NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT "systemd.service.net_udp_recv" +#define NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT "systemd.service.net_udp_send" // ARAL name #define NETDATA_EBPF_SOCKET_ARAL_NAME "ebpf_socket" #define NETDATA_EBPF_PID_SOCKET_ARAL_TABLE_NAME "ebpf_pid_socket" #define NETDATA_EBPF_SOCKET_ARAL_TABLE_NAME "ebpf_socket_tbl" -typedef struct ebpf_socket_publish_apps { +typedef struct __attribute__((packed)) ebpf_socket_publish_apps { // Data read uint64_t bytes_sent; // Bytes sent uint64_t bytes_received; // Bytes received @@ -345,8 +342,7 @@ void ebpf_parse_service_name_section(struct config *cfg); void ebpf_parse_ips_unsafe(char *ptr); void ebpf_parse_ports(char *ptr); void ebpf_socket_read_open_connections(BUFFER *buf, struct ebpf_module *em); -void ebpf_socket_fill_publish_apps(uint32_t current_pid, netdata_socket_t *ns); - +void ebpf_socket_fill_publish_apps(ebpf_socket_publish_apps_t *curr, netdata_socket_t *ns); extern struct config socket_config; extern netdata_ebpf_targets_t socket_targets[]; diff --git a/src/collectors/ebpf.plugin/ebpf_swap.c b/src/collectors/ebpf.plugin/ebpf_swap.c index 1e2a7cc60..933353178 100644 --- a/src/collectors/ebpf.plugin/ebpf_swap.c +++ b/src/collectors/ebpf.plugin/ebpf_swap.c @@ -10,7 +10,7 @@ static netdata_publish_syscall_t swap_publish_aggregated[NETDATA_SWAP_END]; static netdata_idx_t swap_hash_values[NETDATA_SWAP_END]; static netdata_idx_t *swap_values = NULL; -netdata_publish_swap_t *swap_vector = NULL; +netdata_ebpf_swap_t *swap_vector = NULL; struct config swap_config = { .first_section = NULL, .last_section = NULL, @@ -274,9 +274,9 @@ static void ebpf_obsolete_specific_swap_charts(char *type, int update_every); */ static void ebpf_obsolete_swap_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_MEM_SWAP_READ_CHART, + "", "Calls to function swap_readpage.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_SYSTEM_SWAP_SUBMENU, @@ -285,9 +285,9 @@ static void ebpf_obsolete_swap_services(ebpf_module_t *em, char *id) 20191, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_MEM_SWAP_WRITE_CHART, + "", "Calls to function swap_writepage.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_SYSTEM_SWAP_SUBMENU, @@ -391,8 +391,13 @@ static void ebpf_obsolete_swap_global(ebpf_module_t *em) */ static void ebpf_swap_exit(void *ptr) { + pids_fd[EBPF_PIDS_SWAP_IDX] = -1; ebpf_module_t *em = (ebpf_module_t *)ptr; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_SWAP_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_swap.thread) nd_thread_signal_cancel(ebpf_read_swap.thread); @@ -447,14 +452,21 @@ static void ebpf_swap_exit(void *ptr) * @param out the vector with read values. 
* @param maps_per_core do I need to read all cores? */ -static void swap_apps_accumulator(netdata_publish_swap_t *out, int maps_per_core) +static void swap_apps_accumulator(netdata_ebpf_swap_t *out, int maps_per_core) { int i, end = (maps_per_core) ? ebpf_nprocs : 1; - netdata_publish_swap_t *total = &out[0]; + netdata_ebpf_swap_t *total = &out[0]; + uint64_t ct = total->ct; for (i = 1; i < end; i++) { - netdata_publish_swap_t *w = &out[i]; + netdata_ebpf_swap_t *w = &out[i]; total->write += w->write; total->read += w->read; + + if (w->ct > ct) + ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } } @@ -472,12 +484,11 @@ static void ebpf_update_swap_cgroup() for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; netdata_publish_swap_t *out = &pids->swap; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_swap_t *in = &local_pid->swap; - - memcpy(out, in, sizeof(netdata_publish_swap_t)); - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_SWAP_IDX); + netdata_publish_swap_t *in = local_pid->swap; + if (!in) + continue; + memcpy(out, in, sizeof(netdata_publish_swap_t)); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -496,15 +507,15 @@ static void ebpf_swap_sum_pids(netdata_publish_swap_t *swap, struct ebpf_pid_on_ uint64_t local_read = 0; uint64_t local_write = 0; - while (root) { + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_swap_t *w = &local_pid->swap; - local_write += w->write; - local_read += w->read; - } - root = root->next; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_SWAP_IDX); + netdata_publish_swap_t *w = local_pid->swap; + if (!w) + continue; + + local_write += w->write; + local_read += w->read; } // These conditions were added, because we are using incremental algorithm @@ -532,12 +543,13 @@ void ebpf_swap_resume_apps_data() { * Read the apps table and store data inside the structure. * * @param maps_per_core do I need to read all cores? 
+ * @param max_period limit of iterations without updates before remove data from hash table */ -static void ebpf_read_swap_apps_table(int maps_per_core, int max_period) +static void ebpf_read_swap_apps_table(int maps_per_core, uint32_t max_period) { - netdata_publish_swap_t *cv = swap_vector; + netdata_ebpf_swap_t *cv = swap_vector; int fd = swap_maps[NETDATA_PID_SWAP_TABLE].map_fd; - size_t length = sizeof(netdata_publish_swap_t); + size_t length = sizeof(netdata_ebpf_swap_t); if (maps_per_core) length *= ebpf_nprocs; @@ -549,17 +561,22 @@ static void ebpf_read_swap_apps_table(int maps_per_core, int max_period) swap_apps_accumulator(cv, maps_per_core); - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(key, cv->tgid); - if (!local_pid) - goto end_swap_loop; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key, cv->tgid, cv->name, EBPF_PIDS_SWAP_IDX); + netdata_publish_swap_t *publish = local_pid->swap; + if (!publish) + local_pid->swap = publish = ebpf_swap_allocate_publish_swap(); - netdata_publish_swap_t *publish = &local_pid->swap; if (!publish->ct || publish->ct != cv->ct) { memcpy(publish, cv, sizeof(netdata_publish_swap_t)); local_pid->not_updated = 0; - } else if (++local_pid->not_updated >= max_period) { - bpf_map_delete_elem(fd, &key); - local_pid->not_updated = 0; + } else { + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(local_pid); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(local_pid, fd, key, EBPF_PIDS_SWAP_IDX); + ebpf_swap_release_publish(publish); + local_pid->swap = NULL; + } } // We are cleaning to avoid passing data read from one process to other. @@ -587,13 +604,17 @@ void *ebpf_read_swap_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; usec_t period = update_every * USEC_PER_SEC; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; + pids_fd[EBPF_PIDS_SWAP_IDX] = swap_maps[NETDATA_PID_SWAP_TABLE].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); @@ -722,11 +743,11 @@ static void ebpf_send_systemd_swap_charts() continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_MEM_SWAP_READ_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_MEM_SWAP_READ_CHART, ""); write_chart_dimension("calls", (long long) ect->publish_systemd_swap.read); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_MEM_SWAP_WRITE_CHART); + ebpf_write_begin_chart(ect->name, NETDATA_MEM_SWAP_WRITE_CHART, ""); write_chart_dimension("calls", (long long) ect->publish_systemd_swap.write); ebpf_write_end_chart(); } @@ -1017,7 +1038,7 @@ void ebpf_swap_create_apps_charts(struct ebpf_module *em, void *ptr) */ static void ebpf_swap_allocate_global_vectors() { - swap_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_publish_swap_t)); + swap_vector = callocz((size_t)ebpf_nprocs, sizeof(netdata_ebpf_swap_t)); swap_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_idx_t)); diff --git a/src/collectors/ebpf.plugin/ebpf_swap.h b/src/collectors/ebpf.plugin/ebpf_swap.h index 92aecd29b..478b47adf 100644 --- a/src/collectors/ebpf.plugin/ebpf_swap.h +++ b/src/collectors/ebpf.plugin/ebpf_swap.h @@ -21,19 +21,26 @@ // Contexts #define NETDATA_CGROUP_SWAP_READ_CONTEXT 
"cgroup.swap_read" #define NETDATA_CGROUP_SWAP_WRITE_CONTEXT "cgroup.swap_write" -#define NETDATA_SYSTEMD_SWAP_READ_CONTEXT "systemd.services.swap_read" -#define NETDATA_SYSTEMD_SWAP_WRITE_CONTEXT "systemd.services.swap_write" +#define NETDATA_SYSTEMD_SWAP_READ_CONTEXT "systemd.service.swap_read" +#define NETDATA_SYSTEMD_SWAP_WRITE_CONTEXT "systemd.service.swap_write" -typedef struct netdata_publish_swap { +typedef struct __attribute__((packed)) netdata_publish_swap { + uint64_t ct; + + uint32_t read; + uint32_t write; +} netdata_publish_swap_t; + +typedef struct netdata_ebpf_swap { uint64_t ct; uint32_t tgid; uint32_t uid; uint32_t gid; char name[TASK_COMM_LEN]; - uint64_t read; - uint64_t write; -} netdata_publish_swap_t; + uint32_t read; + uint32_t write; +} netdata_ebpf_swap_t; enum swap_tables { NETDATA_PID_SWAP_TABLE, diff --git a/src/collectors/ebpf.plugin/ebpf_vfs.c b/src/collectors/ebpf.plugin/ebpf_vfs.c index eea27192e..cf1f50e99 100644 --- a/src/collectors/ebpf.plugin/ebpf_vfs.c +++ b/src/collectors/ebpf.plugin/ebpf_vfs.c @@ -11,7 +11,7 @@ static char *vfs_id_names[NETDATA_KEY_PUBLISH_VFS_END] = { "vfs_unlink", "vfs_re static netdata_idx_t *vfs_hash_values = NULL; static netdata_syscall_stat_t vfs_aggregated_data[NETDATA_KEY_PUBLISH_VFS_END]; static netdata_publish_syscall_t vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_END]; -netdata_publish_vfs_t *vfs_vector = NULL; +netdata_ebpf_vfs_t *vfs_vector = NULL; static ebpf_local_maps_t vfs_maps[] = {{.name = "tbl_vfs_pid", .internal_input = ND_EBPF_DEFAULT_PID_SIZE, .user_input = 0, .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID, @@ -396,9 +396,9 @@ static void ebpf_obsolete_specific_vfs_charts(char *type, ebpf_module_t *em); */ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_FILE_DELETED, + "", "Files deleted", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -407,9 +407,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) 20065, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, + "", "Write to disk", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -419,9 +419,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, + "", "Fails to write", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -431,9 +431,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, + "", "Read from disk", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -443,9 +443,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, + "", "Fails to read", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -455,9 +455,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, + 
"", "Bytes written on disk", EBPF_COMMON_UNITS_BYTES, NETDATA_VFS_GROUP, @@ -466,9 +466,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) 20070, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, + "", "Bytes read from disk", EBPF_COMMON_UNITS_BYTES, NETDATA_VFS_GROUP, @@ -477,9 +477,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) 20071, em->update_every); - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_FSYNC, + "", "Calls to vfs_fsync.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -489,9 +489,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, + "", "Sync error", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -501,9 +501,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_OPEN, + "", "Calls to vfs_open.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -513,9 +513,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, + "", "Open error", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -525,9 +525,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); } - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_CREATE, + "", "Calls to vfs_create.", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -537,9 +537,9 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em, char *id) em->update_every); if (em->mode < MODE_ENTRY) { - ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY, - id, + ebpf_write_chart_obsolete(id, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, + "", "Create error", EBPF_COMMON_UNITS_CALLS_PER_SEC, NETDATA_VFS_GROUP, @@ -881,6 +881,10 @@ static void ebpf_vfs_exit(void *pptr) ebpf_module_t *em = CLEANUP_FUNCTION_GET_PTR(pptr); if(!em) return; + pthread_mutex_lock(&lock); + collect_pids &= ~(1<<EBPF_MODULE_VFS_IDX); + pthread_mutex_unlock(&lock); + if (ebpf_read_vfs.thread) nd_thread_signal_cancel(ebpf_read_vfs.thread); @@ -1029,6 +1033,74 @@ static void ebpf_vfs_read_global_table(netdata_idx_t *stats, int maps_per_core) } /** + * Set VFS + * + * Set vfs structure with values from ebpf structure. + * + * @param vfs the output structure. + * @param w the input data. 
+ */ +static inline void vfs_aggregate_set_vfs(netdata_publish_vfs_t *vfs, netdata_ebpf_vfs_t *w) +{ + vfs->write_call = w->write_call; + vfs->writev_call = w->writev_call; + vfs->read_call = w->read_call; + vfs->readv_call = w->readv_call; + vfs->unlink_call = w->unlink_call; + vfs->fsync_call = w->fsync_call; + vfs->open_call = w->open_call; + vfs->create_call = w->create_call; + + vfs->write_bytes = w->write_bytes; + vfs->writev_bytes = w->writev_bytes; + vfs->read_bytes = w->read_bytes; + vfs->readv_bytes = w->readv_bytes; + + vfs->write_err = w->write_err; + vfs->writev_err = w->writev_err; + vfs->read_err = w->read_err; + vfs->readv_err = w->readv_err; + vfs->unlink_err = w->unlink_err; + vfs->fsync_err = w->fsync_err; + vfs->open_err = w->open_err; + vfs->create_err = w->create_err; +} + +/** + * Aggregate Publish VFS + * + * Aggregate data from w source. + * + * @param vfs the output structure. + * @param w the input data. + */ +static inline void vfs_aggregate_publish_vfs(netdata_publish_vfs_t *vfs, netdata_publish_vfs_t *w) +{ + vfs->write_call += w->write_call; + vfs->writev_call += w->writev_call; + vfs->read_call += w->read_call; + vfs->readv_call += w->readv_call; + vfs->unlink_call += w->unlink_call; + vfs->fsync_call += w->fsync_call; + vfs->open_call += w->open_call; + vfs->create_call += w->create_call; + + vfs->write_bytes += w->write_bytes; + vfs->writev_bytes += w->writev_bytes; + vfs->read_bytes += w->read_bytes; + vfs->readv_bytes += w->readv_bytes; + + vfs->write_err += w->write_err; + vfs->writev_err += w->writev_err; + vfs->read_err += w->read_err; + vfs->readv_err += w->readv_err; + vfs->unlink_err += w->unlink_err; + vfs->fsync_err += w->fsync_err; + vfs->open_err += w->open_err; + vfs->create_err += w->create_err; +} + +/** * Sum PIDs * * Sum values for all targets. @@ -1038,63 +1110,17 @@ static void ebpf_vfs_read_global_table(netdata_idx_t *stats, int maps_per_core) */ static void ebpf_vfs_sum_pids(netdata_publish_vfs_t *vfs, struct ebpf_pid_on_target *root) { - netdata_publish_vfs_t accumulator; - memset(&accumulator, 0, sizeof(accumulator)); + memset(vfs, 0, sizeof(netdata_publish_vfs_t)); - while (root) { + for (; root; root = root->next) { int32_t pid = root->pid; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_vfs_t *w = &local_pid->vfs; - accumulator.write_call += w->write_call; - accumulator.writev_call += w->writev_call; - accumulator.read_call += w->read_call; - accumulator.readv_call += w->readv_call; - accumulator.unlink_call += w->unlink_call; - accumulator.fsync_call += w->fsync_call; - accumulator.open_call += w->open_call; - accumulator.create_call += w->create_call; - - accumulator.write_bytes += w->write_bytes; - accumulator.writev_bytes += w->writev_bytes; - accumulator.read_bytes += w->read_bytes; - accumulator.readv_bytes += w->readv_bytes; - - accumulator.write_err += w->write_err; - accumulator.writev_err += w->writev_err; - accumulator.read_err += w->read_err; - accumulator.readv_err += w->readv_err; - accumulator.unlink_err += w->unlink_err; - accumulator.fsync_err += w->fsync_err; - accumulator.open_err += w->open_err; - accumulator.create_err += w->create_err; - } - root = root->next; - } - - // These conditions were added, because we are using incremental algorithm - vfs->write_call = (accumulator.write_call >= vfs->write_call) ? accumulator.write_call : vfs->write_call; - vfs->writev_call = (accumulator.writev_call >= vfs->writev_call) ? 
accumulator.writev_call : vfs->writev_call; - vfs->read_call = (accumulator.read_call >= vfs->read_call) ? accumulator.read_call : vfs->read_call; - vfs->readv_call = (accumulator.readv_call >= vfs->readv_call) ? accumulator.readv_call : vfs->readv_call; - vfs->unlink_call = (accumulator.unlink_call >= vfs->unlink_call) ? accumulator.unlink_call : vfs->unlink_call; - vfs->fsync_call = (accumulator.fsync_call >= vfs->fsync_call) ? accumulator.fsync_call : vfs->fsync_call; - vfs->open_call = (accumulator.open_call >= vfs->open_call) ? accumulator.open_call : vfs->open_call; - vfs->create_call = (accumulator.create_call >= vfs->create_call) ? accumulator.create_call : vfs->create_call; - - vfs->write_bytes = (accumulator.write_bytes >= vfs->write_bytes) ? accumulator.write_bytes : vfs->write_bytes; - vfs->writev_bytes = (accumulator.writev_bytes >= vfs->writev_bytes) ? accumulator.writev_bytes : vfs->writev_bytes; - vfs->read_bytes = (accumulator.read_bytes >= vfs->read_bytes) ? accumulator.read_bytes : vfs->read_bytes; - vfs->readv_bytes = (accumulator.readv_bytes >= vfs->readv_bytes) ? accumulator.readv_bytes : vfs->readv_bytes; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_VFS_IDX); + netdata_publish_vfs_t *w = local_pid->vfs; + if (!w) + continue; - vfs->write_err = (accumulator.write_err >= vfs->write_err) ? accumulator.write_err : vfs->write_err; - vfs->writev_err = (accumulator.writev_err >= vfs->writev_err) ? accumulator.writev_err : vfs->writev_err; - vfs->read_err = (accumulator.read_err >= vfs->read_err) ? accumulator.read_err : vfs->read_err; - vfs->readv_err = (accumulator.readv_err >= vfs->readv_err) ? accumulator.readv_err : vfs->readv_err; - vfs->unlink_err = (accumulator.unlink_err >= vfs->unlink_err) ? accumulator.unlink_err : vfs->unlink_err; - vfs->fsync_err = (accumulator.fsync_err >= vfs->fsync_err) ? accumulator.fsync_err : vfs->fsync_err; - vfs->open_err = (accumulator.open_err >= vfs->open_err) ? accumulator.open_err : vfs->open_err; - vfs->create_err = (accumulator.create_err >= vfs->create_err) ? accumulator.create_err : vfs->create_err; + vfs_aggregate_publish_vfs(vfs, w); + } } /** @@ -1183,12 +1209,13 @@ void ebpf_vfs_send_apps_data(ebpf_module_t *em, struct ebpf_target *root) * * @param out the vector with read values. */ -static void vfs_apps_accumulator(netdata_publish_vfs_t *out, int maps_per_core) +static void vfs_apps_accumulator(netdata_ebpf_vfs_t *out, int maps_per_core) { int i, end = (maps_per_core) ? ebpf_nprocs : 1; - netdata_publish_vfs_t *total = &out[0]; + netdata_ebpf_vfs_t *total = &out[0]; + uint64_t ct = total->ct; for (i = 1; i < end; i++) { - netdata_publish_vfs_t *w = &out[i]; + netdata_ebpf_vfs_t *w = &out[i]; total->write_call += w->write_call; total->writev_call += w->writev_call; @@ -1206,17 +1233,23 @@ static void vfs_apps_accumulator(netdata_publish_vfs_t *out, int maps_per_core) total->read_err += w->read_err; total->readv_err += w->readv_err; total->unlink_err += w->unlink_err; + + if (w->ct > ct) + ct = w->ct; + + if (!total->name[0] && w->name[0]) + strncpyz(total->name, w->name, sizeof(total->name) - 1); } } /** * Read the hash table and store data to allocated vectors. 
*/ -static void ebpf_vfs_read_apps(int maps_per_core, int max_period) +static void ebpf_vfs_read_apps(int maps_per_core, uint32_t max_period) { - netdata_publish_vfs_t *vv = vfs_vector; + netdata_ebpf_vfs_t *vv = vfs_vector; int fd = vfs_maps[NETDATA_VFS_PID].map_fd; - size_t length = sizeof(netdata_publish_vfs_t); + size_t length = sizeof(netdata_ebpf_vfs_t); if (maps_per_core) length *= ebpf_nprocs; @@ -1228,17 +1261,22 @@ static void ebpf_vfs_read_apps(int maps_per_core, int max_period) vfs_apps_accumulator(vv, maps_per_core); - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(key, vv->tgid); - if (!local_pid) - goto end_vfs_loop; + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(key, vv->tgid, vv->name, EBPF_PIDS_VFS_IDX); + netdata_publish_vfs_t *publish = local_pid->vfs; + if (!publish) + local_pid->vfs = publish = ebpf_vfs_allocate_publish(); - netdata_publish_vfs_t *publish = &local_pid->vfs; if (!publish->ct || publish->ct != vv->ct) { - memcpy(publish, vv, sizeof(netdata_publish_vfs_t)); + vfs_aggregate_set_vfs(publish, vv); local_pid->not_updated = 0; } else if (++local_pid->not_updated >= max_period){ - bpf_map_delete_elem(fd, &key); - local_pid->not_updated = 0; + if (kill(key, 0)) { // No PID found + ebpf_reset_specific_pid_data(local_pid); + } else { // There is PID, but there is not data anymore + ebpf_release_pid_data(local_pid, fd, key, EBPF_PIDS_VFS_IDX); + ebpf_vfs_release_publish(publish); + local_pid->vfs = NULL; + } } end_vfs_loop: @@ -1264,12 +1302,14 @@ static void read_update_vfs_cgroup() for (pids = ect->pids; pids; pids = pids->next) { int pid = pids->pid; netdata_publish_vfs_t *out = &pids->vfs; - ebpf_pid_stat_t *local_pid = ebpf_get_pid_entry(pid, 0); - if (local_pid) { - netdata_publish_vfs_t *in = &local_pid->vfs; + memset(out, 0, sizeof(netdata_publish_vfs_t)); - memcpy(out, in, sizeof(netdata_publish_vfs_t)); - } + ebpf_pid_data_t *local_pid = ebpf_get_pid_data(pid, 0, NULL, EBPF_PIDS_VFS_IDX); + netdata_publish_vfs_t *in = local_pid->vfs; + if (!in) + continue; + + vfs_aggregate_publish_vfs(out, in); } } pthread_mutex_unlock(&mutex_cgroup_shm); @@ -1284,7 +1324,7 @@ static void read_update_vfs_cgroup() * @param pids input data */ static void ebpf_vfs_sum_cgroup_pids(netdata_publish_vfs_t *vfs, struct pid_on_target2 *pids) - { +{ netdata_publish_vfs_t accumulator; memset(&accumulator, 0, sizeof(accumulator)); @@ -1888,70 +1928,70 @@ static void ebpf_send_systemd_vfs_charts(ebpf_module_t *em) continue; } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_FILE_DELETED); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_FILE_DELETED, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.unlink_call); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.write_call + ect->publish_systemd_vfs.writev_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.write_err + ect->publish_systemd_vfs.writev_err); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_READ_CALLS); + ebpf_write_begin_chart(ect->name, 
NETDATA_SYSCALL_APPS_VFS_READ_CALLS, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.read_call + ect->publish_systemd_vfs.readv_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.read_err + ect->publish_systemd_vfs.readv_err); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, ""); write_chart_dimension("bytes", ect->publish_systemd_vfs.write_bytes + ect->publish_systemd_vfs.writev_bytes); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_READ_BYTES); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, ""); write_chart_dimension("bytes", ect->publish_systemd_vfs.read_bytes + ect->publish_systemd_vfs.readv_bytes); ebpf_write_end_chart(); - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_FSYNC); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_FSYNC, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.fsync_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.fsync_err); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_OPEN); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_OPEN, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.open_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.open_err); ebpf_write_end_chart(); } - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_CREATE); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_CREATE, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.create_call); ebpf_write_end_chart(); if (em->mode < MODE_ENTRY) { - ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, ect->name, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR); + ebpf_write_begin_chart(ect->name, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, ""); write_chart_dimension("calls", ect->publish_systemd_vfs.create_err); ebpf_write_end_chart(); } @@ -2031,13 +2071,17 @@ void *ebpf_read_vfs_thread(void *ptr) int maps_per_core = em->maps_per_core; int update_every = em->update_every; + int collect_pid = (em->apps_charts || em->cgroup_charts); + if (!collect_pid) + return NULL; int counter = update_every - 1; uint32_t lifetime = em->lifetime; uint32_t running_time = 0; usec_t period = update_every * USEC_PER_SEC; - int max_period = update_every * EBPF_CLEANUP_FACTOR; + uint32_t max_period = EBPF_CLEANUP_FACTOR; + pids_fd[EBPF_PIDS_VFS_IDX] = vfs_maps[NETDATA_VFS_PID].map_fd; while (!ebpf_plugin_stop() && running_time < lifetime) { (void)heartbeat_next(&hb, period); if (ebpf_plugin_stop() || ++counter != update_every) @@ -2527,7 +2571,7 
@@ void ebpf_vfs_create_apps_charts(struct ebpf_module *em, void *ptr) */ static void ebpf_vfs_allocate_global_vectors() { - vfs_vector = callocz(ebpf_nprocs, sizeof(netdata_publish_vfs_t)); + vfs_vector = callocz(ebpf_nprocs, sizeof(netdata_ebpf_vfs_t)); memset(vfs_aggregated_data, 0, sizeof(vfs_aggregated_data)); memset(vfs_publish_aggregated, 0, sizeof(vfs_publish_aggregated)); @@ -2586,6 +2630,7 @@ static int ebpf_vfs_load_bpf(ebpf_module_t *em) */ void *ebpf_vfs_thread(void *ptr) { + pids_fd[EBPF_PIDS_VFS_IDX] = -1; ebpf_module_t *em = (ebpf_module_t *)ptr; CLEANUP_FUNCTION_REGISTER(ebpf_vfs_exit) cleanup_ptr = em; @@ -2618,7 +2663,8 @@ void *ebpf_vfs_thread(void *ptr) pthread_mutex_unlock(&lock); - ebpf_read_vfs.thread = nd_thread_create(ebpf_read_vfs.name, NETDATA_THREAD_OPTION_DEFAULT, ebpf_read_vfs_thread, em); + ebpf_read_vfs.thread = nd_thread_create(ebpf_read_vfs.name, NETDATA_THREAD_OPTION_DEFAULT, + ebpf_read_vfs_thread, em); vfs_collector(em); diff --git a/src/collectors/ebpf.plugin/ebpf_vfs.h b/src/collectors/ebpf.plugin/ebpf_vfs.h index 398e28317..7458cd857 100644 --- a/src/collectors/ebpf.plugin/ebpf_vfs.h +++ b/src/collectors/ebpf.plugin/ebpf_vfs.h @@ -55,19 +55,19 @@ #define NETDATA_CGROUP_VFS_FSYNC_CONTEXT "cgroup.vfs_fsync" #define NETDATA_CGROUP_VFS_FSYNC_ERROR_CONTEXT "cgroup.vfs_fsync_error" -#define NETDATA_SYSTEMD_VFS_UNLINK_CONTEXT "systemd.services.vfs_unlink" -#define NETDATA_SYSTEMD_VFS_WRITE_CONTEXT "systemd.services.vfs_write" -#define NETDATA_SYSTEMD_VFS_WRITE_ERROR_CONTEXT "systemd.services.vfs_write_error" -#define NETDATA_SYSTEMD_VFS_READ_CONTEXT "systemd.services.vfs_read" -#define NETDATA_SYSTEMD_VFS_READ_ERROR_CONTEXT "systemd.services.vfs_read_error" -#define NETDATA_SYSTEMD_VFS_WRITE_BYTES_CONTEXT "systemd.services.vfs_write_bytes" -#define NETDATA_SYSTEMD_VFS_READ_BYTES_CONTEXT "systemd.services.vfs_read_bytes" -#define NETDATA_SYSTEMD_VFS_CREATE_CONTEXT "systemd.services.vfs_create" -#define NETDATA_SYSTEMD_VFS_CREATE_ERROR_CONTEXT "systemd.services.vfs_create_error" -#define NETDATA_SYSTEMD_VFS_OPEN_CONTEXT "systemd.services.vfs_open" -#define NETDATA_SYSTEMD_VFS_OPEN_ERROR_CONTEXT "systemd.services.vfs_open_error" -#define NETDATA_SYSTEMD_VFS_FSYNC_CONTEXT "systemd.services.vfs_fsync" -#define NETDATA_SYSTEMD_VFS_FSYNC_ERROR_CONTEXT "systemd.services.vfs_fsync_error" +#define NETDATA_SYSTEMD_VFS_UNLINK_CONTEXT "systemd.service.vfs_unlink" +#define NETDATA_SYSTEMD_VFS_WRITE_CONTEXT "systemd.service.vfs_write" +#define NETDATA_SYSTEMD_VFS_WRITE_ERROR_CONTEXT "systemd.service.vfs_write_error" +#define NETDATA_SYSTEMD_VFS_READ_CONTEXT "systemd.service.vfs_read" +#define NETDATA_SYSTEMD_VFS_READ_ERROR_CONTEXT "systemd.service.vfs_read_error" +#define NETDATA_SYSTEMD_VFS_WRITE_BYTES_CONTEXT "systemd.service.vfs_write_bytes" +#define NETDATA_SYSTEMD_VFS_READ_BYTES_CONTEXT "systemd.service.vfs_read_bytes" +#define NETDATA_SYSTEMD_VFS_CREATE_CONTEXT "systemd.service.vfs_create" +#define NETDATA_SYSTEMD_VFS_CREATE_ERROR_CONTEXT "systemd.service.vfs_create_error" +#define NETDATA_SYSTEMD_VFS_OPEN_CONTEXT "systemd.service.vfs_open" +#define NETDATA_SYSTEMD_VFS_OPEN_ERROR_CONTEXT "systemd.service.vfs_open_error" +#define NETDATA_SYSTEMD_VFS_FSYNC_CONTEXT "systemd.service.vfs_fsync" +#define NETDATA_SYSTEMD_VFS_FSYNC_ERROR_CONTEXT "systemd.service.vfs_fsync_error" // ARAL name #define NETDATA_EBPF_VFS_ARAL_NAME "ebpf_vfs" @@ -75,7 +75,38 @@ // dimension #define EBPF_COMMON_UNITS_BYTES "bytes/s" -typedef struct netdata_publish_vfs { +typedef struct 
__attribute__((packed)) netdata_publish_vfs { + uint64_t ct; + + //Counter + uint32_t write_call; + uint32_t writev_call; + uint32_t read_call; + uint32_t readv_call; + uint32_t unlink_call; + uint32_t fsync_call; + uint32_t open_call; + uint32_t create_call; + + //Accumulator + uint64_t write_bytes; + uint64_t writev_bytes; + uint64_t readv_bytes; + uint64_t read_bytes; + + //Counter + uint32_t write_err; + uint32_t writev_err; + uint32_t read_err; + uint32_t readv_err; + uint32_t unlink_err; + uint32_t fsync_err; + uint32_t open_err; + uint32_t create_err; + +} netdata_publish_vfs_t; + +typedef struct netdata_ebpf_vfs { uint64_t ct; uint32_t tgid; uint32_t uid; @@ -107,7 +138,7 @@ typedef struct netdata_publish_vfs { uint32_t fsync_err; uint32_t open_err; uint32_t create_err; -} netdata_publish_vfs_t; +} netdata_ebpf_vfs_t; enum netdata_publish_vfs_list { NETDATA_KEY_PUBLISH_VFS_UNLINK, diff --git a/src/collectors/ebpf.plugin/integrations/ebpf_process.md b/src/collectors/ebpf.plugin/integrations/ebpf_process.md index d6da09031..817d9169b 100644 --- a/src/collectors/ebpf.plugin/integrations/ebpf_process.md +++ b/src/collectors/ebpf.plugin/integrations/ebpf_process.md @@ -68,6 +68,7 @@ Metrics: | netdata.ebpf_aral_stat_size | memory | bytes | | netdata.ebpf_aral_stat_alloc | aral | calls | | netdata.ebpf_threads | total, running | threads | +| netdata.ebpf_pids | user, kernel | pids | | netdata.ebpf_load_methods | legacy, co-re | methods | | netdata.ebpf_kernel_memory | memory_locked | bytes | | netdata.ebpf_hash_tables_count | hash_table | hash tables | diff --git a/src/collectors/ebpf.plugin/integrations/ebpf_socket.md b/src/collectors/ebpf.plugin/integrations/ebpf_socket.md index c5b613315..917dcaba6 100644 --- a/src/collectors/ebpf.plugin/integrations/ebpf_socket.md +++ b/src/collectors/ebpf.plugin/integrations/ebpf_socket.md @@ -92,8 +92,7 @@ Metrics: |:------|:----------|:----| | app.ebpf_call_tcp_v4_connection | connections | connections/s | | app.ebpf_call_tcp_v6_connection | connections | connections/s | -| app.ebpf_sock_bytes_sent | bandwidth | kilobits/s | -| app.ebpf_sock_bytes_received | bandwidth | kilobits/s | +| app.ebpf_sock_total_bandwidth | received, sent | kilobits/s | | app.ebpf_call_tcp_sendmsg | calls | calls/s | | app.ebpf_call_tcp_cleanup_rbuf | calls | calls/s | | app.ebpf_call_tcp_retransmit | calls | calls/s | @@ -110,23 +109,22 @@ Metrics: | Metric | Dimensions | Unit | |:------|:----------|:----| -| cgroup.net_conn_ipv4 | connected_v4 | connections/s | -| cgroup.net_conn_ipv6 | connected_v6 | connections/s | -| cgroup.net_bytes_recv | received | calls/s | -| cgroup.net_bytes_sent | sent | calls/s | -| cgroup.net_tcp_recv | received | calls/s | -| cgroup.net_tcp_send | sent | calls/s | -| cgroup.net_retransmit | retransmitted | calls/s | -| cgroup.net_udp_send | sent | calls/s | -| cgroup.net_udp_recv | received | calls/s | -| services.net_conn_ipv6 | a dimension per systemd service | connections/s | -| services.net_bytes_recv | a dimension per systemd service | kilobits/s | -| services.net_bytes_sent | a dimension per systemd service | kilobits/s | -| services.net_tcp_recv | a dimension per systemd service | calls/s | -| services.net_tcp_send | a dimension per systemd service | calls/s | -| services.net_tcp_retransmit | a dimension per systemd service | calls/s | -| services.net_udp_send | a dimension per systemd service | calls/s | -| services.net_udp_recv | a dimension per systemd service | calls/s | +| cgroup.net_conn_ipv4 | connections | connections/s | +| 
cgroup.net_conn_ipv6 | connections | connections/s | +| cgroup.net_total_bandwidth | received, sent | kilobits/s | +| cgroup.net_tcp_recv | calls | calls/s | +| cgroup.net_tcp_send | calls | calls/s | +| cgroup.net_retransmit | calls | calls/s | +| cgroup.net_udp_send | calls | calls/s | +| cgroup.net_udp_recv | calls | calls/s | +| services.net_conn_ipv4 | connections | connections/s | +| services.net_conn_ipv6 | connections | connections/s | +| services.net_total_bandwidth | received, sent | kilobits/s | +| services.net_tcp_recv | calls | calls/s | +| services.net_tcp_send | calls | calls/s | +| services.net_tcp_retransmit | calls | calls/s | +| services.net_udp_send | calls | calls/s | +| services.net_udp_recv | calls | calls/s | diff --git a/src/collectors/ebpf.plugin/metadata.yaml b/src/collectors/ebpf.plugin/metadata.yaml index 4921e44f0..861b0ba82 100644 --- a/src/collectors/ebpf.plugin/metadata.yaml +++ b/src/collectors/ebpf.plugin/metadata.yaml @@ -1739,18 +1739,13 @@ modules: chart_type: stacked dimensions: - name: connections - - name: app.ebpf_sock_bytes_sent + - name: app.ebpf_sock_total_bandwidth description: Bytes sent unit: "kilobits/s" chart_type: stacked dimensions: - - name: bandwidth - - name: app.ebpf_sock_bytes_received - description: bytes received - unit: "kilobits/s" - chart_type: stacked - dimensions: - - name: bandwidth + - name: received + - name: sent - name: app.ebpf_call_tcp_sendmsg description: Calls for tcp_sendmsg unit: "calls/s" @@ -1790,103 +1785,99 @@ modules: unit: "connections/s" chart_type: line dimensions: - - name: connected_v4 + - name: connections - name: cgroup.net_conn_ipv6 description: Calls to tcp_v6_connection unit: "connections/s" chart_type: line dimensions: - - name: connected_v6 - - name: cgroup.net_bytes_recv + - name: connections + - name: cgroup.net_total_bandwidth description: Bytes received - unit: "calls/s" + unit: "kilobits/s" chart_type: line dimensions: - name: received - - name: cgroup.net_bytes_sent - description: Bytes sent - unit: "calls/s" - chart_type: line - dimensions: - name: sent - name: cgroup.net_tcp_recv description: Calls to tcp_cleanup_rbuf. unit: "calls/s" chart_type: line dimensions: - - name: received + - name: calls - name: cgroup.net_tcp_send description: Calls to tcp_sendmsg. unit: "calls/s" chart_type: line dimensions: - - name: sent + - name: calls - name: cgroup.net_retransmit description: Calls to tcp_retransmit. 
unit: "calls/s" chart_type: line dimensions: - - name: retransmitted + - name: calls - name: cgroup.net_udp_send description: Calls to udp_sendmsg unit: "calls/s" chart_type: line dimensions: - - name: sent + - name: calls - name: cgroup.net_udp_recv description: Calls to udp_recvmsg unit: "calls/s" chart_type: line dimensions: - - name: received + - name: calls + - name: services.net_conn_ipv4 + description: Calls to tcp_v4_connection + unit: "connections/s" + chart_type: stacked + dimensions: + - name: connections - name: services.net_conn_ipv6 description: Calls to tcp_v6_connection unit: "connections/s" chart_type: stacked dimensions: - - name: a dimension per systemd service - - name: services.net_bytes_recv + - name: connections + - name: services.net_total_bandwidth description: Bytes received unit: "kilobits/s" chart_type: stacked dimensions: - - name: a dimension per systemd service - - name: services.net_bytes_sent - description: Bytes sent - unit: "kilobits/s" - chart_type: stacked - dimensions: - - name: a dimension per systemd service + - name: received + - name: sent - name: services.net_tcp_recv description: Calls to tcp_cleanup_rbuf. unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per systemd service + - name: calls - name: services.net_tcp_send description: Calls to tcp_sendmsg. unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per systemd service + - name: calls - name: services.net_tcp_retransmit description: Calls to tcp_retransmit unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per systemd service + - name: calls - name: services.net_udp_send description: Calls to udp_sendmsg unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per systemd service + - name: calls - name: services.net_udp_recv description: Calls to udp_recvmsg unit: "calls/s" chart_type: stacked dimensions: - - name: a dimension per systemd service + - name: calls - meta: plugin_name: ebpf.plugin module_name: dcstat @@ -3263,6 +3254,13 @@ modules: dimensions: - name: total - name: running + - name: netdata.ebpf_pids + description: Total number of monitored PIDs + unit: "pids" + chart_type: line + dimensions: + - name: user + - name: kernel - name: netdata.ebpf_load_methods description: Load info unit: "methods" diff --git a/src/collectors/freebsd.plugin/freebsd_sysctl.c b/src/collectors/freebsd.plugin/freebsd_sysctl.c index 93ec98dc8..0fa710275 100644 --- a/src/collectors/freebsd.plugin/freebsd_sysctl.c +++ b/src/collectors/freebsd.plugin/freebsd_sysctl.c @@ -24,6 +24,10 @@ #include <netinet/udp.h> #include <netinet/udp_var.h> +#define _COMMON_PLUGIN_NAME "freebsd.plugin" +#define _COMMON_PLUGIN_MODULE_NAME "freebsd" +#include "../common-contexts/common-contexts.h" + // -------------------------------------------------------------------------------------------------------------------- // common definitions and variables @@ -574,28 +578,7 @@ int do_hw_intcnt(int update_every, usec_t dt) { static RRDSET *st_intr = NULL; static RRDDIM *rd_intr = NULL; - if (unlikely(!st_intr)) { - st_intr = rrdset_create_localhost( - "system", - "intr", - NULL, - "interrupts", - NULL, - "Total Hardware Interrupts", - "interrupts/s", - "freebsd.plugin", - "hw.intrcnt", - NETDATA_CHART_PRIO_SYSTEM_INTR, - update_every, - RRDSET_TYPE_LINE - ); - rrdset_flag_set(st_intr, RRDSET_FLAG_DETAIL); - - rd_intr = rrddim_add(st_intr, "interrupts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(st_intr, rd_intr, totalintr); - 
rrdset_done(st_intr); + common_interrupts(totalintr, update_every, "hw.intrcnt"); size_t size; static int mib_hw_intrnames[2] = {0, 0}; @@ -1159,30 +1142,10 @@ int do_kern_ipc_sem(int update_every, usec_t dt) { } } - static RRDSET *st_semaphores = NULL, *st_semaphore_arrays = NULL; - static RRDDIM *rd_semaphores = NULL, *rd_semaphore_arrays = NULL; - - if (unlikely(!st_semaphores)) { - st_semaphores = rrdset_create_localhost( - "system", - "ipc_semaphores", - NULL, - "ipc semaphores", - NULL, - "IPC Semaphores", - "semaphores", - "freebsd.plugin", - "kern.ipc.sem", - NETDATA_CHART_PRIO_SYSTEM_IPC_SEMAPHORES, - update_every, - RRDSET_TYPE_AREA - ); - - rd_semaphores = rrddim_add(st_semaphores, "semaphores", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } + static RRDSET *st_semaphore_arrays = NULL; + static RRDDIM *rd_semaphore_arrays = NULL; - rrddim_set_by_pointer(st_semaphores, rd_semaphores, ipc_sem.semaphores); - rrdset_done(st_semaphores); + common_semaphore_ipc(ipc_sem.semaphores, 0.0, "kern.ipc.sem", update_every); if (unlikely(!st_semaphore_arrays)) { st_semaphore_arrays = rrdset_create_localhost( diff --git a/src/collectors/freeipmi.plugin/freeipmi_plugin.c b/src/collectors/freeipmi.plugin/freeipmi_plugin.c index 4d942f85c..38fb1d19b 100644 --- a/src/collectors/freeipmi.plugin/freeipmi_plugin.c +++ b/src/collectors/freeipmi.plugin/freeipmi_plugin.c @@ -1120,7 +1120,7 @@ static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *stt, } int netdata_ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *stt) { - errno = 0; + errno_clear(); if(type & IPMI_COLLECT_TYPE_SENSORS) { stt->sensors.collected = 0; @@ -1652,6 +1652,10 @@ int main (int argc, char **argv) { bool debug = false; + // TODO: Workaround for https://github.com/netdata/netdata/issues/17931 + // This variable will be removed once the issue is fixed. + bool restart_every = true; + // ------------------------------------------------------------------------ // parse command line parameters @@ -1672,6 +1676,10 @@ int main (int argc, char **argv) { debug = true; continue; } + else if(strcmp("no-restart", argv[i]) == 0) { + restart_every = false; + continue; + } else if(strcmp("sel", argv[i]) == 0) { netdata_do_sel = true; continue; @@ -1922,7 +1930,7 @@ int main (int argc, char **argv) { collector_error("%s(): ignoring parameter '%s'", __FUNCTION__, argv[i]); } - errno = 0; + errno_clear(); if(freq_s && freq_s < update_every) collector_info("%s(): update frequency %d seconds is too small for IPMI. Using %d.", @@ -2100,7 +2108,7 @@ int main (int argc, char **argv) { "END\n"); // restart check (14400 seconds) - if (now_monotonic_sec() - started_t > IPMI_RESTART_EVERY_SECONDS) { + if (restart_every && (now_monotonic_sec() - started_t > IPMI_RESTART_EVERY_SECONDS)) { collector_info("%s(): reached my lifetime expectancy. 
Exiting to restart.", __FUNCTION__); fprintf(stdout, "EXIT\n"); plugin_exit(0); diff --git a/src/collectors/network-viewer.plugin/network-viewer.c b/src/collectors/network-viewer.plugin/network-viewer.c index 764151f5c..06dde7382 100644 --- a/src/collectors/network-viewer.plugin/network-viewer.c +++ b/src/collectors/network-viewer.plugin/network-viewer.c @@ -2,18 +2,30 @@ #include "collectors/all.h" #include "libnetdata/libnetdata.h" + #include "libnetdata/required_dummies.h" +static SPAWN_SERVER *spawn_srv = NULL; + #define ENABLE_DETAILED_VIEW #define LOCAL_SOCKETS_EXTENDED_MEMBERS struct { \ size_t count; \ - const char *local_address_space; \ - const char *remote_address_space; \ + struct { \ + pid_t pid; \ + uid_t uid; \ + SOCKET_DIRECTION direction; \ + int state; \ + uint64_t net_ns_inode; \ + struct socket_endpoint server; \ + const char *local_address_space; \ + const char *remote_address_space; \ + } aggregated_key; \ } network_viewer; #include "libnetdata/maps/local-sockets.h" #include "libnetdata/maps/system-users.h" +#include "libnetdata/maps/system-services.h" #define NETWORK_CONNECTIONS_VIEWER_FUNCTION "network-connections" #define NETWORK_CONNECTIONS_VIEWER_HELP "Network connections explorer" @@ -25,6 +37,7 @@ netdata_mutex_t stdout_mutex = NETDATA_MUTEX_INITIALIZER; static bool plugin_should_exit = false; static USERNAMES_CACHE *uc; +static SERVICENAMES_CACHE *sc; ENUM_STR_MAP_DEFINE(SOCKET_DIRECTION) = { { .id = SOCKET_DIRECTION_LISTEN, .name = "listen" }, @@ -57,19 +70,49 @@ ENUM_STR_MAP_DEFINE(TCP_STATE) = { }; ENUM_STR_DEFINE_FUNCTIONS(TCP_STATE, 0, "unknown"); -static void local_socket_to_json_array(BUFFER *wb, LOCAL_SOCKET *n, uint64_t proc_self_net_ns_inode, bool aggregated) { +struct sockets_stats { + BUFFER *wb; + + struct { + uint32_t tcpi_rtt; + uint32_t tcpi_rcv_rtt; + uint32_t tcpi_total_retrans; + } max; +}; + +static void local_socket_to_json_array(struct sockets_stats *st, LOCAL_SOCKET *n, uint64_t proc_self_net_ns_inode, bool aggregated) { + if(n->direction == SOCKET_DIRECTION_NONE) + return; + + BUFFER *wb = st->wb; + char local_address[INET6_ADDRSTRLEN]; char remote_address[INET6_ADDRSTRLEN]; char *protocol; if(n->local.family == AF_INET) { ipv4_address_to_txt(n->local.ip.ipv4, local_address); - ipv4_address_to_txt(n->remote.ip.ipv4, remote_address); + + if(local_sockets_is_zero_address(&n->remote)) + remote_address[0] = '\0'; + else + ipv4_address_to_txt(n->remote.ip.ipv4, remote_address); + protocol = n->local.protocol == IPPROTO_TCP ? "tcp4" : "udp4"; } + else if(is_local_socket_ipv46(n)) { + strncpyz(local_address, "*", sizeof(local_address) - 1); + remote_address[0] = '\0'; + protocol = n->local.protocol == IPPROTO_TCP ? "tcp46" : "udp46"; + } else if(n->local.family == AF_INET6) { ipv6_address_to_txt(&n->local.ip.ipv6, local_address); - ipv6_address_to_txt(&n->remote.ip.ipv6, remote_address); + + if(local_sockets_is_zero_address(&n->remote)) + remote_address[0] = '\0'; + else + ipv6_address_to_txt(&n->remote.ip.ipv6, remote_address); + protocol = n->local.protocol == IPPROTO_TCP ? 
"tcp6" : "udp6"; } else @@ -113,47 +156,60 @@ static void local_socket_to_json_array(BUFFER *wb, LOCAL_SOCKET *n, uint64_t pro string_freez(u); } - if(!aggregated) { - buffer_json_add_array_item_string(wb, local_address); - buffer_json_add_array_item_uint64(wb, n->local.port); - } - buffer_json_add_array_item_string(wb, n->network_viewer.local_address_space); - - if(!aggregated) { - buffer_json_add_array_item_string(wb, remote_address); - buffer_json_add_array_item_uint64(wb, n->remote.port); - } - buffer_json_add_array_item_string(wb, n->network_viewer.remote_address_space); - - uint16_t server_port = 0; - const char *server_address = NULL; - const char *client_address_space = NULL; - const char *server_address_space = NULL; + struct socket_endpoint *server_endpoint; + const char *server_address; + const char *client_address_space; + const char *server_address_space; switch (n->direction) { case SOCKET_DIRECTION_LISTEN: case SOCKET_DIRECTION_INBOUND: case SOCKET_DIRECTION_LOCAL_INBOUND: - server_port = n->local.port; server_address = local_address; - server_address_space = n->network_viewer.local_address_space; - client_address_space = n->network_viewer.remote_address_space; + server_address_space = n->network_viewer.aggregated_key.local_address_space; + client_address_space = n->network_viewer.aggregated_key.remote_address_space; + server_endpoint = &n->local; break; case SOCKET_DIRECTION_OUTBOUND: case SOCKET_DIRECTION_LOCAL_OUTBOUND: - server_port = n->remote.port; server_address = remote_address; - server_address_space = n->network_viewer.remote_address_space; - client_address_space = n->network_viewer.local_address_space; + server_address_space = n->network_viewer.aggregated_key.remote_address_space; + client_address_space = n->network_viewer.aggregated_key.local_address_space; + server_endpoint = &n->remote; break; case SOCKET_DIRECTION_NONE: + server_address = NULL; + client_address_space = NULL; + server_address_space = NULL; + server_endpoint = NULL; break; } - if(aggregated) + + if(server_endpoint) { + STRING *serv = system_servicenames_cache_lookup(sc, server_endpoint->port, server_endpoint->protocol); + buffer_json_add_array_item_string(wb, string2str(serv)); + } + else + buffer_json_add_array_item_string(wb, "[unknown]"); + + if(!aggregated) { + buffer_json_add_array_item_string(wb, local_address); + buffer_json_add_array_item_uint64(wb, n->local.port); + } + buffer_json_add_array_item_string(wb, n->network_viewer.aggregated_key.local_address_space); + + if(!aggregated) { + buffer_json_add_array_item_string(wb, remote_address); + buffer_json_add_array_item_uint64(wb, n->remote.port); + } + buffer_json_add_array_item_string(wb, n->network_viewer.aggregated_key.remote_address_space); + + if(aggregated) { buffer_json_add_array_item_string(wb, server_address); + } - buffer_json_add_array_item_uint64(wb, server_port); + buffer_json_add_array_item_uint64(wb, n->network_viewer.aggregated_key.server.port); if(aggregated) { buffer_json_add_array_item_string(wb, client_address_space); @@ -162,58 +218,176 @@ static void local_socket_to_json_array(BUFFER *wb, LOCAL_SOCKET *n, uint64_t pro // buffer_json_add_array_item_uint64(wb, n->inode); // buffer_json_add_array_item_uint64(wb, n->net_ns_inode); + + // RTT + buffer_json_add_array_item_double(wb, (double)n->info.tcp.tcpi_rtt / (double)USEC_PER_MS); + if(st->max.tcpi_rtt < n->info.tcp.tcpi_rtt) + st->max.tcpi_rtt = n->info.tcp.tcpi_rtt; + + // Receiver RTT + buffer_json_add_array_item_double(wb, (double)n->info.tcp.tcpi_rcv_rtt / 
(double)USEC_PER_MS); + if(st->max.tcpi_rcv_rtt < n->info.tcp.tcpi_rcv_rtt) + st->max.tcpi_rcv_rtt = n->info.tcp.tcpi_rcv_rtt; + + // Retransmissions + buffer_json_add_array_item_uint64(wb, n->info.tcp.tcpi_total_retrans); + if(st->max.tcpi_total_retrans < n->info.tcp.tcpi_total_retrans) + st->max.tcpi_total_retrans = n->info.tcp.tcpi_total_retrans; + + // count buffer_json_add_array_item_uint64(wb, n->network_viewer.count); } buffer_json_array_close(wb); } -static void local_sockets_cb_to_json(LS_STATE *ls, LOCAL_SOCKET *n, void *data) { +static void populate_aggregated_key(LOCAL_SOCKET *n) { n->network_viewer.count = 1; - n->network_viewer.local_address_space = local_sockets_address_space(&n->local); - n->network_viewer.remote_address_space = local_sockets_address_space(&n->remote); - local_socket_to_json_array(data, n, ls->proc_self_net_ns_inode, false); -} -static void local_sockets_cb_to_aggregation(LS_STATE *ls __maybe_unused, LOCAL_SOCKET *n, void *data) { - SIMPLE_HASHTABLE_AGGREGATED_SOCKETS *ht = data; - n->network_viewer.count = 1; - n->network_viewer.local_address_space = local_sockets_address_space(&n->local); - n->network_viewer.remote_address_space = local_sockets_address_space(&n->remote); + n->network_viewer.aggregated_key.pid = n->pid; + n->network_viewer.aggregated_key.uid = n->uid; + n->network_viewer.aggregated_key.direction = n->direction; + n->network_viewer.aggregated_key.net_ns_inode = n->net_ns_inode; + n->network_viewer.aggregated_key.state = n->state; switch(n->direction) { case SOCKET_DIRECTION_INBOUND: case SOCKET_DIRECTION_LOCAL_INBOUND: case SOCKET_DIRECTION_LISTEN: - memset(&n->remote.ip, 0, sizeof(n->remote.ip)); - n->remote.port = 0; + n->network_viewer.aggregated_key.server = n->local; break; case SOCKET_DIRECTION_OUTBOUND: case SOCKET_DIRECTION_LOCAL_OUTBOUND: - memset(&n->local.ip, 0, sizeof(n->local.ip)); - n->local.port = 0; + n->network_viewer.aggregated_key.server = n->remote; break; case SOCKET_DIRECTION_NONE: - return; + break; } - n->inode = 0; - n->local_ip_hash = 0; - n->remote_ip_hash = 0; - n->local_port_hash = 0; - n->timer = 0; - n->retransmits = 0; - n->expires = 0; - n->rqueue = 0; - n->wqueue = 0; - memset(&n->local_port_key, 0, sizeof(n->local_port_key)); - - XXH64_hash_t hash = XXH3_64bits(n, sizeof(*n)); + n->network_viewer.aggregated_key.local_address_space = local_sockets_address_space(&n->local); + n->network_viewer.aggregated_key.remote_address_space = local_sockets_address_space(&n->remote); +} + +static void local_sockets_cb_to_json(LS_STATE *ls, LOCAL_SOCKET *n, void *data) { + struct sockets_stats *st = data; + populate_aggregated_key(n); + local_socket_to_json_array(st, n, ls->proc_self_net_ns_inode, false); +} + +#define KEEP_THE_BIGGER(a, b) (a) = ((a) < (b)) ? (b) : (a) +#define KEEP_THE_SMALLER(a, b) (a) = ((a) > (b)) ? 
(b) : (a) +#define SUM_THEM_ALL(a, b) (a) += (b) +#define OR_THEM_ALL(a, b) (a) |= (b) + +static void local_sockets_cb_to_aggregation(LS_STATE *ls __maybe_unused, LOCAL_SOCKET *n, void *data) { + SIMPLE_HASHTABLE_AGGREGATED_SOCKETS *ht = data; + + populate_aggregated_key(n); + XXH64_hash_t hash = XXH3_64bits(&n->network_viewer.aggregated_key, sizeof(n->network_viewer.aggregated_key)); SIMPLE_HASHTABLE_SLOT_AGGREGATED_SOCKETS *sl = simple_hashtable_get_slot_AGGREGATED_SOCKETS(ht, hash, n, true); LOCAL_SOCKET *t = SIMPLE_HASHTABLE_SLOT_DATA(sl); if(t) { t->network_viewer.count++; + + KEEP_THE_BIGGER(t->timer, n->timer); + KEEP_THE_BIGGER(t->retransmits, n->retransmits); + KEEP_THE_SMALLER(t->expires, n->expires); + KEEP_THE_BIGGER(t->rqueue, n->rqueue); + KEEP_THE_BIGGER(t->wqueue, n->wqueue); + + // The current number of consecutive retransmissions that have occurred for the most recently transmitted segment. + SUM_THEM_ALL(t->info.tcp.tcpi_retransmits, n->info.tcp.tcpi_retransmits); + + // The total number of retransmissions that have occurred for the entire connection since it was established. + SUM_THEM_ALL(t->info.tcp.tcpi_total_retrans, n->info.tcp.tcpi_total_retrans); + + // The total number of segments that have been retransmitted since the connection was established. + SUM_THEM_ALL(t->info.tcp.tcpi_retrans, n->info.tcp.tcpi_retrans); + + // The number of keepalive probes sent + SUM_THEM_ALL(t->info.tcp.tcpi_probes, n->info.tcp.tcpi_probes); + + // The number of times the retransmission timeout has been backed off. + SUM_THEM_ALL(t->info.tcp.tcpi_backoff, n->info.tcp.tcpi_backoff); + + // A bitmask representing the TCP options currently enabled for the connection, such as SACK and Timestamps. + OR_THEM_ALL(t->info.tcp.tcpi_options, n->info.tcp.tcpi_options); + + // The send window scale value used for this connection + KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_wscale, n->info.tcp.tcpi_snd_wscale); + + // The receive window scale value used for this connection + KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_wscale, n->info.tcp.tcpi_rcv_wscale); + + // Retransmission timeout in milliseconds + KEEP_THE_SMALLER(t->info.tcp.tcpi_rto, n->info.tcp.tcpi_rto); + + // The delayed acknowledgement timeout in milliseconds. + KEEP_THE_SMALLER(t->info.tcp.tcpi_ato, n->info.tcp.tcpi_ato); + + // The maximum segment size for sending. + KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_mss, n->info.tcp.tcpi_snd_mss); + + // The maximum segment size for receiving. + KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_mss, n->info.tcp.tcpi_rcv_mss); + + // The number of unacknowledged segments + SUM_THEM_ALL(t->info.tcp.tcpi_unacked, n->info.tcp.tcpi_unacked); + + // The number of segments that have been selectively acknowledged + SUM_THEM_ALL(t->info.tcp.tcpi_sacked, n->info.tcp.tcpi_sacked); + + // The number of segments that have been selectively acknowledged + SUM_THEM_ALL(t->info.tcp.tcpi_sacked, n->info.tcp.tcpi_sacked); + + // The number of lost segments. + SUM_THEM_ALL(t->info.tcp.tcpi_lost, n->info.tcp.tcpi_lost); + + // The number of forward acknowledgment segments. + SUM_THEM_ALL(t->info.tcp.tcpi_fackets, n->info.tcp.tcpi_fackets); + + // The time in milliseconds since the last data was sent. + KEEP_THE_SMALLER(t->info.tcp.tcpi_last_data_sent, n->info.tcp.tcpi_last_data_sent); + + // The time in milliseconds since the last acknowledgment was sent (not tracked in Linux, hence often zero). 
+ KEEP_THE_SMALLER(t->info.tcp.tcpi_last_ack_sent, n->info.tcp.tcpi_last_ack_sent); + + // The time in milliseconds since the last data was received. + KEEP_THE_SMALLER(t->info.tcp.tcpi_last_data_recv, n->info.tcp.tcpi_last_data_recv); + + // The time in milliseconds since the last acknowledgment was received. + KEEP_THE_SMALLER(t->info.tcp.tcpi_last_ack_recv, n->info.tcp.tcpi_last_ack_recv); + + // The path MTU for this connection + KEEP_THE_SMALLER(t->info.tcp.tcpi_pmtu, n->info.tcp.tcpi_pmtu); + + // The slow start threshold for receiving + KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_ssthresh, n->info.tcp.tcpi_rcv_ssthresh); + + // The slow start threshold for sending + KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_ssthresh, n->info.tcp.tcpi_snd_ssthresh); + + // The round trip time in milliseconds + KEEP_THE_BIGGER(t->info.tcp.tcpi_rtt, n->info.tcp.tcpi_rtt); + + // The round trip time variance in milliseconds. + KEEP_THE_BIGGER(t->info.tcp.tcpi_rttvar, n->info.tcp.tcpi_rttvar); + + // The size of the sending congestion window. + KEEP_THE_SMALLER(t->info.tcp.tcpi_snd_cwnd, n->info.tcp.tcpi_snd_cwnd); + + // The maximum segment size that could be advertised. + KEEP_THE_BIGGER(t->info.tcp.tcpi_advmss, n->info.tcp.tcpi_advmss); + + // The reordering metric + KEEP_THE_SMALLER(t->info.tcp.tcpi_reordering, n->info.tcp.tcpi_reordering); + + // The receive round trip time in milliseconds. + KEEP_THE_BIGGER(t->info.tcp.tcpi_rcv_rtt, n->info.tcp.tcpi_rcv_rtt); + + // The available space in the receive buffer. + KEEP_THE_SMALLER(t->info.tcp.tcpi_rcv_space, n->info.tcp.tcpi_rcv_space); } else { t = mallocz(sizeof(*t)); @@ -240,6 +414,10 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu wb->content_type = CT_APPLICATION_JSON; buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + struct sockets_stats st = { + .wb = wb, + }; + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); buffer_json_member_add_string(wb, "type", "table"); buffer_json_member_add_time_t(wb, "update_every", 5); @@ -328,9 +506,12 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu .cmdline = true, .comm = true, .namespaces = true, + .tcp_info = true, .max_errors = 10, + .max_concurrent_namespaces = 5, }, + .spawn_server = spawn_srv, .stats = { 0 }, .sockets_hashtable = { 0 }, .local_ips_hashtable = { 0 }, @@ -345,7 +526,7 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu } else { ls.config.cb = local_sockets_cb_to_json; - ls.config.data = wb; + ls.config.data = &st; } local_sockets_process(&ls); @@ -366,7 +547,7 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu qsort(array, added, sizeof(LOCAL_SOCKET *), local_sockets_compar); for(size_t i = 0; i < added ;i++) { - local_socket_to_json_array(wb, array[i], proc_self_net_ns_inode, true); + local_socket_to_json_array(&st, array[i], proc_self_net_ns_inode, true); string_freez(array[i]->cmdline); freez(array[i]); } @@ -451,6 +632,14 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu RRDF_FIELD_OPTS_VISIBLE, NULL); + // Portname + buffer_rrdf_table_add_field(wb, field_id++, "Portname", "Server Port Name", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + if(!aggregated) { // Local Address buffer_rrdf_table_add_field(wb, field_id++, 
"LocalIP", "Local IP Address", @@ -555,14 +744,40 @@ void network_viewer_function(const char *transaction, char *function __maybe_unu // RRDF_FIELD_OPTS_NONE, // NULL); + + // RTT + buffer_rrdf_table_add_field(wb, field_id++, "RTT", aggregated ? "Max Smoothed Round Trip Time" : "Smoothed Round Trip Time", + RRDF_FIELD_TYPE_DURATION, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "ms", st.max.tcpi_rtt / USEC_PER_MS, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // Asymmetry RTT + buffer_rrdf_table_add_field(wb, field_id++, "RecvRTT", aggregated ? "Max Receiver ACKs RTT" : "Receiver ACKs RTT", + RRDF_FIELD_TYPE_DURATION, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "ms", st.max.tcpi_rcv_rtt / USEC_PER_MS, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + + // Rentrasmissions + buffer_rrdf_table_add_field(wb, field_id++, "Retrans", "Total Retransmissions", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, "packets", st.max.tcpi_total_retrans, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, + NULL); + // Count buffer_rrdf_table_add_field(wb, field_id++, "Count", "Number of sockets like this", RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, - 0, NULL, NAN, RRDF_FIELD_SORT_DESCENDING, NULL, - RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + 0, "sockets", NAN, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE, aggregated ? (RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY) : RRDF_FIELD_OPTS_NONE, NULL); } + buffer_json_object_close(wb); // columns buffer_json_member_add_string(wb, "default_sort_column", aggregated ? 
"Count" : "Direction"); @@ -745,20 +960,31 @@ int main(int argc __maybe_unused, char **argv __maybe_unused) { netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX"); if(verify_netdata_host_prefix(true) == -1) exit(1); + spawn_srv = spawn_server_create(SPAWN_SERVER_OPTION_CALLBACK, "setns", local_sockets_spawn_server_callback, argc, (const char **)argv); + if(spawn_srv == NULL) { + fprintf(stderr, "Cannot create spawn server.\n"); + exit(1); + } + uc = system_usernames_cache_init(); + sc = system_servicenames_cache_init(); // ---------------------------------------------------------------------------------------------------------------- if(argc == 2 && strcmp(argv[1], "debug") == 0) { - bool cancelled = false; - usec_t stop_monotonic_ut = now_monotonic_usec() + 600 * USEC_PER_SEC; - char buf[] = "network-connections sockets:aggregated"; - network_viewer_function("123", buf, &stop_monotonic_ut, &cancelled, - NULL, HTTP_ACCESS_ALL, NULL, NULL); - - char buf2[] = "network-connections sockets:detailed"; - network_viewer_function("123", buf2, &stop_monotonic_ut, &cancelled, - NULL, HTTP_ACCESS_ALL, NULL, NULL); +// for(int i = 0; i < 100; i++) { + bool cancelled = false; + usec_t stop_monotonic_ut = now_monotonic_usec() + 600 * USEC_PER_SEC; + char buf[] = "network-connections sockets:aggregated"; + network_viewer_function("123", buf, &stop_monotonic_ut, &cancelled, + NULL, HTTP_ACCESS_ALL, NULL, NULL); + + char buf2[] = "network-connections sockets:detailed"; + network_viewer_function("123", buf2, &stop_monotonic_ut, &cancelled, + NULL, HTTP_ACCESS_ALL, NULL, NULL); +// } + + spawn_server_destroy(spawn_srv); exit(1); } @@ -799,5 +1025,8 @@ int main(int argc __maybe_unused, char **argv __maybe_unused) { } } + spawn_server_destroy(spawn_srv); + spawn_srv = NULL; + return 0; } diff --git a/src/collectors/nfacct.plugin/plugin_nfacct.c b/src/collectors/nfacct.plugin/plugin_nfacct.c index d3d18a363..92c82351a 100644 --- a/src/collectors/nfacct.plugin/plugin_nfacct.c +++ b/src/collectors/nfacct.plugin/plugin_nfacct.c @@ -809,7 +809,7 @@ int main(int argc, char **argv) { nfacct_signals(); - errno = 0; + errno_clear(); if(freq >= netdata_update_every) netdata_update_every = freq; diff --git a/src/collectors/perf.plugin/perf_plugin.c b/src/collectors/perf.plugin/perf_plugin.c index eb24b55e1..8fb4014e4 100644 --- a/src/collectors/perf.plugin/perf_plugin.c +++ b/src/collectors/perf.plugin/perf_plugin.c @@ -1288,7 +1288,7 @@ int main(int argc, char **argv) { parse_command_line(argc, argv); - errno = 0; + errno_clear(); if(freq >= update_every) update_every = freq; diff --git a/src/collectors/plugins.d/README.md b/src/collectors/plugins.d/README.md index a1549af48..6b53dbed6 100644 --- a/src/collectors/plugins.d/README.md +++ b/src/collectors/plugins.d/README.md @@ -20,7 +20,7 @@ from external processes, thus allowing Netdata to use **external plugins**. | [charts.d.plugin](/src/collectors/charts.d.plugin/README.md) | `BASH` | all | a **plugin orchestrator** for data collection modules written in `BASH` v4+. | | [cups.plugin](/src/collectors/cups.plugin/README.md) | `C` | all | monitors **CUPS** | | [ebpf.plugin](/src/collectors/ebpf.plugin/README.md) | `C` | linux | monitors different metrics on environments using kernel internal functions. | -| [go.d.plugin](/src/go/collectors/go.d.plugin/README.md) | `GO` | all | collects metrics from the system, applications, or third-party APIs. 
| +| [go.d.plugin](/src/go/plugin/go.d/README.md) | `GO` | all | collects metrics from the system, applications, or third-party APIs. | | [ioping.plugin](/src/collectors/ioping.plugin/README.md) | `C` | all | measures disk latency. | | [freeipmi.plugin](/src/collectors/freeipmi.plugin/README.md) | `C` | linux | collects metrics from enterprise hardware sensors, on Linux servers. | | [nfacct.plugin](/src/collectors/nfacct.plugin/README.md) | `C` | linux | collects netfilter firewall, connection tracker and accounting metrics using `libmnl` and `libnetfilter_acct`. | diff --git a/src/collectors/plugins.d/local_listeners.c b/src/collectors/plugins.d/local_listeners.c index 2829b3e37..2a729b34d 100644 --- a/src/collectors/plugins.d/local_listeners.c +++ b/src/collectors/plugins.d/local_listeners.c @@ -15,6 +15,14 @@ static const char *protocol_name(LOCAL_SOCKET *n) { else return "UNKNOWN_IPV4"; } + else if(is_local_socket_ipv46(n)) { + if (n->local.protocol == IPPROTO_TCP) + return "TCP46"; + else if(n->local.protocol == IPPROTO_UDP) + return "UDP46"; + else + return "UNKNOWN_IPV46"; + } else if(n->local.family == AF_INET6) { if (n->local.protocol == IPPROTO_TCP) return "TCP6"; @@ -35,6 +43,10 @@ static void print_local_listeners(LS_STATE *ls __maybe_unused, LOCAL_SOCKET *n, ipv4_address_to_txt(n->local.ip.ipv4, local_address); ipv4_address_to_txt(n->remote.ip.ipv4, remote_address); } + else if(is_local_socket_ipv46(n)) { + strncpyz(local_address, "*", sizeof(local_address) - 1); + remote_address[0] = '\0'; + } else if(n->local.family == AF_INET6) { ipv6_address_to_txt(&n->local.ip.ipv6, local_address); ipv6_address_to_txt(&n->remote.ip.ipv6, remote_address); @@ -93,8 +105,10 @@ int main(int argc, char **argv) { .cmdline = true, .comm = false, .namespaces = true, + .tcp_info = false, .max_errors = 10, + .max_concurrent_namespaces = 10, .cb = print_local_listeners, .data = NULL, @@ -212,6 +226,7 @@ int main(int argc, char **argv) { ls.config.comm = true; ls.config.cmdline = true; ls.config.namespaces = true; + ls.config.tcp_info = true; ls.config.uid = true; ls.config.max_errors = SIZE_MAX; ls.config.cb = print_local_listeners_debug; @@ -276,8 +291,17 @@ int main(int argc, char **argv) { } } + SPAWN_SERVER *spawn_server = spawn_server_create(SPAWN_SERVER_OPTION_CALLBACK, NULL, local_sockets_spawn_server_callback, argc, (const char **)argv); + if(spawn_server == NULL) { + fprintf(stderr, "Cannot create spawn server.\n"); + exit(1); + } + ls.spawn_server = spawn_server; + local_sockets_process(&ls); + spawn_server_destroy(spawn_server); + getrusage(RUSAGE_SELF, &ended); if(debug) { @@ -285,7 +309,7 @@ int main(int argc, char **argv) { unsigned long long system = ended.ru_stime.tv_sec * 1000000ULL + ended.ru_stime.tv_usec - started.ru_stime.tv_sec * 1000000ULL + started.ru_stime.tv_usec; unsigned long long total = user + system; - fprintf(stderr, "CPU Usage %llu user, %llu system, %llu total\n", user, system, total); + fprintf(stderr, "CPU Usage %llu user, %llu system, %llu total, %zu namespaces, %zu nl requests (without namespaces)\n", user, system, total, ls.stats.namespaces_found, ls.stats.mnl_sends); } return 0; diff --git a/src/collectors/plugins.d/ndsudo.c b/src/collectors/plugins.d/ndsudo.c index d53ca9f28..d2cf4fae1 100644 --- a/src/collectors/plugins.d/ndsudo.c +++ b/src/collectors/plugins.d/ndsudo.c @@ -14,6 +14,31 @@ struct command { const char *search[MAX_SEARCH]; } allowed_commands[] = { { + .name = "exim-bpc", + .params = "-bpc", + .search = + { + [0] = "exim", + [1] = NULL, + }, + 
}, + { + .name = "nsd-control-stats", + .params = "stats_noreset", + .search = { + [0] = "nsd-control", + [1] = NULL, + }, + }, + { + .name = "chronyc-serverstats", + .params = "serverstats", + .search = { + [0] = "chronyc", + [1] = NULL, + }, + }, + { .name = "dmsetup-status-cache", .params = "status --target cache --noflush", .search = { @@ -38,6 +63,14 @@ struct command { }, }, { + .name = "smartctl-json-scan-open", + .params = "--json --scan-open", + .search = { + [0] = "smartctl", + [1] = NULL, + }, + }, + { .name = "smartctl-json-device-info", .params = "--json --all {{deviceName}} --device {{deviceType}} --nocheck {{powerMode}}", .search = { @@ -54,6 +87,14 @@ struct command { }, }, { + .name = "fail2ban-client-status-socket", + .params = "-s {{socket_path}} status", + .search = { + [0] = "fail2ban-client", + [1] = NULL, + }, + }, + { .name = "fail2ban-client-status-jail", .params = "status {{jail}}", .search = { @@ -62,6 +103,14 @@ struct command { }, }, { + .name = "fail2ban-client-status-jail-socket", + .params = "-s {{socket_path}} status {{jail}}", + .search = { + [0] = "fail2ban-client", + [1] = NULL, + }, + }, + { .name = "storcli-controllers-info", .params = "/cALL show all J nolog", .search = { diff --git a/src/collectors/plugins.d/plugins_d.c b/src/collectors/plugins.d/plugins_d.c index f5f55b770..85f1563c3 100644 --- a/src/collectors/plugins.d/plugins_d.c +++ b/src/collectors/plugins.d/plugins_d.c @@ -68,23 +68,15 @@ static void pluginsd_worker_thread_cleanup(void *pptr) { cd->unsafe.running = false; cd->unsafe.thread = 0; - pid_t pid = cd->unsafe.pid; cd->unsafe.pid = 0; - spinlock_unlock(&cd->unsafe.spinlock); - - if (pid) { - siginfo_t info; - netdata_log_info("PLUGINSD: 'host:%s', killing data collection child process with pid %d", - rrdhost_hostname(cd->host), pid); + POPEN_INSTANCE *pi = cd->unsafe.pi; + cd->unsafe.pi = NULL; - if (killpid(pid) != -1) { - netdata_log_info("PLUGINSD: 'host:%s', waiting for data collection child process pid %d to exit...", - rrdhost_hostname(cd->host), pid); + spinlock_unlock(&cd->unsafe.spinlock); - netdata_waitid(P_PID, (id_t)pid, &info, WEXITED); - } - } + if (pi) + spawn_popen_kill(pi); } #define SERIAL_FAILURES_THRESHOLD 10 @@ -160,14 +152,13 @@ static void *pluginsd_worker_thread(void *arg) { size_t count = 0; while(service_running(SERVICE_COLLECTORS)) { - FILE *fp_child_input = NULL; - FILE *fp_child_output = netdata_popen(cd->cmd, &cd->unsafe.pid, &fp_child_input); - - if(unlikely(!fp_child_input || !fp_child_output)) { + cd->unsafe.pi = spawn_popen_run(cd->cmd); + if(!cd->unsafe.pi) { netdata_log_error("PLUGINSD: 'host:%s', cannot popen(\"%s\", \"r\").", rrdhost_hostname(cd->host), cd->cmd); break; } + cd->unsafe.pid = spawn_server_instance_pid(cd->unsafe.pi->si); nd_log(NDLS_DAEMON, NDLP_DEBUG, "PLUGINSD: 'host:%s' connected to '%s' running on pid %d", @@ -190,15 +181,14 @@ static void *pluginsd_worker_thread(void *arg) { }; ND_LOG_STACK_PUSH(lgs); - count = pluginsd_process(cd->host, cd, fp_child_input, fp_child_output, 0); + count = pluginsd_process(cd->host, cd, cd->unsafe.pi->child_stdin_fp, cd->unsafe.pi->child_stdout_fp, 0); nd_log(NDLS_DAEMON, NDLP_DEBUG, "PLUGINSD: 'host:%s', '%s' (pid %d) disconnected after %zu successful data collections (ENDs).", rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, count); - killpid(cd->unsafe.pid); - - int worker_ret_code = netdata_pclose(fp_child_input, fp_child_output, cd->unsafe.pid); + int worker_ret_code = spawn_popen_kill(cd->unsafe.pi); + cd->unsafe.pi = NULL; 
if(likely(worker_ret_code == 0)) pluginsd_worker_thread_handle_success(cd); @@ -248,13 +238,6 @@ void *pluginsd_main(void *ptr) { // disable some plugins by default config_get_boolean(CONFIG_SECTION_PLUGINS, "slabinfo", CONFIG_BOOLEAN_NO); - config_get_boolean(CONFIG_SECTION_PLUGINS, "logs-management", -#if defined(LOGS_MANAGEMENT_DEV_MODE) - CONFIG_BOOLEAN_YES -#else - CONFIG_BOOLEAN_NO -#endif - ); // it crashes (both threads) on Alpine after we made it multi-threaded // works with "--device /dev/ipmi0", but this is not default // see https://github.com/netdata/netdata/pull/15564 for details @@ -273,7 +256,7 @@ void *pluginsd_main(void *ptr) { if (unlikely(!service_running(SERVICE_COLLECTORS))) break; - errno = 0; + errno_clear(); DIR *dir = opendir(directory_name); if (unlikely(!dir)) { if (directory_errors[idx] != errno) { diff --git a/src/collectors/plugins.d/plugins_d.h b/src/collectors/plugins.d/plugins_d.h index ec17c3145..51efa5a72 100644 --- a/src/collectors/plugins.d/plugins_d.h +++ b/src/collectors/plugins.d/plugins_d.h @@ -34,6 +34,7 @@ struct plugind { bool running; // do not touch this structure after setting this to 1 bool enabled; // if this is enabled or not ND_THREAD *thread; + POPEN_INSTANCE *pi; pid_t pid; } unsafe; diff --git a/src/collectors/plugins.d/pluginsd_internals.c b/src/collectors/plugins.d/pluginsd_internals.c index d03daf745..31f0f7539 100644 --- a/src/collectors/plugins.d/pluginsd_internals.c +++ b/src/collectors/plugins.d/pluginsd_internals.c @@ -13,7 +13,7 @@ ssize_t send_to_plugin(const char *txt, void *data) { return h2o_stream_write(parser->h2o_ctx, txt, strlen(txt)); #endif - errno = 0; + errno_clear(); spinlock_lock(&parser->writer.spinlock); ssize_t bytes = -1; diff --git a/src/collectors/proc.plugin/integrations/zfs_pools.md b/src/collectors/proc.plugin/integrations/zfs_pools.md deleted file mode 100644 index f18c82baf..000000000 --- a/src/collectors/proc.plugin/integrations/zfs_pools.md +++ /dev/null @@ -1,105 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/proc.plugin/integrations/zfs_pools.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/proc.plugin/metadata.yaml" -sidebar_label: "ZFS Pools" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Linux Systems/Filesystem/ZFS" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# ZFS Pools - - -<img src="https://netdata.cloud/img/filesystem.svg" width="150"/> - - -Plugin: proc.plugin -Module: /proc/spl/kstat/zfs - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This integration provides metrics about the state of ZFS pools. - - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This integration doesn't support auto-detection. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. 
- - - -### Per zfs pool - - - -Labels: - -| Label | Description | -|:-----------|:----------------| -| pool | TBD | - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| zfspool.state | online, degraded, faulted, offline, removed, unavail, suspended | boolean | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ zfs_pool_state_warn ](https://github.com/netdata/netdata/blob/master/src/health/health.d/zfs.conf) | zfspool.state | ZFS pool ${label:pool} state is degraded | -| [ zfs_pool_state_crit ](https://github.com/netdata/netdata/blob/master/src/health/health.d/zfs.conf) | zfspool.state | ZFS pool ${label:pool} state is faulted or unavail | - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -There is no configuration file. -#### Options - - - -There are no configuration options. - -#### Examples -There are no configuration examples. - - diff --git a/src/collectors/proc.plugin/ipc.c b/src/collectors/proc.plugin/ipc.c index 6d7d920f0..5b47116b9 100644 --- a/src/collectors/proc.plugin/ipc.c +++ b/src/collectors/proc.plugin/ipc.c @@ -6,6 +6,9 @@ #include <sys/msg.h> #include <sys/shm.h> +#define _COMMON_PLUGIN_NAME PLUGIN_PROC_NAME +#define _COMMON_PLUGIN_MODULE_NAME "ipc" +#include "../common-contexts/common-contexts.h" #ifndef SEMVMX #define SEMVMX 32767 /* <= 32767 semaphore maximum value */ @@ -282,8 +285,8 @@ int do_ipc(int update_every, usec_t dt) { static struct ipc_limits limits; static struct ipc_status status; static const RRDVAR_ACQUIRED *arrays_max = NULL, *semaphores_max = NULL; - static RRDSET *st_semaphores = NULL, *st_arrays = NULL; - static RRDDIM *rd_semaphores = NULL, *rd_arrays = NULL; + static RRDSET *st_arrays = NULL; + static RRDDIM *rd_arrays = NULL; static char *msg_filename = NULL; static struct message_queue *message_queue_root = NULL; static long long dimensions_limit; @@ -314,25 +317,7 @@ int do_ipc(int update_every, usec_t dt) { do_sem = CONFIG_BOOLEAN_NO; } else { - // create the charts - if(unlikely(!st_semaphores)) { - st_semaphores = rrdset_create_localhost( - "system" - , "ipc_semaphores" - , NULL - , "ipc semaphores" - , NULL - , "IPC Semaphores" - , "semaphores" - , PLUGIN_PROC_NAME - , "ipc" - , NETDATA_CHART_PRIO_SYSTEM_IPC_SEMAPHORES - , localhost->rrd_update_every - , RRDSET_TYPE_AREA - ); - rd_semaphores = rrddim_add(st_semaphores, "semaphores", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } - + // create the chart if(unlikely(!st_arrays)) { st_arrays = rrdset_create_localhost( "system" @@ -379,7 +364,6 @@ int do_ipc(int update_every, usec_t dt) { rrdvar_host_variable_set(localhost, arrays_max, limits.semmni); st_arrays->red = limits.semmni; - st_semaphores->red = limits.semmns; read_limits_next = 60 / update_every; } @@ -392,8 +376,7 @@ int do_ipc(int update_every, usec_t dt) { return 0; } - rrddim_set_by_pointer(st_semaphores, rd_semaphores, status.semaem); - rrdset_done(st_semaphores); + common_semaphore_ipc(status.semaem, limits.semmns, "ipc", localhost->rrd_update_every); rrddim_set_by_pointer(st_arrays, rd_arrays, status.semusz); rrdset_done(st_arrays); diff --git a/src/collectors/proc.plugin/metadata.yaml b/src/collectors/proc.plugin/metadata.yaml index fd834dd38..6d9e00d32 100644 --- a/src/collectors/proc.plugin/metadata.yaml +++ b/src/collectors/proc.plugin/metadata.yaml @@ -4497,98 +4497,6 @@ modules: - name: retransmits - meta: plugin_name: proc.plugin - module_name: 
/proc/spl/kstat/zfs - monitored_instance: - name: ZFS Pools - link: "" - categories: - - data-collection.linux-systems.filesystem-metrics.zfs - icon_filename: "filesystem.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - zfs pools - - pools - - zfs - - filesystem - most_popular: false - overview: - data_collection: - metrics_description: "This integration provides metrics about the state of ZFS pools." - method_description: "" - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "" - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: [] - configuration: - file: - name: "" - description: "" - options: - description: "" - folding: - title: "" - enabled: true - list: [] - examples: - folding: - enabled: true - title: "" - list: [] - troubleshooting: - problems: - list: [] - alerts: - - name: zfs_pool_state_warn - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/zfs.conf - metric: zfspool.state - info: ZFS pool ${label:pool} state is degraded - - name: zfs_pool_state_crit - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/zfs.conf - metric: zfspool.state - info: ZFS pool ${label:pool} state is faulted or unavail - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: zfs pool - description: "" - labels: - - name: pool - description: TBD - metrics: - - name: zfspool.state - description: ZFS pool state - unit: "boolean" - chart_type: line - dimensions: - - name: online - - name: degraded - - name: faulted - - name: offline - - name: removed - - name: unavail - - name: suspended - - meta: - plugin_name: proc.plugin module_name: /proc/spl/kstat/zfs/arcstats monitored_instance: name: ZFS Adaptive Replacement Cache diff --git a/src/collectors/proc.plugin/plugin_proc.c b/src/collectors/proc.plugin/plugin_proc.c index 095cd7389..b4a856467 100644 --- a/src/collectors/proc.plugin/plugin_proc.c +++ b/src/collectors/proc.plugin/plugin_proc.c @@ -62,7 +62,6 @@ static struct proc_module { // ZFS metrics {.name = "/proc/spl/kstat/zfs/arcstats", .dim = "zfs_arcstats", .func = do_proc_spl_kstat_zfs_arcstats}, - {.name = "/proc/spl/kstat/zfs/pool/state",.dim = "zfs_pool_state",.func = do_proc_spl_kstat_zfs_pool_state}, // BTRFS metrics {.name = "/sys/fs/btrfs", .dim = "btrfs", .func = do_sys_fs_btrfs}, diff --git a/src/collectors/proc.plugin/plugin_proc.h b/src/collectors/proc.plugin/plugin_proc.h index a5f7ce6ec..bb1ddf48c 100644 --- a/src/collectors/proc.plugin/plugin_proc.h +++ b/src/collectors/proc.plugin/plugin_proc.h @@ -37,7 +37,6 @@ int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt); int do_proc_sys_devices_pci_aer(int update_every, usec_t dt); int do_proc_sys_devices_system_node(int update_every, usec_t dt); int do_proc_spl_kstat_zfs_arcstats(int update_every, usec_t dt); -int do_proc_spl_kstat_zfs_pool_state(int update_every, usec_t dt); int do_sys_fs_btrfs(int update_every, usec_t dt); int do_proc_net_sockstat(int update_every, usec_t dt); int do_proc_net_sockstat6(int update_every, usec_t dt); diff --git a/src/collectors/proc.plugin/proc_meminfo.c b/src/collectors/proc.plugin/proc_meminfo.c index c11b4f642..db458b239 100644 --- a/src/collectors/proc.plugin/proc_meminfo.c +++ b/src/collectors/proc.plugin/proc_meminfo.c @@ -29,7 +29,7 @@ 
int do_proc_meminfo(int update_every, usec_t dt) { static ARL_BASE *arl_base = NULL; static ARL_ENTRY *arl_hwcorrupted = NULL, *arl_memavailable = NULL, *arl_hugepages_total = NULL, - *arl_zswapped = NULL, *arl_high_low = NULL, *arl_cma_total = NULL, + *arl_zswapped = NULL, *arl_high_low = NULL, *arl_directmap4k = NULL, *arl_directmap2m = NULL, *arl_directmap4m = NULL, *arl_directmap1g = NULL; static unsigned long long @@ -189,7 +189,7 @@ int do_proc_meminfo(int update_every, usec_t dt) { arl_expect(arl_base, "FilePmdMapped", &FilePmdMapped); // CONFIG_CMA - arl_cma_total = arl_expect(arl_base, "CmaTotal", &CmaTotal); + arl_expect(arl_base, "CmaTotal", &CmaTotal); arl_expect(arl_base, "CmaFree", &CmaFree); // CONFIG_UNACCEPTED_MEMORY diff --git a/src/collectors/proc.plugin/proc_spl_kstat_zfs.c b/src/collectors/proc.plugin/proc_spl_kstat_zfs.c index 53cc299b8..be96f4449 100644 --- a/src/collectors/proc.plugin/proc_spl_kstat_zfs.c +++ b/src/collectors/proc.plugin/proc_spl_kstat_zfs.c @@ -200,230 +200,3 @@ int do_proc_spl_kstat_zfs_arcstats(int update_every, usec_t dt) { return 0; } - -struct zfs_pool { - RRDSET *st; - - RRDDIM *rd_online; - RRDDIM *rd_degraded; - RRDDIM *rd_faulted; - RRDDIM *rd_offline; - RRDDIM *rd_removed; - RRDDIM *rd_unavail; - RRDDIM *rd_suspended; - - int updated; - int disabled; - - int online; - int degraded; - int faulted; - int offline; - int removed; - int unavail; - int suspended; -}; - -struct deleted_zfs_pool { - char *name; - struct deleted_zfs_pool *next; -} *deleted_zfs_pools = NULL; - -DICTIONARY *zfs_pools = NULL; - -void disable_zfs_pool_state(struct zfs_pool *pool) -{ - if (pool->st) - rrdset_is_obsolete___safe_from_collector_thread(pool->st); - - pool->st = NULL; - - pool->rd_online = NULL; - pool->rd_degraded = NULL; - pool->rd_faulted = NULL; - pool->rd_offline = NULL; - pool->rd_removed = NULL; - pool->rd_unavail = NULL; - pool->rd_suspended = NULL; - - pool->disabled = 1; -} - -int update_zfs_pool_state_chart(const DICTIONARY_ITEM *item, void *pool_p, void *update_every_p) { - const char *name = dictionary_acquired_item_name(item); - struct zfs_pool *pool = (struct zfs_pool *)pool_p; - int update_every = *(int *)update_every_p; - - if (pool->updated) { - pool->updated = 0; - - if (!pool->disabled) { - if (unlikely(!pool->st)) { - char chart_id[MAX_CHART_ID + 1]; - snprintf(chart_id, MAX_CHART_ID, "state_%s", name); - - pool->st = rrdset_create_localhost( - "zfspool", - chart_id, - NULL, - "state", - "zfspool.state", - "ZFS pool state", - "boolean", - PLUGIN_PROC_NAME, - ZFS_PROC_POOLS, - NETDATA_CHART_PRIO_ZFS_POOL_STATE, - update_every, - RRDSET_TYPE_LINE); - - pool->rd_online = rrddim_add(pool->st, "online", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - pool->rd_degraded = rrddim_add(pool->st, "degraded", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - pool->rd_faulted = rrddim_add(pool->st, "faulted", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - pool->rd_offline = rrddim_add(pool->st, "offline", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - pool->rd_removed = rrddim_add(pool->st, "removed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - pool->rd_unavail = rrddim_add(pool->st, "unavail", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - pool->rd_suspended = rrddim_add(pool->st, "suspended", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - - rrdlabels_add(pool->st->rrdlabels, "pool", name, RRDLABEL_SRC_AUTO); - } - - rrddim_set_by_pointer(pool->st, pool->rd_online, pool->online); - rrddim_set_by_pointer(pool->st, pool->rd_degraded, pool->degraded); - rrddim_set_by_pointer(pool->st, pool->rd_faulted, 
pool->faulted); - rrddim_set_by_pointer(pool->st, pool->rd_offline, pool->offline); - rrddim_set_by_pointer(pool->st, pool->rd_removed, pool->removed); - rrddim_set_by_pointer(pool->st, pool->rd_unavail, pool->unavail); - rrddim_set_by_pointer(pool->st, pool->rd_suspended, pool->suspended); - rrdset_done(pool->st); - } - } else { - disable_zfs_pool_state(pool); - struct deleted_zfs_pool *new = callocz(1, sizeof(struct deleted_zfs_pool)); - new->name = strdupz(name); - new->next = deleted_zfs_pools; - deleted_zfs_pools = new; - } - - return 0; -} - -int do_proc_spl_kstat_zfs_pool_state(int update_every, usec_t dt) -{ - (void)dt; - - static int do_zfs_pool_state = -1; - static char *dirname = NULL; - - int pool_found = 0, state_file_found = 0; - - if (unlikely(do_zfs_pool_state == -1)) { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/spl/kstat/zfs"); - dirname = config_get("plugin:proc:" ZFS_PROC_POOLS, "directory to monitor", filename); - - zfs_pools = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED, &dictionary_stats_category_collectors, 0); - - do_zfs_pool_state = 1; - } - - if (likely(do_zfs_pool_state)) { - DIR *dir = opendir(dirname); - if (unlikely(!dir)) { - if (errno == ENOENT) - collector_info("Cannot read directory '%s'", dirname); - else - collector_error("Cannot read directory '%s'", dirname); - return 1; - } - - struct dirent *de = NULL; - while (likely(de = readdir(dir))) { - if (likely( - de->d_type == DT_DIR && ((de->d_name[0] == '.' && de->d_name[1] == '\0') || - (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0')))) - continue; - - if (unlikely(de->d_type == DT_LNK || de->d_type == DT_DIR)) { - pool_found = 1; - - struct zfs_pool *pool = dictionary_get(zfs_pools, de->d_name); - - if (unlikely(!pool)) { - struct zfs_pool new_zfs_pool = {}; - pool = dictionary_set(zfs_pools, de->d_name, &new_zfs_pool, sizeof(struct zfs_pool)); - } - - pool->updated = 1; - - if (pool->disabled) { - state_file_found = 1; - continue; - } - - pool->online = 0; - pool->degraded = 0; - pool->faulted = 0; - pool->offline = 0; - pool->removed = 0; - pool->unavail = 0; - pool->suspended = 0; - - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/%s/state", dirname, de->d_name); - - char state[STATE_SIZE + 1]; - int ret = read_txt_file(filename, state, sizeof(state)); - - if (!ret) { - state_file_found = 1; - - // ZFS pool states are described at https://openzfs.github.io/openzfs-docs/man/8/zpoolconcepts.8.html?#Device_Failure_and_Recovery - if (!strcmp(state, "ONLINE\n")) { - pool->online = 1; - } else if (!strcmp(state, "DEGRADED\n")) { - pool->degraded = 1; - } else if (!strcmp(state, "FAULTED\n")) { - pool->faulted = 1; - } else if (!strcmp(state, "OFFLINE\n")) { - pool->offline = 1; - } else if (!strcmp(state, "REMOVED\n")) { - pool->removed = 1; - } else if (!strcmp(state, "UNAVAIL\n")) { - pool->unavail = 1; - } else if (!strcmp(state, "SUSPENDED\n")) { - pool->suspended = 1; - } else { - disable_zfs_pool_state(pool); - - char *c = strchr(state, '\n'); - if (c) - *c = '\0'; - collector_error("ZFS POOLS: Undefined state %s for zpool %s, disabling the chart", state, de->d_name); - } - } - } - } - - closedir(dir); - } - - if (do_zfs_pool_state && pool_found && !state_file_found) { - collector_info("ZFS POOLS: State files not found. 
Disabling the module."); - do_zfs_pool_state = 0; - } - - if (do_zfs_pool_state) - dictionary_walkthrough_read(zfs_pools, update_zfs_pool_state_chart, &update_every); - - while (deleted_zfs_pools) { - struct deleted_zfs_pool *current_pool = deleted_zfs_pools; - dictionary_del(zfs_pools, current_pool->name); - - deleted_zfs_pools = deleted_zfs_pools->next; - - freez(current_pool->name); - freez(current_pool); - } - - return 0; -} diff --git a/src/collectors/proc.plugin/proc_stat.c b/src/collectors/proc.plugin/proc_stat.c index 838d00b8e..c211ceee5 100644 --- a/src/collectors/proc.plugin/proc_stat.c +++ b/src/collectors/proc.plugin/proc_stat.c @@ -752,33 +752,8 @@ int do_proc_stat(int update_every, usec_t dt) { } else if(unlikely(hash == hash_intr && strcmp(row_key, "intr") == 0)) { if(likely(do_interrupts)) { - static RRDSET *st_intr = NULL; - static RRDDIM *rd_interrupts = NULL; unsigned long long value = str2ull(procfile_lineword(ff, l, 1), NULL); - - if(unlikely(!st_intr)) { - st_intr = rrdset_create_localhost( - "system" - , "intr" - , NULL - , "interrupts" - , NULL - , "CPU Interrupts" - , "interrupts/s" - , PLUGIN_PROC_NAME - , PLUGIN_PROC_MODULE_STAT_NAME - , NETDATA_CHART_PRIO_SYSTEM_INTR - , update_every - , RRDSET_TYPE_LINE - ); - - rrdset_flag_set(st_intr, RRDSET_FLAG_DETAIL); - - rd_interrupts = rrddim_add(st_intr, "interrupts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(st_intr, rd_interrupts, value); - rrdset_done(st_intr); + common_interrupts(value, update_every, NULL); } } else if(unlikely(hash == hash_ctxt && strcmp(row_key, "ctxt") == 0)) { diff --git a/src/collectors/python.d.plugin/alarms/README.md b/src/collectors/python.d.plugin/alarms/README.md deleted file mode 120000 index 85759ae6c..000000000 --- a/src/collectors/python.d.plugin/alarms/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/netdata_agent_alarms.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/alarms/alarms.chart.py b/src/collectors/python.d.plugin/alarms/alarms.chart.py deleted file mode 100644 index d19427358..000000000 --- a/src/collectors/python.d.plugin/alarms/alarms.chart.py +++ /dev/null @@ -1,95 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: alarms netdata python.d module -# Author: andrewm4894 -# SPDX-License-Identifier: GPL-3.0-or-later - -from json import loads - -from bases.FrameworkServices.UrlService import UrlService - -update_every = 10 -disabled_by_default = True - - -def charts_template(sm, alarm_status_chart_type='line'): - order = [ - 'alarms', - 'values' - ] - - mappings = ', '.join(['{0}={1}'.format(k, v) for k, v in sm.items()]) - charts = { - 'alarms': { - 'options': [None, 'Alarms ({0})'.format(mappings), 'status', 'status', 'alarms.status', alarm_status_chart_type], - 'lines': [], - 'variables': [ - ['alarms_num'], - ] - }, - 'values': { - 'options': [None, 'Alarm Values', 'value', 'value', 'alarms.value', 'line'], - 'lines': [], - } - } - return order, charts - - -DEFAULT_STATUS_MAP = {'CLEAR': 0, 'WARNING': 1, 'CRITICAL': 2} -DEFAULT_URL = 'http://127.0.0.1:19999/api/v1/alarms?all' -DEFAULT_COLLECT_ALARM_VALUES = False -DEFAULT_ALARM_STATUS_CHART_TYPE = 'line' -DEFAULT_ALARM_CONTAINS_WORDS = '' -DEFAULT_ALARM_EXCLUDES_WORDS = '' - -class Service(UrlService): - def __init__(self, configuration=None, name=None): - UrlService.__init__(self, configuration=configuration, name=name) - self.sm = self.configuration.get('status_map', DEFAULT_STATUS_MAP) - self.alarm_status_chart_type = self.configuration.get('alarm_status_chart_type', DEFAULT_ALARM_STATUS_CHART_TYPE) - self.order, self.definitions = charts_template(self.sm, self.alarm_status_chart_type) - self.url = self.configuration.get('url', DEFAULT_URL) - self.collect_alarm_values = bool(self.configuration.get('collect_alarm_values', DEFAULT_COLLECT_ALARM_VALUES)) - self.collected_dims = {'alarms': set(), 'values': set()} - self.alarm_contains_words = self.configuration.get('alarm_contains_words', DEFAULT_ALARM_CONTAINS_WORDS) - self.alarm_contains_words_list = [alarm_contains_word.lstrip(' ').rstrip(' ') for alarm_contains_word in self.alarm_contains_words.split(',')] - self.alarm_excludes_words = self.configuration.get('alarm_excludes_words', DEFAULT_ALARM_EXCLUDES_WORDS) - self.alarm_excludes_words_list = [alarm_excludes_word.lstrip(' ').rstrip(' ') for alarm_excludes_word in self.alarm_excludes_words.split(',')] - - def _get_data(self): - raw_data = self._get_raw_data() - if raw_data is None: - return None - - raw_data = loads(raw_data) - alarms = raw_data.get('alarms', {}) - if self.alarm_contains_words != '': - alarms = {alarm_name: alarms[alarm_name] for alarm_name in alarms for alarm_contains_word in - self.alarm_contains_words_list if alarm_contains_word in alarm_name} - if self.alarm_excludes_words != '': - alarms = {alarm_name: alarms[alarm_name] for alarm_name in alarms for alarm_excludes_word in - self.alarm_excludes_words_list if alarm_excludes_word not in alarm_name} - - data = {a: self.sm[alarms[a]['status']] for a in alarms if alarms[a]['status'] in self.sm} - self.update_charts('alarms', data) - data['alarms_num'] = len(data) - - if self.collect_alarm_values: - data_values = {'{}_value'.format(a): alarms[a]['value'] * 100 for a in alarms if 'value' in alarms[a] and alarms[a]['value'] is not None} - self.update_charts('values', data_values, divisor=100) - data.update(data_values) - - return data - - def 
update_charts(self, chart, data, algorithm='absolute', multiplier=1, divisor=1): - if not self.charts: - return - - for dim in data: - if dim not in self.collected_dims[chart]: - self.collected_dims[chart].add(dim) - self.charts[chart].add_dimension([dim, dim, algorithm, multiplier, divisor]) - - for dim in list(self.collected_dims[chart]): - if dim not in data: - self.collected_dims[chart].remove(dim) - self.charts[chart].del_dimension(dim, hide=False) diff --git a/src/collectors/python.d.plugin/alarms/alarms.conf b/src/collectors/python.d.plugin/alarms/alarms.conf deleted file mode 100644 index 06d76c3b3..000000000 --- a/src/collectors/python.d.plugin/alarms/alarms.conf +++ /dev/null @@ -1,60 +0,0 @@ -# netdata python.d.plugin configuration for example -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 10 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) - -# what url to pull data from -local: - url: 'http://127.0.0.1:19999/api/v1/alarms?all' - # define how to map alarm status to numbers for the chart - status_map: - CLEAR: 0 - WARNING: 1 - CRITICAL: 2 - # set to true to include a chart with calculated alarm values over time - collect_alarm_values: false - # define the type of chart for plotting status over time e.g. 'line' or 'stacked' - alarm_status_chart_type: 'line' - # a "," separated list of words you want to filter alarm names for. For example 'cpu,load' would filter for only - # alarms with "cpu" or "load" in alarm name. Default includes all. - alarm_contains_words: '' - # a "," separated list of words you want to exclude based on alarm name. For example 'cpu,load' would exclude - # all alarms with "cpu" or "load" in alarm name. Default excludes None. 
- alarm_excludes_words: '' diff --git a/src/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md b/src/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md deleted file mode 100644 index 57be4f092..000000000 --- a/src/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md +++ /dev/null @@ -1,201 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/alarms/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/alarms/metadata.yaml" -sidebar_label: "Netdata Agent alarms" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Other" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Netdata Agent alarms - -Plugin: python.d.plugin -Module: alarms - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector creates an 'Alarms' menu with one line plot of `alarms.status`. - - -Alarm status is read from the Netdata agent rest api [`/api/v1/alarms?all`](https://learn.netdata.cloud/api#/alerts/alerts1). - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -It discovers instances of Netdata running on localhost, and gathers metrics from `http://127.0.0.1:19999/api/v1/alarms?all`. `CLEAR` status is mapped to `0`, `WARNING` to `1` and `CRITICAL` to `2`. Also, by default all alarms produced will be monitored. - - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Netdata Agent alarms instance - -These metrics refer to the entire monitored application. - - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| alarms.status | a dimension per alarm representing the latest status of the alarm. | status | -| alarms.values | a dimension per alarm representing the latest collected value of the alarm. | value | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/alarms.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/alarms.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. 
- -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| url | Netdata agent alarms endpoint to collect from. Can be local or remote so long as reachable by agent. | http://127.0.0.1:19999/api/v1/alarms?all | yes | -| status_map | Mapping of alarm status to integer number that will be the metric value collected. | {"CLEAR": 0, "WARNING": 1, "CRITICAL": 2} | yes | -| collect_alarm_values | set to true to include a chart with calculated alarm values over time. | no | yes | -| alarm_status_chart_type | define the type of chart for plotting status over time e.g. 'line' or 'stacked'. | line | yes | -| alarm_contains_words | A "," separated list of words you want to filter alarm names for. For example 'cpu,load' would filter for only alarms with "cpu" or "load" in alarm name. Default includes all. | | yes | -| alarm_excludes_words | A "," separated list of words you want to exclude based on alarm name. For example 'cpu,load' would exclude all alarms with "cpu" or "load" in alarm name. Default excludes None. | | yes | -| update_every | Sets the default data collection frequency. | 10 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | - -</details> - -#### Examples - -##### Basic - -A basic example configuration. - -```yaml -jobs: - url: 'http://127.0.0.1:19999/api/v1/alarms?all' - -``` -##### Advanced - -An advanced example configuration with multiple jobs collecting different subsets of alarms for plotting on different charts. -"ML" job will collect status and values for all alarms with "ml_" in the name. Default job will collect status for all other alarms. - - -<details open><summary>Config</summary> - -```yaml -ML: - update_every: 5 - url: 'http://127.0.0.1:19999/api/v1/alarms?all' - status_map: - CLEAR: 0 - WARNING: 1 - CRITICAL: 2 - collect_alarm_values: true - alarm_status_chart_type: 'stacked' - alarm_contains_words: 'ml_' - -Default: - update_every: 5 - url: 'http://127.0.0.1:19999/api/v1/alarms?all' - status_map: - CLEAR: 0 - WARNING: 1 - CRITICAL: 2 - collect_alarm_values: false - alarm_status_chart_type: 'stacked' - alarm_excludes_words: 'ml_' - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `alarms` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. 
- - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin alarms debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/alarms/metadata.yaml b/src/collectors/python.d.plugin/alarms/metadata.yaml deleted file mode 100644 index b6bee7594..000000000 --- a/src/collectors/python.d.plugin/alarms/metadata.yaml +++ /dev/null @@ -1,177 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: alarms - monitored_instance: - name: Netdata Agent alarms - link: /src/collectors/python.d.plugin/alarms/README.md - categories: - - data-collection.other - icon_filename: "" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - alarms - - netdata - most_popular: false - overview: - data_collection: - metrics_description: | - This collector creates an 'Alarms' menu with one line plot of `alarms.status`. - method_description: | - Alarm status is read from the Netdata agent rest api [`/api/v1/alarms?all`](https://learn.netdata.cloud/api#/alerts/alerts1). - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: | - It discovers instances of Netdata running on localhost, and gathers metrics from `http://127.0.0.1:19999/api/v1/alarms?all`. `CLEAR` status is mapped to `0`, `WARNING` to `1` and `CRITICAL` to `2`. Also, by default all alarms produced will be monitored. - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: [] - configuration: - file: - name: python.d/alarms.conf - description: "" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: Config options - enabled: true - list: - - name: url - description: Netdata agent alarms endpoint to collect from. Can be local or remote so long as reachable by agent. - default_value: http://127.0.0.1:19999/api/v1/alarms?all - required: true - - name: status_map - description: Mapping of alarm status to integer number that will be the metric value collected. - default_value: '{"CLEAR": 0, "WARNING": 1, "CRITICAL": 2}' - required: true - - name: collect_alarm_values - description: set to true to include a chart with calculated alarm values over time. - default_value: false - required: true - - name: alarm_status_chart_type - description: define the type of chart for plotting status over time e.g. 'line' or 'stacked'. - default_value: "line" - required: true - - name: alarm_contains_words - description: > - A "," separated list of words you want to filter alarm names for. For example 'cpu,load' would filter for only alarms with "cpu" or "load" in alarm name. Default includes all. - default_value: "" - required: true - - name: alarm_excludes_words - description: > - A "," separated list of words you want to exclude based on alarm name. 
For example 'cpu,load' would exclude all alarms with "cpu" or "load" in alarm name. Default excludes None. - default_value: "" - required: true - - name: update_every - description: Sets the default data collection frequency. - default_value: 10 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: "" - required: false - examples: - folding: - enabled: true - title: Config - list: - - name: Basic - folding: - enabled: false - description: A basic example configuration. - config: | - jobs: - url: 'http://127.0.0.1:19999/api/v1/alarms?all' - - name: Advanced - folding: - enabled: true - description: | - An advanced example configuration with multiple jobs collecting different subsets of alarms for plotting on different charts. - "ML" job will collect status and values for all alarms with "ml_" in the name. Default job will collect status for all other alarms. - config: | - ML: - update_every: 5 - url: 'http://127.0.0.1:19999/api/v1/alarms?all' - status_map: - CLEAR: 0 - WARNING: 1 - CRITICAL: 2 - collect_alarm_values: true - alarm_status_chart_type: 'stacked' - alarm_contains_words: 'ml_' - - Default: - update_every: 5 - url: 'http://127.0.0.1:19999/api/v1/alarms?all' - status_map: - CLEAR: 0 - WARNING: 1 - CRITICAL: 2 - collect_alarm_values: false - alarm_status_chart_type: 'stacked' - alarm_excludes_words: 'ml_' - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: | - These metrics refer to the entire monitored application. - labels: [] - metrics: - - name: alarms.status - description: Alarms ({status mapping}) - unit: "status" - chart_type: line - dimensions: - - name: a dimension per alarm representing the latest status of the alarm. - - name: alarms.values - description: Alarm Values - unit: "value" - chart_type: line - dimensions: - - name: a dimension per alarm representing the latest collected value of the alarm. diff --git a/src/collectors/python.d.plugin/am2320/integrations/am2320.md b/src/collectors/python.d.plugin/am2320/integrations/am2320.md index f96657624..ea0e505c2 100644 --- a/src/collectors/python.d.plugin/am2320/integrations/am2320.md +++ b/src/collectors/python.d.plugin/am2320/integrations/am2320.md @@ -156,6 +156,7 @@ local_sensor: ### Debug Mode + To troubleshoot issues with the `am2320` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -178,4 +179,37 @@ should give you clues as to why the collector isn't working. ./python.d.plugin am2320 debug trace ``` +### Getting Logs + +If you're encountering problems with the `am2320` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). 
+- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep am2320 +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep am2320 /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep am2320 +``` + diff --git a/src/collectors/python.d.plugin/beanstalk/README.md b/src/collectors/python.d.plugin/beanstalk/README.md deleted file mode 120000 index 4efe13889..000000000 --- a/src/collectors/python.d.plugin/beanstalk/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/beanstalk.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/beanstalk/beanstalk.chart.py b/src/collectors/python.d.plugin/beanstalk/beanstalk.chart.py deleted file mode 100644 index 396543e5a..000000000 --- a/src/collectors/python.d.plugin/beanstalk/beanstalk.chart.py +++ /dev/null @@ -1,252 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: beanstalk netdata python.d module -# Author: ilyam8 -# SPDX-License-Identifier: GPL-3.0-or-later - -try: - import beanstalkc - - BEANSTALKC = True -except ImportError: - BEANSTALKC = False - -from bases.FrameworkServices.SimpleService import SimpleService -from bases.loaders import load_yaml - -ORDER = [ - 'cpu_usage', - 'jobs_rate', - 'connections_rate', - 'commands_rate', - 'current_tubes', - 'current_jobs', - 'current_connections', - 'binlog', - 'uptime', -] - -CHARTS = { - 'cpu_usage': { - 'options': [None, 'Cpu Usage', 'cpu time', 'server statistics', 'beanstalk.cpu_usage', 'area'], - 'lines': [ - ['rusage-utime', 'user', 'incremental'], - ['rusage-stime', 'system', 'incremental'] - ] - }, - 'jobs_rate': { - 'options': [None, 'Jobs Rate', 'jobs/s', 'server statistics', 'beanstalk.jobs_rate', 'line'], - 'lines': [ - ['total-jobs', 'total', 'incremental'], - ['job-timeouts', 'timeouts', 'incremental'] - ] - }, - 'connections_rate': { - 'options': [None, 'Connections Rate', 'connections/s', 'server statistics', 'beanstalk.connections_rate', - 'area'], - 'lines': [ - ['total-connections', 'connections', 'incremental'] - ] - }, - 'commands_rate': { - 'options': [None, 'Commands Rate', 'commands/s', 'server statistics', 'beanstalk.commands_rate', 'stacked'], - 'lines': [ - ['cmd-put', 'put', 'incremental'], - ['cmd-peek', 'peek', 'incremental'], - ['cmd-peek-ready', 'peek-ready', 'incremental'], - ['cmd-peek-delayed', 'peek-delayed', 'incremental'], - ['cmd-peek-buried', 'peek-buried', 'incremental'], - ['cmd-reserve', 'reserve', 'incremental'], - ['cmd-use', 'use', 'incremental'], - ['cmd-watch', 'watch', 'incremental'], - ['cmd-ignore', 'ignore', 'incremental'], - ['cmd-delete', 'delete', 'incremental'], - ['cmd-release', 'release', 'incremental'], - ['cmd-bury', 'bury', 'incremental'], - ['cmd-kick', 'kick', 'incremental'], - ['cmd-stats', 'stats', 'incremental'], - ['cmd-stats-job', 'stats-job', 'incremental'], - ['cmd-stats-tube', 'stats-tube', 'incremental'], - ['cmd-list-tubes', 'list-tubes', 'incremental'], - ['cmd-list-tube-used', 'list-tube-used', 'incremental'], - ['cmd-list-tubes-watched', 'list-tubes-watched', 'incremental'], - ['cmd-pause-tube', 'pause-tube', 'incremental'] - ] - }, - 'current_tubes': { - 'options': [None, 'Current Tubes', 'tubes', 'server statistics', 'beanstalk.current_tubes', 'area'], - 'lines': [ - ['current-tubes', 'tubes'] - ] - }, - 'current_jobs': { - 'options': [None, 'Current Jobs', 'jobs', 'server statistics', 'beanstalk.current_jobs', 'stacked'], - 'lines': [ - ['current-jobs-urgent', 'urgent'], - ['current-jobs-ready', 'ready'], - ['current-jobs-reserved', 'reserved'], - ['current-jobs-delayed', 'delayed'], - ['current-jobs-buried', 'buried'] - ] - }, - 'current_connections': { - 'options': [None, 'Current Connections', 'connections', 'server statistics', - 'beanstalk.current_connections', 'line'], - 'lines': [ - ['current-connections', 'written'], - ['current-producers', 'producers'], - ['current-workers', 'workers'], - ['current-waiting', 'waiting'] - ] - }, - 'binlog': { - 'options': [None, 'Binlog', 'records/s', 'server statistics', 'beanstalk.binlog', 'line'], - 'lines': [ - 
['binlog-records-written', 'written', 'incremental'], - ['binlog-records-migrated', 'migrated', 'incremental'] - ] - }, - 'uptime': { - 'options': [None, 'Uptime', 'seconds', 'server statistics', 'beanstalk.uptime', 'line'], - 'lines': [ - ['uptime'], - ] - } -} - - -def tube_chart_template(name): - order = [ - '{0}_jobs_rate'.format(name), - '{0}_jobs'.format(name), - '{0}_connections'.format(name), - '{0}_commands'.format(name), - '{0}_pause'.format(name) - ] - family = 'tube {0}'.format(name) - - charts = { - order[0]: { - 'options': [None, 'Job Rate', 'jobs/s', family, 'beanstalk.jobs_rate', 'area'], - 'lines': [ - ['_'.join([name, 'total-jobs']), 'jobs', 'incremental'] - ] - }, - order[1]: { - 'options': [None, 'Jobs', 'jobs', family, 'beanstalk.jobs', 'stacked'], - 'lines': [ - ['_'.join([name, 'current-jobs-urgent']), 'urgent'], - ['_'.join([name, 'current-jobs-ready']), 'ready'], - ['_'.join([name, 'current-jobs-reserved']), 'reserved'], - ['_'.join([name, 'current-jobs-delayed']), 'delayed'], - ['_'.join([name, 'current-jobs-buried']), 'buried'] - ] - }, - order[2]: { - 'options': [None, 'Connections', 'connections', family, 'beanstalk.connections', 'stacked'], - 'lines': [ - ['_'.join([name, 'current-using']), 'using'], - ['_'.join([name, 'current-waiting']), 'waiting'], - ['_'.join([name, 'current-watching']), 'watching'] - ] - }, - order[3]: { - 'options': [None, 'Commands', 'commands/s', family, 'beanstalk.commands', 'stacked'], - 'lines': [ - ['_'.join([name, 'cmd-delete']), 'deletes', 'incremental'], - ['_'.join([name, 'cmd-pause-tube']), 'pauses', 'incremental'] - ] - }, - order[4]: { - 'options': [None, 'Pause', 'seconds', family, 'beanstalk.pause', 'stacked'], - 'lines': [ - ['_'.join([name, 'pause']), 'since'], - ['_'.join([name, 'pause-time-left']), 'left'] - ] - } - } - - return order, charts - - -class Service(SimpleService): - def __init__(self, configuration=None, name=None): - SimpleService.__init__(self, configuration=configuration, name=name) - self.configuration = configuration - self.order = list(ORDER) - self.definitions = dict(CHARTS) - self.conn = None - self.alive = True - - def check(self): - if not BEANSTALKC: - self.error("'beanstalkc' module is needed to use beanstalk.chart.py") - return False - - self.conn = self.connect() - - return True if self.conn else False - - def get_data(self): - """ - :return: dict - """ - if not self.is_alive(): - return None - - active_charts = self.charts.active_charts() - data = dict() - - try: - data.update(self.conn.stats()) - - for tube in self.conn.tubes(): - stats = self.conn.stats_tube(tube) - - if tube + '_jobs_rate' not in active_charts: - self.create_new_tube_charts(tube) - - for stat in stats: - data['_'.join([tube, stat])] = stats[stat] - - except beanstalkc.SocketError: - self.alive = False - return None - - return data or None - - def create_new_tube_charts(self, tube): - order, charts = tube_chart_template(tube) - - for chart_name in order: - params = [chart_name] + charts[chart_name]['options'] - dimensions = charts[chart_name]['lines'] - - new_chart = self.charts.add_chart(params) - for dimension in dimensions: - new_chart.add_dimension(dimension) - - def connect(self): - host = self.configuration.get('host', '127.0.0.1') - port = self.configuration.get('port', 11300) - timeout = self.configuration.get('timeout', 1) - try: - return beanstalkc.Connection(host=host, - port=port, - connect_timeout=timeout, - parse_yaml=load_yaml) - except beanstalkc.SocketError as error: - self.error('Connection to {0}:{1} 
failed: {2}'.format(host, port, error)) - return None - - def reconnect(self): - try: - self.conn.reconnect() - self.alive = True - return True - except beanstalkc.SocketError: - return False - - def is_alive(self): - if not self.alive: - return self.reconnect() - return True diff --git a/src/collectors/python.d.plugin/beanstalk/beanstalk.conf b/src/collectors/python.d.plugin/beanstalk/beanstalk.conf deleted file mode 100644 index 6d9773a19..000000000 --- a/src/collectors/python.d.plugin/beanstalk/beanstalk.conf +++ /dev/null @@ -1,78 +0,0 @@ -# netdata python.d.plugin configuration for beanstalk -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# chart_cleanup sets the default chart cleanup interval in iterations. -# A chart is marked as obsolete if it has not been updated -# 'chart_cleanup' iterations in a row. -# When a plugin sends the obsolete flag, the charts are not deleted -# from netdata immediately. -# They will be hidden immediately (not offered to dashboard viewer, -# streamed upstream and archived to external databases) and deleted one hour -# later (configurable from netdata.conf). -# chart_cleanup: 10 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# chart_cleanup: 10 # the JOB's chart cleanup interval in iterations -# -# Additionally to the above, beanstalk also supports the following: -# -# host: 'host' # Server ip address or hostname. Default: 127.0.0.1 -# port: port # Beanstalkd port. 
Default: -# -# ---------------------------------------------------------------------- diff --git a/src/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md b/src/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md deleted file mode 100644 index 841444354..000000000 --- a/src/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md +++ /dev/null @@ -1,219 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/beanstalk/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/beanstalk/metadata.yaml" -sidebar_label: "Beanstalk" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Message Brokers" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Beanstalk - - -<img src="https://netdata.cloud/img/beanstalk.svg" width="150"/> - - -Plugin: python.d.plugin -Module: beanstalk - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Monitor Beanstalk metrics to enhance job queueing and processing efficiency. Track job rates, processing times, and queue lengths for better task management. - -The collector uses the `beanstalkc` python module to connect to a `beanstalkd` service and gather metrics. - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -If no configuration is given, module will attempt to connect to beanstalkd on 127.0.0.1:11300 address. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Beanstalk instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| beanstalk.cpu_usage | user, system | cpu time | -| beanstalk.jobs_rate | total, timeouts | jobs/s | -| beanstalk.connections_rate | connections | connections/s | -| beanstalk.commands_rate | put, peek, peek-ready, peek-delayed, peek-buried, reserve, use, watch, ignore, delete, bury, kick, stats, stats-job, stats-tube, list-tubes, list-tube-used, list-tubes-watched, pause-tube | commands/s | -| beanstalk.current_tubes | tubes | tubes | -| beanstalk.current_jobs | urgent, ready, reserved, delayed, buried | jobs | -| beanstalk.current_connections | written, producers, workers, waiting | connections | -| beanstalk.binlog | written, migrated | records/s | -| beanstalk.uptime | uptime | seconds | - -### Per tube - -Metrics related to Beanstalk tubes. Each tube produces its own set of the following metrics. - -This scope has no labels. 
- -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| beanstalk.jobs_rate | jobs | jobs/s | -| beanstalk.jobs | urgent, ready, reserved, delayed, buried | jobs | -| beanstalk.connections | using, waiting, watching | connections | -| beanstalk.commands | deletes, pauses | commands/s | -| beanstalk.pause | since, left | seconds | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ beanstalk_server_buried_jobs ](https://github.com/netdata/netdata/blob/master/src/health/health.d/beanstalkd.conf) | beanstalk.current_jobs | number of buried jobs across all tubes. You need to manually kick them so they can be processed. Presence of buried jobs in a tube does not affect new jobs. | - - -## Setup - -### Prerequisites - -#### beanstalkc python module - -The collector requires the `beanstalkc` python module to be installed. - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/beanstalk.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/beanstalk.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | -| host | IP or URL to a beanstalk service. | 127.0.0.1 | no | -| port | Port to the IP or URL to a beanstalk service. | 11300 | no | - -</details> - -#### Examples - -##### Remote beanstalk server - -A basic remote beanstalk server - -```yaml -remote: - name: 'beanstalk' - host: '1.2.3.4' - port: 11300 - -``` -##### Multi-instance - -> **Note**: When you define multiple jobs, their names must be unique. - -Collecting metrics from local and remote instances. - - -<details open><summary>Config</summary> - -```yaml -localhost: - name: 'local_beanstalk' - host: '127.0.0.1' - port: 11300 - -remote_job: - name: 'remote_beanstalk' - host: '192.0.2.1' - port: 113000 - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `beanstalk` collector, run the `python.d.plugin` with the debug option enabled. 
The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin beanstalk debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/beanstalk/metadata.yaml b/src/collectors/python.d.plugin/beanstalk/metadata.yaml deleted file mode 100644 index 5e370f0a0..000000000 --- a/src/collectors/python.d.plugin/beanstalk/metadata.yaml +++ /dev/null @@ -1,263 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: beanstalk - monitored_instance: - name: Beanstalk - link: "https://beanstalkd.github.io/" - categories: - - data-collection.message-brokers - #- data-collection.task-queues - icon_filename: "beanstalk.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - beanstalk - - beanstalkd - - message - most_popular: false - overview: - data_collection: - metrics_description: "Monitor Beanstalk metrics to enhance job queueing and processing efficiency. Track job rates, processing times, and queue lengths for better task management." - method_description: "The collector uses the `beanstalkc` python module to connect to a `beanstalkd` service and gather metrics." - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "If no configuration is given, module will attempt to connect to beanstalkd on 127.0.0.1:11300 address." - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: "beanstalkc python module" - description: The collector requires the `beanstalkc` python module to be installed. - configuration: - file: - name: python.d/beanstalk.conf - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. 
This allows autodetection to try several alternatives and pick the one that works. - default_value: "" - required: false - - name: host - description: IP or URL to a beanstalk service. - default_value: "127.0.0.1" - required: false - - name: port - description: Port to the IP or URL to a beanstalk service. - default_value: "11300" - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Remote beanstalk server - description: A basic remote beanstalk server - folding: - enabled: false - config: | - remote: - name: 'beanstalk' - host: '1.2.3.4' - port: 11300 - - name: Multi-instance - description: | - > **Note**: When you define multiple jobs, their names must be unique. - - Collecting metrics from local and remote instances. - config: | - localhost: - name: 'local_beanstalk' - host: '127.0.0.1' - port: 11300 - - remote_job: - name: 'remote_beanstalk' - host: '192.0.2.1' - port: 113000 - troubleshooting: - problems: - list: [] - alerts: - - name: beanstalk_server_buried_jobs - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/beanstalkd.conf - metric: beanstalk.current_jobs - info: number of buried jobs across all tubes. You need to manually kick them so they can be processed. Presence of buried jobs in a tube does not affect new jobs. - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: beanstalk.cpu_usage - description: Cpu Usage - unit: "cpu time" - chart_type: area - dimensions: - - name: user - - name: system - - name: beanstalk.jobs_rate - description: Jobs Rate - unit: "jobs/s" - chart_type: line - dimensions: - - name: total - - name: timeouts - - name: beanstalk.connections_rate - description: Connections Rate - unit: "connections/s" - chart_type: area - dimensions: - - name: connections - - name: beanstalk.commands_rate - description: Commands Rate - unit: "commands/s" - chart_type: stacked - dimensions: - - name: put - - name: peek - - name: peek-ready - - name: peek-delayed - - name: peek-buried - - name: reserve - - name: use - - name: watch - - name: ignore - - name: delete - - name: bury - - name: kick - - name: stats - - name: stats-job - - name: stats-tube - - name: list-tubes - - name: list-tube-used - - name: list-tubes-watched - - name: pause-tube - - name: beanstalk.current_tubes - description: Current Tubes - unit: "tubes" - chart_type: area - dimensions: - - name: tubes - - name: beanstalk.current_jobs - description: Current Jobs - unit: "jobs" - chart_type: stacked - dimensions: - - name: urgent - - name: ready - - name: reserved - - name: delayed - - name: buried - - name: beanstalk.current_connections - description: Current Connections - unit: "connections" - chart_type: line - dimensions: - - name: written - - name: producers - - name: workers - - name: waiting - - name: beanstalk.binlog - description: Binlog - unit: "records/s" - chart_type: line - dimensions: - - name: written - - name: migrated - - name: beanstalk.uptime - description: seconds - unit: "seconds" - chart_type: line - dimensions: - - name: uptime - - name: tube - description: "Metrics related to Beanstalk tubes. Each tube produces its own set of the following metrics." 
- labels: [] - metrics: - - name: beanstalk.jobs_rate - description: Jobs Rate - unit: "jobs/s" - chart_type: area - dimensions: - - name: jobs - - name: beanstalk.jobs - description: Jobs - unit: "jobs" - chart_type: stacked - dimensions: - - name: urgent - - name: ready - - name: reserved - - name: delayed - - name: buried - - name: beanstalk.connections - description: Connections - unit: "connections" - chart_type: stacked - dimensions: - - name: using - - name: waiting - - name: watching - - name: beanstalk.commands - description: Commands - unit: "commands/s" - chart_type: stacked - dimensions: - - name: deletes - - name: pauses - - name: beanstalk.pause - description: Pause - unit: "seconds" - chart_type: stacked - dimensions: - - name: since - - name: left diff --git a/src/collectors/python.d.plugin/boinc/integrations/boinc.md b/src/collectors/python.d.plugin/boinc/integrations/boinc.md index 2e5ff5c4f..d5fcac215 100644 --- a/src/collectors/python.d.plugin/boinc/integrations/boinc.md +++ b/src/collectors/python.d.plugin/boinc/integrations/boinc.md @@ -179,6 +179,7 @@ remote_job: ### Debug Mode + To troubleshoot issues with the `boinc` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -201,4 +202,37 @@ should give you clues as to why the collector isn't working. ./python.d.plugin boinc debug trace ``` +### Getting Logs + +If you're encountering problems with the `boinc` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep boinc +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep boinc /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep boinc +``` + diff --git a/src/collectors/python.d.plugin/ceph/integrations/ceph.md b/src/collectors/python.d.plugin/ceph/integrations/ceph.md index 2b49a331d..d2584a4d0 100644 --- a/src/collectors/python.d.plugin/ceph/integrations/ceph.md +++ b/src/collectors/python.d.plugin/ceph/integrations/ceph.md @@ -169,6 +169,7 @@ local: ### Debug Mode + To troubleshoot issues with the `ceph` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -191,4 +192,37 @@ should give you clues as to why the collector isn't working. ./python.d.plugin ceph debug trace ``` +### Getting Logs + +If you're encountering problems with the `ceph` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). 
+- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep ceph +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for the collector's name: + +```bash +grep ceph /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep ceph +``` + diff --git a/src/collectors/python.d.plugin/changefinder/README.md b/src/collectors/python.d.plugin/changefinder/README.md deleted file mode 120000 index 0ca704eb1..000000000 --- a/src/collectors/python.d.plugin/changefinder/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/python.d_changefinder.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/changefinder/changefinder.chart.py b/src/collectors/python.d.plugin/changefinder/changefinder.chart.py deleted file mode 100644 index 2a69cd9f5..000000000 --- a/src/collectors/python.d.plugin/changefinder/changefinder.chart.py +++ /dev/null @@ -1,185 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: changefinder netdata python.d module -# Author: andrewm4894 -# SPDX-License-Identifier: GPL-3.0-or-later - -from json import loads -import re - -from bases.FrameworkServices.UrlService import UrlService - -import numpy as np -import changefinder -from scipy.stats import percentileofscore - -update_every = 5 -disabled_by_default = True - -ORDER = [ - 'scores', - 'flags' -] - -CHARTS = { - 'scores': { - 'options': [None, 'ChangeFinder', 'score', 'Scores', 'changefinder.scores', 'line'], - 'lines': [] - }, - 'flags': { - 'options': [None, 'ChangeFinder', 'flag', 'Flags', 'changefinder.flags', 'stacked'], - 'lines': [] - } -} - -DEFAULT_PROTOCOL = 'http' -DEFAULT_HOST = '127.0.0.1:19999' -DEFAULT_CHARTS_REGEX = 'system.*' -DEFAULT_MODE = 'per_chart' -DEFAULT_CF_R = 0.5 -DEFAULT_CF_ORDER = 1 -DEFAULT_CF_SMOOTH = 15 -DEFAULT_CF_DIFF = False -DEFAULT_CF_THRESHOLD = 99 -DEFAULT_N_SCORE_SAMPLES = 14400 -DEFAULT_SHOW_SCORES = False - - -class Service(UrlService): - def __init__(self, configuration=None, name=None): - UrlService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.protocol = self.configuration.get('protocol', DEFAULT_PROTOCOL) - self.host = self.configuration.get('host', DEFAULT_HOST) - self.url = '{}://{}/api/v1/allmetrics?format=json'.format(self.protocol, self.host) - self.charts_regex = re.compile(self.configuration.get('charts_regex', DEFAULT_CHARTS_REGEX)) - self.charts_to_exclude = self.configuration.get('charts_to_exclude', '').split(',') - self.mode = self.configuration.get('mode', DEFAULT_MODE) - self.n_score_samples = int(self.configuration.get('n_score_samples', DEFAULT_N_SCORE_SAMPLES)) - self.show_scores = int(self.configuration.get('show_scores', DEFAULT_SHOW_SCORES)) - self.cf_r = float(self.configuration.get('cf_r', DEFAULT_CF_R)) - self.cf_order = int(self.configuration.get('cf_order', DEFAULT_CF_ORDER)) - self.cf_smooth = int(self.configuration.get('cf_smooth', DEFAULT_CF_SMOOTH)) - self.cf_diff = bool(self.configuration.get('cf_diff', DEFAULT_CF_DIFF)) - self.cf_threshold = float(self.configuration.get('cf_threshold', DEFAULT_CF_THRESHOLD)) - self.collected_dims = {'scores': set(), 'flags': set()} - self.models = {} - self.x_latest = {} - self.scores_latest = {} - self.scores_samples = {} - - def get_score(self, x, model): - """Update the score for the model based on most recent data, flag if it's percentile passes self.cf_threshold. 
- """ - - # get score - if model not in self.models: - # initialise empty model if needed - self.models[model] = changefinder.ChangeFinder(r=self.cf_r, order=self.cf_order, smooth=self.cf_smooth) - # if the update for this step fails then just fallback to last known score - try: - score = self.models[model].update(x) - self.scores_latest[model] = score - except Exception as _: - score = self.scores_latest.get(model, 0) - score = 0 if np.isnan(score) else score - - # update sample scores used to calculate percentiles - if model in self.scores_samples: - self.scores_samples[model].append(score) - else: - self.scores_samples[model] = [score] - self.scores_samples[model] = self.scores_samples[model][-self.n_score_samples:] - - # convert score to percentile - score = percentileofscore(self.scores_samples[model], score) - - # flag based on score percentile - flag = 1 if score >= self.cf_threshold else 0 - - return score, flag - - def validate_charts(self, chart, data, algorithm='absolute', multiplier=1, divisor=1): - """If dimension not in chart then add it. - """ - if not self.charts: - return - - for dim in data: - if dim not in self.collected_dims[chart]: - self.collected_dims[chart].add(dim) - self.charts[chart].add_dimension([dim, dim, algorithm, multiplier, divisor]) - - for dim in list(self.collected_dims[chart]): - if dim not in data: - self.collected_dims[chart].remove(dim) - self.charts[chart].del_dimension(dim, hide=False) - - def diff(self, x, model): - """Take difference of data. - """ - x_diff = x - self.x_latest.get(model, 0) - self.x_latest[model] = x - x = x_diff - return x - - def _get_data(self): - - # pull data from self.url - raw_data = self._get_raw_data() - if raw_data is None: - return None - - raw_data = loads(raw_data) - - # filter to just the data for the charts specified - charts_in_scope = list(filter(self.charts_regex.match, raw_data.keys())) - charts_in_scope = [c for c in charts_in_scope if c not in self.charts_to_exclude] - - data_score = {} - data_flag = {} - - # process each chart - for chart in charts_in_scope: - - if self.mode == 'per_chart': - - # average dims on chart and run changefinder on that average - x = [raw_data[chart]['dimensions'][dim]['value'] for dim in raw_data[chart]['dimensions']] - x = [x for x in x if x is not None] - - if len(x) > 0: - - x = sum(x) / len(x) - x = self.diff(x, chart) if self.cf_diff else x - - score, flag = self.get_score(x, chart) - if self.show_scores: - data_score['{}_score'.format(chart)] = score * 100 - data_flag[chart] = flag - - else: - - # run changefinder on each individual dim - for dim in raw_data[chart]['dimensions']: - - chart_dim = '{}|{}'.format(chart, dim) - - x = raw_data[chart]['dimensions'][dim]['value'] - x = x if x else 0 - x = self.diff(x, chart_dim) if self.cf_diff else x - - score, flag = self.get_score(x, chart_dim) - if self.show_scores: - data_score['{}_score'.format(chart_dim)] = score * 100 - data_flag[chart_dim] = flag - - self.validate_charts('flags', data_flag) - - if self.show_scores & len(data_score) > 0: - data_score['average_score'] = sum(data_score.values()) / len(data_score) - self.validate_charts('scores', data_score, divisor=100) - - data = {**data_score, **data_flag} - - return data diff --git a/src/collectors/python.d.plugin/changefinder/changefinder.conf b/src/collectors/python.d.plugin/changefinder/changefinder.conf deleted file mode 100644 index 56a681f1e..000000000 --- a/src/collectors/python.d.plugin/changefinder/changefinder.conf +++ /dev/null @@ -1,74 +0,0 @@ -# netdata 
python.d.plugin configuration for example -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 5 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) - -local: - - # A friendly name for this job. - name: 'local' - - # What host to pull data from. - host: '127.0.0.1:19999' - - # What charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc. - charts_regex: 'system\..*' - - # Charts to exclude, useful if you would like to exclude some specific charts. - # Note: should be a ',' separated string like 'chart.name,chart.name'. - charts_to_exclude: '' - - # Get ChangeFinder scores 'per_dim' or 'per_chart'. - mode: 'per_chart' - - # Default parameters that can be passed to the changefinder library. - cf_r: 0.5 - cf_order: 1 - cf_smooth: 15 - - # The percentile above which scores will be flagged. - cf_threshold: 99 - - # The number of recent scores to use when calculating the percentile of the changefinder score. - n_score_samples: 14400 - - # Set to true if you also want to chart the percentile scores in addition to the flags. - # Mainly useful for debugging or if you want to dive deeper on how the scores are evolving over time. 
- show_scores: false diff --git a/src/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md b/src/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md deleted file mode 100644 index fe370baac..000000000 --- a/src/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md +++ /dev/null @@ -1,217 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/changefinder/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/changefinder/metadata.yaml" -sidebar_label: "python.d changefinder" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Other" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# python.d changefinder - -Plugin: python.d.plugin -Module: changefinder - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector uses the Python [changefinder](https://github.com/shunsukeaihara/changefinder) library to -perform [online](https://en.wikipedia.org/wiki/Online_machine_learning) [changepoint detection](https://en.wikipedia.org/wiki/Change_detection) -on your Netdata charts and/or dimensions. - - -Instead of this collector just _collecting_ data, it also does some computation on the data it collects to return a changepoint score for each chart or dimension you configure it to work on. This is an [online](https://en.wikipedia.org/wiki/Online_machine_learning) machine learning algorithm so there is no batch step to train the model, instead it evolves over time as more data arrives. That makes this particular algorithm quite cheap to compute at each step of data collection (see the notes section below for more details) and it should scale fairly well to work on lots of charts or hosts (if running on a parent node for example). -### Notes - It may take an hour or two (depending on your choice of `n_score_samples`) for the collector to 'settle' into it's - typical behaviour in terms of the trained models and scores you will see in the normal running of your node. Mainly - this is because it can take a while to build up a proper distribution of previous scores in over to convert the raw - score returned by the ChangeFinder algorithm into a percentile based on the most recent `n_score_samples` that have - already been produced. So when you first turn the collector on, it will have a lot of flags in the beginning and then - should 'settle down' once it has built up enough history. This is a typical characteristic of online machine learning - approaches which need some initial window of time before they can be useful. -- As this collector does most of the work in Python itself, you may want to try it out first on a test or development - system to get a sense of its performance characteristics on a node similar to where you would like to use it. -- On a development n1-standard-2 (2 vCPUs, 7.5 GB memory) vm running Ubuntu 18.04 LTS and not doing any work some of the - typical performance characteristics we saw from running this collector (with defaults) were: - - A runtime (`netdata.runtime_changefinder`) of ~30ms. - - Typically ~1% additional cpu usage. - - About ~85mb of ram (`apps.mem`) being continually used by the `python.d.plugin` under default configuration. - - -This collector is supported on all platforms. 
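The notes above describe the scoring pipeline only in prose. The minimal sketch below (not the collector itself, and assuming the `changefinder` and `scipy` packages are installed) reproduces the idea on a made-up value stream: each raw ChangeFinder score is converted to a percentile over the most recent `n_score_samples` scores and flagged once it crosses `cf_threshold`. The synthetic metric and its level shift are illustrative only.

```python
import math
import random

import changefinder
from scipy.stats import percentileofscore

cf_threshold = 99        # percentile above which a flag is raised
n_score_samples = 14400  # window of recent scores used for the percentile

model = changefinder.ChangeFinder(r=0.5, order=1, smooth=15)
recent_scores = []

for step in range(1000):
    # synthetic metric: a level shift halfway through simulates a changepoint
    x = random.gauss(10 if step < 500 else 30, 1)

    score = model.update(x)          # raw changepoint score for this sample
    if math.isnan(score):
        score = 0.0
    recent_scores.append(score)
    recent_scores = recent_scores[-n_score_samples:]

    pct = percentileofscore(recent_scores, score)  # score relative to recent history
    if pct >= cf_threshold:
        print(f"step {step}: percentile {pct:.1f} -> changepoint flagged")
```

This also illustrates why the collector needs a warm-up period: until enough scores have accumulated, almost every new score sits near the top of its own short history and gets flagged.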
- -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -By default this collector will work over all `system.*` charts. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per python.d changefinder instance - - - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| changefinder.scores | a dimension per chart | score | -| changefinder.flags | a dimension per chart | flag | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Python Requirements - -This collector will only work with Python 3 and requires the packages below be installed. - -```bash -# become netdata user -sudo su -s /bin/bash netdata -# install required packages for the netdata user -pip3 install --user numpy==1.19.5 changefinder==0.03 scipy==1.5.4 -``` - -**Note**: if you need to tell Netdata to use Python 3 then you can pass the below command in the python plugin section -of your `netdata.conf` file. - -```yaml -[ plugin:python.d ] - # update every = 1 - command options = -ppython3 -``` - - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/changefinder.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/changefinder.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| charts_regex | what charts to pull data for - A regex like `system\..*/` or `system\..*/apps.cpu/apps.mem` etc. | system\..* | yes | -| charts_to_exclude | charts to exclude, useful if you would like to exclude some specific charts. note: should be a ',' separated string like 'chart.name,chart.name'. | | no | -| mode | get ChangeFinder scores 'per_dim' or 'per_chart'. | per_chart | yes | -| cf_r | default parameters that can be passed to the changefinder library. | 0.5 | no | -| cf_order | default parameters that can be passed to the changefinder library. | 1 | no | -| cf_smooth | default parameters that can be passed to the changefinder library. | 15 | no | -| cf_threshold | the percentile above which scores will be flagged. 
| 99 | no | -| n_score_samples | the number of recent scores to use when calculating the percentile of the changefinder score. | 14400 | no | -| show_scores | set to true if you also want to chart the percentile scores in addition to the flags. (mainly useful for debugging or if you want to dive deeper on how the scores are evolving over time) | no | no | - -</details> - -#### Examples - -##### Default - -Default configuration. - -```yaml -local: - name: 'local' - host: '127.0.0.1:19999' - charts_regex: 'system\..*' - charts_to_exclude: '' - mode: 'per_chart' - cf_r: 0.5 - cf_order: 1 - cf_smooth: 15 - cf_threshold: 99 - n_score_samples: 14400 - show_scores: false - -``` - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `changefinder` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin changefinder debug trace - ``` - -### Debug Mode - - - -### Log Messages - - - - diff --git a/src/collectors/python.d.plugin/changefinder/metadata.yaml b/src/collectors/python.d.plugin/changefinder/metadata.yaml deleted file mode 100644 index 170d9146a..000000000 --- a/src/collectors/python.d.plugin/changefinder/metadata.yaml +++ /dev/null @@ -1,212 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: changefinder - monitored_instance: - name: python.d changefinder - link: "" - categories: - - data-collection.other - icon_filename: "" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - change detection - - anomaly detection - - machine learning - - ml - most_popular: false - overview: - data_collection: - metrics_description: | - This collector uses the Python [changefinder](https://github.com/shunsukeaihara/changefinder) library to - perform [online](https://en.wikipedia.org/wiki/Online_machine_learning) [changepoint detection](https://en.wikipedia.org/wiki/Change_detection) - on your Netdata charts and/or dimensions. - method_description: > - Instead of this collector just _collecting_ data, it also does some computation on the data it collects to return a - changepoint score for each chart or dimension you configure it to work on. This is - an [online](https://en.wikipedia.org/wiki/Online_machine_learning) machine learning algorithm so there is no batch step - to train the model, instead it evolves over time as more data arrives. That makes this particular algorithm quite cheap - to compute at each step of data collection (see the notes section below for more details) and it should scale fairly - well to work on lots of charts or hosts (if running on a parent node for example). - - ### Notes - - It may take an hour or two (depending on your choice of `n_score_samples`) for the collector to 'settle' into it's - typical behaviour in terms of the trained models and scores you will see in the normal running of your node. 
Mainly - this is because it can take a while to build up a proper distribution of previous scores in over to convert the raw - score returned by the ChangeFinder algorithm into a percentile based on the most recent `n_score_samples` that have - already been produced. So when you first turn the collector on, it will have a lot of flags in the beginning and then - should 'settle down' once it has built up enough history. This is a typical characteristic of online machine learning - approaches which need some initial window of time before they can be useful. - - As this collector does most of the work in Python itself, you may want to try it out first on a test or development - system to get a sense of its performance characteristics on a node similar to where you would like to use it. - - On a development n1-standard-2 (2 vCPUs, 7.5 GB memory) vm running Ubuntu 18.04 LTS and not doing any work some of the - typical performance characteristics we saw from running this collector (with defaults) were: - - A runtime (`netdata.runtime_changefinder`) of ~30ms. - - Typically ~1% additional cpu usage. - - About ~85mb of ram (`apps.mem`) being continually used by the `python.d.plugin` under default configuration. - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "By default this collector will work over all `system.*` charts." - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: Python Requirements - description: | - This collector will only work with Python 3 and requires the packages below be installed. - - ```bash - # become netdata user - sudo su -s /bin/bash netdata - # install required packages for the netdata user - pip3 install --user numpy==1.19.5 changefinder==0.03 scipy==1.5.4 - ``` - - **Note**: if you need to tell Netdata to use Python 3 then you can pass the below command in the python plugin section - of your `netdata.conf` file. - - ```yaml - [ plugin:python.d ] - # update every = 1 - command options = -ppython3 - ``` - configuration: - file: - name: python.d/changefinder.conf - description: "" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: charts_regex - description: what charts to pull data for - A regex like `system\..*|` or `system\..*|apps.cpu|apps.mem` etc. - default_value: "system\\..*" - required: true - - name: charts_to_exclude - description: | - charts to exclude, useful if you would like to exclude some specific charts. - note: should be a ',' separated string like 'chart.name,chart.name'. - default_value: "" - required: false - - name: mode - description: get ChangeFinder scores 'per_dim' or 'per_chart'. - default_value: "per_chart" - required: true - - name: cf_r - description: default parameters that can be passed to the changefinder library. 
- default_value: 0.5 - required: false - - name: cf_order - description: default parameters that can be passed to the changefinder library. - default_value: 1 - required: false - - name: cf_smooth - description: default parameters that can be passed to the changefinder library. - default_value: 15 - required: false - - name: cf_threshold - description: the percentile above which scores will be flagged. - default_value: 99 - required: false - - name: n_score_samples - description: the number of recent scores to use when calculating the percentile of the changefinder score. - default_value: 14400 - required: false - - name: show_scores - description: | - set to true if you also want to chart the percentile scores in addition to the flags. (mainly useful for debugging or if you want to dive deeper on how the scores are evolving over time) - default_value: false - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Default - description: Default configuration. - folding: - enabled: false - config: | - local: - name: 'local' - host: '127.0.0.1:19999' - charts_regex: 'system\..*' - charts_to_exclude: '' - mode: 'per_chart' - cf_r: 0.5 - cf_order: 1 - cf_smooth: 15 - cf_threshold: 99 - n_score_samples: 14400 - show_scores: false - troubleshooting: - problems: - list: - - name: "Debug Mode" - description: | - If you would like to log in as `netdata` user and run the collector in debug mode to see more detail. - - ```bash - # become netdata user - sudo su -s /bin/bash netdata - # run collector in debug using `nolock` option if netdata is already running the collector itself. - /usr/libexec/netdata/plugins.d/python.d.plugin changefinder debug trace nolock - ``` - - name: "Log Messages" - description: | - To see any relevant log messages you can use a command like below. - - ```bash - grep 'changefinder' /var/log/netdata/error.log - grep 'changefinder' /var/log/netdata/collector.log - ``` - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "" - labels: [] - metrics: - - name: changefinder.scores - description: ChangeFinder - unit: "score" - chart_type: line - dimensions: - - name: a dimension per chart - - name: changefinder.flags - description: ChangeFinder - unit: "flag" - chart_type: stacked - dimensions: - - name: a dimension per chart diff --git a/src/collectors/python.d.plugin/dovecot/README.md b/src/collectors/python.d.plugin/dovecot/README.md deleted file mode 120000 index c4749cedc..000000000 --- a/src/collectors/python.d.plugin/dovecot/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/dovecot.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/dovecot/dovecot.chart.py b/src/collectors/python.d.plugin/dovecot/dovecot.chart.py deleted file mode 100644 index dfaef28b5..000000000 --- a/src/collectors/python.d.plugin/dovecot/dovecot.chart.py +++ /dev/null @@ -1,143 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: dovecot netdata python.d module -# Author: Pawel Krupa (paulfantom) -# SPDX-License-Identifier: GPL-3.0-or-later - -from bases.FrameworkServices.SocketService import SocketService - -UNIX_SOCKET = '/var/run/dovecot/stats' - -ORDER = [ - 'sessions', - 'logins', - 'commands', - 'faults', - 'context_switches', - 'io', - 'net', - 'syscalls', - 'lookup', - 'cache', - 'auth', - 'auth_cache' -] - -CHARTS = { - 'sessions': { - 'options': [None, 'Dovecot Active Sessions', 'number', 'sessions', 'dovecot.sessions', 'line'], - 'lines': [ - ['num_connected_sessions', 'active sessions', 'absolute'] - ] - }, - 'logins': { - 'options': [None, 'Dovecot Logins', 'number', 'logins', 'dovecot.logins', 'line'], - 'lines': [ - ['num_logins', 'logins', 'absolute'] - ] - }, - 'commands': { - 'options': [None, 'Dovecot Commands', 'commands', 'commands', 'dovecot.commands', 'line'], - 'lines': [ - ['num_cmds', 'commands', 'absolute'] - ] - }, - 'faults': { - 'options': [None, 'Dovecot Page Faults', 'faults', 'page faults', 'dovecot.faults', 'line'], - 'lines': [ - ['min_faults', 'minor', 'absolute'], - ['maj_faults', 'major', 'absolute'] - ] - }, - 'context_switches': { - 'options': [None, 'Dovecot Context Switches', 'switches', 'context switches', 'dovecot.context_switches', - 'line'], - 'lines': [ - ['vol_cs', 'voluntary', 'absolute'], - ['invol_cs', 'involuntary', 'absolute'] - ] - }, - 'io': { - 'options': [None, 'Dovecot Disk I/O', 'KiB/s', 'disk', 'dovecot.io', 'area'], - 'lines': [ - ['disk_input', 'read', 'incremental', 1, 1024], - ['disk_output', 'write', 'incremental', -1, 1024] - ] - }, - 'net': { - 'options': [None, 'Dovecot Network Bandwidth', 'kilobits/s', 'network', 'dovecot.net', 'area'], - 'lines': [ - ['read_bytes', 'read', 'incremental', 8, 1000], - ['write_bytes', 'write', 'incremental', -8, 1000] - ] - }, - 'syscalls': { - 'options': [None, 'Dovecot Number of SysCalls', 'syscalls/s', 'system', 'dovecot.syscalls', 'line'], - 'lines': [ - ['read_count', 'read', 'incremental'], - ['write_count', 'write', 'incremental'] - ] - }, - 'lookup': { - 'options': [None, 'Dovecot Lookups', 'number/s', 'lookups', 'dovecot.lookup', 'stacked'], - 'lines': [ - ['mail_lookup_path', 'path', 'incremental'], - ['mail_lookup_attr', 'attr', 'incremental'] - ] - }, - 'cache': { - 'options': [None, 'Dovecot Cache Hits', 'hits/s', 'cache', 'dovecot.cache', 'line'], - 'lines': [ - ['mail_cache_hits', 'hits', 'incremental'] - ] - }, - 'auth': { - 'options': [None, 'Dovecot Authentications', 'attempts', 'logins', 'dovecot.auth', 'stacked'], - 'lines': [ - ['auth_successes', 'ok', 'absolute'], - ['auth_failures', 'failed', 'absolute'] - ] - }, - 'auth_cache': { - 'options': [None, 'Dovecot Authentication Cache', 'number', 'cache', 'dovecot.auth_cache', 'stacked'], - 'lines': [ - ['auth_cache_hits', 'hit', 'absolute'], - ['auth_cache_misses', 'miss', 'absolute'] - ] - } -} - - -class Service(SocketService): - def __init__(self, configuration=None, name=None): - SocketService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.host = None # localhost - self.port = None # 24242 - self.unix_socket = UNIX_SOCKET - self.request = 
'EXPORT\tglobal\r\n' - - def _get_data(self): - """ - Format data received from socket - :return: dict - """ - try: - raw = self._get_raw_data() - except (ValueError, AttributeError): - return None - - if raw is None: - self.debug('dovecot returned no data') - return None - - data = raw.split('\n')[:2] - desc = data[0].split('\t') - vals = data[1].split('\t') - ret = dict() - for i, _ in enumerate(desc): - try: - ret[str(desc[i])] = int(vals[i]) - except ValueError: - continue - return ret or None diff --git a/src/collectors/python.d.plugin/dovecot/dovecot.conf b/src/collectors/python.d.plugin/dovecot/dovecot.conf deleted file mode 100644 index 451dbc9ac..000000000 --- a/src/collectors/python.d.plugin/dovecot/dovecot.conf +++ /dev/null @@ -1,98 +0,0 @@ -# netdata python.d.plugin configuration for dovecot -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. 
These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, dovecot also supports the following: -# -# socket: 'path/to/dovecot/stats' -# -# or -# host: 'IP or HOSTNAME' # the host to connect to -# port: PORT # the port to connect to -# -# - -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) - -localhost: - name : 'local' - host : 'localhost' - port : 24242 - -localipv4: - name : 'local' - host : '127.0.0.1' - port : 24242 - -localipv6: - name : 'local' - host : '::1' - port : 24242 - -localsocket: - name : 'local' - socket : '/var/run/dovecot/stats' - -localsocket_old: - name : 'local' - socket : '/var/run/dovecot/old-stats' - diff --git a/src/collectors/python.d.plugin/dovecot/integrations/dovecot.md b/src/collectors/python.d.plugin/dovecot/integrations/dovecot.md deleted file mode 100644 index aaf207e85..000000000 --- a/src/collectors/python.d.plugin/dovecot/integrations/dovecot.md +++ /dev/null @@ -1,197 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/dovecot/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/dovecot/metadata.yaml" -sidebar_label: "Dovecot" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Mail Servers" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Dovecot - - -<img src="https://netdata.cloud/img/dovecot.svg" width="150"/> - - -Plugin: python.d.plugin -Module: dovecot - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors Dovecot metrics about sessions, logins, commands, page faults and more. - -It uses the dovecot socket and executes the `EXPORT global` command to get the statistics. - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -If no configuration is given, the collector will attempt to connect to dovecot using unix socket localized in `/var/run/dovecot/stats` - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Dovecot instance - -These metrics refer to the entire monitored application. - -This scope has no labels. 
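As a rough illustration of the `EXPORT global` exchange mentioned in the overview above, the same statistics can be pulled by hand. This is a sketch only: it assumes Dovecot's stats socket is at the default `/var/run/dovecot/stats` path and is readable by the current user; the 2-second timeout and 4 KiB read size are arbitrary choices, not part of the collector.

```python
import socket

SOCKET_PATH = "/var/run/dovecot/stats"  # default path used by the collector

with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
    sock.settimeout(2.0)
    sock.connect(SOCKET_PATH)
    sock.sendall(b"EXPORT\tglobal\r\n")

    chunks = []
    while True:
        try:
            chunk = sock.recv(4096)
        except socket.timeout:
            break
        if not chunk:
            break
        chunks.append(chunk)

reply = b"".join(chunks).decode("utf-8", errors="replace")

# The reply is two tab-separated rows: field names first, then their current values.
rows = reply.split("\n")
stats = {}
if len(rows) >= 2:
    for name, value in zip(rows[0].split("\t"), rows[1].split("\t")):
        try:
            stats[name] = int(value)  # keep only the numeric counters, as the module does
        except ValueError:
            continue

print(stats.get("num_logins"), stats.get("num_connected_sessions"))
```

The chart dimensions listed below (for example `num_logins` and `num_connected_sessions`) map directly onto these counters.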
- -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| dovecot.sessions | active sessions | number | -| dovecot.logins | logins | number | -| dovecot.commands | commands | commands | -| dovecot.faults | minor, major | faults | -| dovecot.context_switches | voluntary, involuntary | switches | -| dovecot.io | read, write | KiB/s | -| dovecot.net | read, write | kilobits/s | -| dovecot.syscalls | read, write | syscalls/s | -| dovecot.lookup | path, attr | number/s | -| dovecot.cache | hits | hits/s | -| dovecot.auth | ok, failed | attempts | -| dovecot.auth_cache | hit, miss | number | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Dovecot configuration - -The Dovecot UNIX socket should have R/W permissions for user netdata, or Dovecot should be configured with a TCP/IP socket. - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/dovecot.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/dovecot.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | -| socket | Use this socket to communicate with Devcot | /var/run/dovecot/stats | no | -| host | Instead of using a socket, you can point the collector to an ip for devcot statistics. | | no | -| port | Used in combination with host, configures the port devcot listens to. | | no | - -</details> - -#### Examples - -##### Local TCP - -A basic TCP configuration. - -<details open><summary>Config</summary> - -```yaml -localtcpip: - name: 'local' - host: '127.0.0.1' - port: 24242 - -``` -</details> - -##### Local socket - -A basic local socket configuration - -<details open><summary>Config</summary> - -```yaml -localsocket: - name: 'local' - socket: '/var/run/dovecot/stats' - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `dovecot` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. 
- -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin dovecot debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/dovecot/metadata.yaml b/src/collectors/python.d.plugin/dovecot/metadata.yaml deleted file mode 100644 index b247da846..000000000 --- a/src/collectors/python.d.plugin/dovecot/metadata.yaml +++ /dev/null @@ -1,207 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: dovecot - monitored_instance: - name: Dovecot - link: 'https://www.dovecot.org/' - categories: - - data-collection.mail-servers - icon_filename: 'dovecot.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: - - dovecot - - imap - - mail - most_popular: false - overview: - data_collection: - metrics_description: 'This collector monitors Dovecot metrics about sessions, logins, commands, page faults and more.' - method_description: 'It uses the dovecot socket and executes the `EXPORT global` command to get the statistics.' - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: 'If no configuration is given, the collector will attempt to connect to dovecot using unix socket localized in `/var/run/dovecot/stats`' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: - - title: 'Dovecot configuration' - description: The Dovecot UNIX socket should have R/W permissions for user netdata, or Dovecot should be configured with a TCP/IP socket. - configuration: - file: - name: python.d/dovecot.conf - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. 
- default_value: '' - required: false - - name: socket - description: Use this socket to communicate with Devcot - default_value: /var/run/dovecot/stats - required: false - - name: host - description: Instead of using a socket, you can point the collector to an ip for devcot statistics. - default_value: '' - required: false - - name: port - description: Used in combination with host, configures the port devcot listens to. - default_value: '' - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Local TCP - description: A basic TCP configuration. - config: | - localtcpip: - name: 'local' - host: '127.0.0.1' - port: 24242 - - name: Local socket - description: A basic local socket configuration - config: | - localsocket: - name: 'local' - socket: '/var/run/dovecot/stats' - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: dovecot.sessions - description: Dovecot Active Sessions - unit: "number" - chart_type: line - dimensions: - - name: active sessions - - name: dovecot.logins - description: Dovecot Logins - unit: "number" - chart_type: line - dimensions: - - name: logins - - name: dovecot.commands - description: Dovecot Commands - unit: "commands" - chart_type: line - dimensions: - - name: commands - - name: dovecot.faults - description: Dovecot Page Faults - unit: "faults" - chart_type: line - dimensions: - - name: minor - - name: major - - name: dovecot.context_switches - description: Dovecot Context Switches - unit: "switches" - chart_type: line - dimensions: - - name: voluntary - - name: involuntary - - name: dovecot.io - description: Dovecot Disk I/O - unit: "KiB/s" - chart_type: area - dimensions: - - name: read - - name: write - - name: dovecot.net - description: Dovecot Network Bandwidth - unit: "kilobits/s" - chart_type: area - dimensions: - - name: read - - name: write - - name: dovecot.syscalls - description: Dovecot Number of SysCalls - unit: "syscalls/s" - chart_type: line - dimensions: - - name: read - - name: write - - name: dovecot.lookup - description: Dovecot Lookups - unit: "number/s" - chart_type: stacked - dimensions: - - name: path - - name: attr - - name: dovecot.cache - description: Dovecot Cache Hits - unit: "hits/s" - chart_type: line - dimensions: - - name: hits - - name: dovecot.auth - description: Dovecot Authentications - unit: "attempts" - chart_type: stacked - dimensions: - - name: ok - - name: failed - - name: dovecot.auth_cache - description: Dovecot Authentication Cache - unit: "number" - chart_type: stacked - dimensions: - - name: hit - - name: miss diff --git a/src/collectors/python.d.plugin/example/README.md b/src/collectors/python.d.plugin/example/README.md deleted file mode 120000 index 55877a99a..000000000 --- a/src/collectors/python.d.plugin/example/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/example_collector.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/example/example.chart.py b/src/collectors/python.d.plugin/example/example.chart.py deleted file mode 100644 index d6c0b6658..000000000 --- a/src/collectors/python.d.plugin/example/example.chart.py +++ /dev/null @@ -1,51 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: example netdata python.d module -# Author: Put your name here (your github login) -# SPDX-License-Identifier: GPL-3.0-or-later - -from random import SystemRandom - -from bases.FrameworkServices.SimpleService import SimpleService - -priority = 90000 - -ORDER = [ - 'random', -] - -CHARTS = { - 'random': { - 'options': [None, 'A random number', 'random number', 'random', 'random', 'line'], - 'lines': [ - ['random1'] - ] - } -} - - -class Service(SimpleService): - def __init__(self, configuration=None, name=None): - SimpleService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.random = SystemRandom() - self.num_lines = self.configuration.get('num_lines', 4) - self.lower = self.configuration.get('lower', 0) - self.upper = self.configuration.get('upper', 100) - - @staticmethod - def check(): - return True - - def get_data(self): - data = dict() - - for i in range(0, self.num_lines): - dimension_id = ''.join(['random', str(i)]) - - if dimension_id not in self.charts['random']: - self.charts['random'].add_dimension([dimension_id]) - - data[dimension_id] = self.random.randint(self.lower, self.upper) - - return data diff --git a/src/collectors/python.d.plugin/example/example.conf b/src/collectors/python.d.plugin/example/example.conf deleted file mode 100644 index 31261b840..000000000 --- a/src/collectors/python.d.plugin/example/example.conf +++ /dev/null @@ -1,87 +0,0 @@ -# netdata python.d.plugin configuration for example -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. 
-# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear on the dashboard -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, example also supports the following: -# -# num_lines: 4 # the number of lines to create -# lower: 0 # the lower bound of numbers to randomly sample from -# upper: 100 # the upper bound of numbers to randomly sample from -# -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS - -four_lines: - name: "Four Lines" # the JOB's name as it will appear on the dashboard - update_every: 1 # the JOB's data collection frequency - priority: 60000 # the JOB's order on the dashboard - penalty: yes # the JOB's penalty - autodetection_retry: 0 # the JOB's re-check interval in seconds - num_lines: 4 # the number of lines to create - lower: 0 # the lower bound of numbers to randomly sample from - upper: 100 # the upper bound of numbers to randomly sample from - -# if you wanted to make another job to run in addition to the one above then -# you would just uncomment the job configuration below. -# two_lines: -# name: "Two Lines" # the JOB's name as it will appear on the dashboard -# num_lines: 2 # the number of lines to create -# lower: 50 # the lower bound of numbers to randomly sample from -# upper: 75 # the upper bound of numbers to randomly sample from diff --git a/src/collectors/python.d.plugin/example/integrations/example_collector.md b/src/collectors/python.d.plugin/example/integrations/example_collector.md deleted file mode 100644 index 03c0165b4..000000000 --- a/src/collectors/python.d.plugin/example/integrations/example_collector.md +++ /dev/null @@ -1,171 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/example/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/example/metadata.yaml" -sidebar_label: "Example collector" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Other" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Example collector - -Plugin: python.d.plugin -Module: example - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Example collector that generates some random numbers as metrics. - -If you want to write your own collector, read our [writing a new Python module](/src/collectors/python.d.plugin/README.md#how-to-write-a-new-module) tutorial. - - -The `get_data()` function uses `random.randint()` to generate a random number which will be collected as a metric. - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This integration doesn't support auto-detection. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. 
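For quick experimentation outside the agent, the data-generation idea described in the overview can be condensed to a few standalone lines. This is a hypothetical sketch, not the module itself: it drops the python.d framework and keeps only the `num_lines`/`lower`/`upper` behaviour.

```python
from random import SystemRandom

rng = SystemRandom()
num_lines, lower, upper = 4, 0, 100  # mirrors the "four_lines" job defaults

def get_data():
    # one value per configured line, named random0..random3
    return {f"random{i}": rng.randint(lower, upper) for i in range(num_lines)}

print(get_data())  # e.g. {'random0': 42, 'random1': 7, 'random2': 93, 'random3': 58}
```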
- -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Example collector instance - -These metrics refer to the entire monitored application. - - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| example.random | random | number | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/example.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/example.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| num_lines | The number of lines to create. | 4 | no | -| lower | The lower bound of numbers to randomly sample from. | 0 | no | -| upper | The upper bound of numbers to randomly sample from. | 100 | no | -| update_every | Sets the default data collection frequency. | 1 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | - -</details> - -#### Examples - -##### Basic - -A basic example configuration. - -```yaml -four_lines: - name: "Four Lines" - update_every: 1 - priority: 60000 - penalty: yes - autodetection_retry: 0 - num_lines: 4 - lower: 0 - upper: 100 - -``` - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `example` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. 
- - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin example debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/example/metadata.yaml b/src/collectors/python.d.plugin/example/metadata.yaml deleted file mode 100644 index 6b2401366..000000000 --- a/src/collectors/python.d.plugin/example/metadata.yaml +++ /dev/null @@ -1,138 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: example - monitored_instance: - name: Example collector - link: /src/collectors/python.d.plugin/example/README.md - categories: - - data-collection.other - icon_filename: "" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - example - - netdata - - python - most_popular: false - overview: - data_collection: - metrics_description: | - Example collector that generates some random numbers as metrics. - - If you want to write your own collector, read our [writing a new Python module](/src/collectors/python.d.plugin/README.md#how-to-write-a-new-module) tutorial. - method_description: | - The `get_data()` function uses `random.randint()` to generate a random number which will be collected as a metric. - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "" - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: [] - configuration: - file: - name: python.d/example.conf - description: "" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: Config options - enabled: true - list: - - name: num_lines - description: The number of lines to create. - default_value: 4 - required: false - - name: lower - description: The lower bound of numbers to randomly sample from. - default_value: 0 - required: false - - name: upper - description: The upper bound of numbers to randomly sample from. - default_value: 100 - required: false - - name: update_every - description: Sets the default data collection frequency. - default_value: 1 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. 
- default_value: "" - required: false - examples: - folding: - enabled: true - title: Config - list: - - name: Basic - folding: - enabled: false - description: A basic example configuration. - config: | - four_lines: - name: "Four Lines" - update_every: 1 - priority: 60000 - penalty: yes - autodetection_retry: 0 - num_lines: 4 - lower: 0 - upper: 100 - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: | - These metrics refer to the entire monitored application. - labels: [] - metrics: - - name: example.random - description: A random number - unit: number - chart_type: line - dimensions: - - name: random diff --git a/src/collectors/python.d.plugin/exim/README.md b/src/collectors/python.d.plugin/exim/README.md deleted file mode 120000 index f1f2ef9f9..000000000 --- a/src/collectors/python.d.plugin/exim/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/exim.md
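
The `example` module's metadata above describes its method as calling `random.randint()` inside `get_data()` and exposing the result on the `example.random` chart. As a rough sketch of that pattern only (not the deleted module's actual source; the dimension keys and option handling here are assumptions for illustration):

```python
# Illustrative sketch of the data function the example metadata describes:
# one random value per configured line, returned as {dimension_id: value}.
import random


def get_data(num_lines=4, lower=0, upper=100):
    # the dict keys become dimension ids on the example.random chart
    return {'random{0}'.format(i): random.randint(lower, upper) for i in range(num_lines)}
```
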
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/exim/exim.chart.py b/src/collectors/python.d.plugin/exim/exim.chart.py deleted file mode 100644 index 7238a1bea..000000000 --- a/src/collectors/python.d.plugin/exim/exim.chart.py +++ /dev/null @@ -1,39 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: exim netdata python.d module -# Author: Pawel Krupa (paulfantom) -# SPDX-License-Identifier: GPL-3.0-or-later - -from bases.FrameworkServices.ExecutableService import ExecutableService - -EXIM_COMMAND = 'exim -bpc' - -ORDER = [ - 'qemails', -] - -CHARTS = { - 'qemails': { - 'options': [None, 'Exim Queue Emails', 'emails', 'queue', 'exim.qemails', 'line'], - 'lines': [ - ['emails', None, 'absolute'] - ] - } -} - - -class Service(ExecutableService): - def __init__(self, configuration=None, name=None): - ExecutableService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.command = EXIM_COMMAND - - def _get_data(self): - """ - Format data received from shell command - :return: dict - """ - try: - return {'emails': int(self._get_raw_data()[0])} - except (ValueError, AttributeError): - return None diff --git a/src/collectors/python.d.plugin/exim/exim.conf b/src/collectors/python.d.plugin/exim/exim.conf deleted file mode 100644 index 3b7e65922..000000000 --- a/src/collectors/python.d.plugin/exim/exim.conf +++ /dev/null @@ -1,91 +0,0 @@ -# netdata python.d.plugin configuration for exim -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# exim is slow, so once every 10 seconds -update_every: 10 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. 
These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, exim also supports the following: -# -# command: 'exim -bpc' # the command to run -# - -# ---------------------------------------------------------------------- -# REQUIRED exim CONFIGURATION -# -# netdata will query exim as user netdata. -# By default exim will refuse to respond. -# -# To allow querying exim as non-admin user, please set the following -# to your exim configuration: -# -# queue_list_requires_admin = false -# -# Your exim configuration should be in -# -# /etc/exim/exim4.conf -# or -# /etc/exim4/conf.d/main/000_local_options -# -# Please consult your distribution information to find the exact file. - -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS - -local: - command: 'exim -bpc' diff --git a/src/collectors/python.d.plugin/exim/integrations/exim.md b/src/collectors/python.d.plugin/exim/integrations/exim.md deleted file mode 100644 index a64a5449b..000000000 --- a/src/collectors/python.d.plugin/exim/integrations/exim.md +++ /dev/null @@ -1,181 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/exim/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/exim/metadata.yaml" -sidebar_label: "Exim" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Mail Servers" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Exim - - -<img src="https://netdata.cloud/img/exim.jpg" width="150"/> - - -Plugin: python.d.plugin -Module: exim - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors Exim mail queue. - -It uses the `exim` command line binary to get the statistics. - -This collector is supported on all platforms. - -This collector only supports collecting metrics from a single instance of this integration. - - -### Default Behavior - -#### Auto-Detection - -Assuming setup prerequisites are met, the collector will try to gather statistics using the method described above, even without any configuration. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Exim instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| exim.qemails | emails | emails | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Exim configuration - local installation - -The module uses the `exim` binary, which can only be executed as root by default. We need to allow other users to `exim` binary. 
We solve that adding `queue_list_requires_admin` statement in exim configuration and set to `false`, because it is `true` by default. On many Linux distributions, the default location of `exim` configuration is in `/etc/exim.conf`. - -1. Edit the `exim` configuration with your preferred editor and add: -`queue_list_requires_admin = false` -2. Restart `exim` and Netdata - - -#### Exim configuration - WHM (CPanel) server - -On a WHM server, you can reconfigure `exim` over the WHM interface with the following steps. - -1. Login to WHM -2. Navigate to Service Configuration --> Exim Configuration Manager --> tab Advanced Editor -3. Scroll down to the button **Add additional configuration setting** and click on it. -4. In the new dropdown which will appear above we need to find and choose: -`queue_list_requires_admin` and set to `false` -5. Scroll to the end and click the **Save** button. - - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/exim.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/exim.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | -| command | Path and command to the `exim` binary | exim -bpc | no | - -</details> - -#### Examples - -##### Local exim install - -A basic local exim install - -```yaml -local: - command: 'exim -bpc' - -``` - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `exim` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. 
- - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin exim debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/exim/metadata.yaml b/src/collectors/python.d.plugin/exim/metadata.yaml deleted file mode 100644 index a8be02d99..000000000 --- a/src/collectors/python.d.plugin/exim/metadata.yaml +++ /dev/null @@ -1,132 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: exim - monitored_instance: - name: Exim - link: "https://www.exim.org/" - categories: - - data-collection.mail-servers - icon_filename: "exim.jpg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - exim - - mail - - server - most_popular: false - overview: - data_collection: - metrics_description: "This collector monitors Exim mail queue." - method_description: "It uses the `exim` command line binary to get the statistics." - supported_platforms: - include: [] - exclude: [] - multi_instance: false - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "Assuming setup prerequisites are met, the collector will try to gather statistics using the method described above, even without any configuration." - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: "Exim configuration - local installation" - description: | - The module uses the `exim` binary, which can only be executed as root by default. We need to allow other users to `exim` binary. We solve that adding `queue_list_requires_admin` statement in exim configuration and set to `false`, because it is `true` by default. On many Linux distributions, the default location of `exim` configuration is in `/etc/exim.conf`. - - 1. Edit the `exim` configuration with your preferred editor and add: - `queue_list_requires_admin = false` - 2. Restart `exim` and Netdata - - title: "Exim configuration - WHM (CPanel) server" - description: | - On a WHM server, you can reconfigure `exim` over the WHM interface with the following steps. - - 1. Login to WHM - 2. Navigate to Service Configuration --> Exim Configuration Manager --> tab Advanced Editor - 3. Scroll down to the button **Add additional configuration setting** and click on it. - 4. In the new dropdown which will appear above we need to find and choose: - `queue_list_requires_admin` and set to `false` - 5. Scroll to the end and click the **Save** button. - configuration: - file: - name: python.d/exim.conf - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. 
- default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: "" - required: false - - name: command - description: Path and command to the `exim` binary - default_value: "exim -bpc" - required: false - examples: - folding: - enabled: false - title: "Config" - list: - - name: Local exim install - description: A basic local exim install - config: | - local: - command: 'exim -bpc' - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: exim.qemails - description: Exim Queue Emails - unit: "emails" - chart_type: line - dimensions: - - name: emails diff --git a/src/collectors/python.d.plugin/gearman/README.md b/src/collectors/python.d.plugin/gearman/README.md deleted file mode 120000 index 70189d698..000000000 --- a/src/collectors/python.d.plugin/gearman/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/gearman.md
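
The exim prerequisite documented above (`queue_list_requires_admin = false`) can be sanity-checked outside Netdata before enabling the job. A minimal sketch, assuming `exim` is on the PATH and the check is run as the same unprivileged user the collector uses:

```python
# Runs `exim -bpc` (the command the collector's default job uses) and returns
# the queued-email count; a non-zero exit usually means the admin restriction
# is still in place.
import subprocess


def exim_queue_count(command=("exim", "-bpc")):
    result = subprocess.run(command, capture_output=True, text=True, timeout=10)
    if result.returncode != 0:
        raise RuntimeError(result.stderr.strip() or "exim refused the queue query")
    return int(result.stdout.strip())


if __name__ == "__main__":
    print(exim_queue_count())
```
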
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/gearman/gearman.chart.py b/src/collectors/python.d.plugin/gearman/gearman.chart.py deleted file mode 100644 index 5e280a4d8..000000000 --- a/src/collectors/python.d.plugin/gearman/gearman.chart.py +++ /dev/null @@ -1,243 +0,0 @@ -# Description: dovecot netdata python.d module -# Author: Kyle Agronick (agronick) -# SPDX-License-Identifier: GPL-3.0+ - -# Gearman Netdata Plugin - -from copy import deepcopy - -from bases.FrameworkServices.SocketService import SocketService - -CHARTS = { - 'total_workers': { - 'options': [None, 'Total Jobs', 'Jobs', 'Total Jobs', 'gearman.total_jobs', 'line'], - 'lines': [ - ['total_pending', 'Pending', 'absolute'], - ['total_running', 'Running', 'absolute'], - ] - }, -} - - -def job_chart_template(job_name): - return { - 'options': [None, job_name, 'Jobs', 'Activity by Job', 'gearman.single_job', 'stacked'], - 'lines': [ - ['{0}_pending'.format(job_name), 'Pending', 'absolute'], - ['{0}_idle'.format(job_name), 'Idle', 'absolute'], - ['{0}_running'.format(job_name), 'Running', 'absolute'], - ] - } - - -def build_result_dict(job): - """ - Get the status for each job - :return: dict - """ - - total, running, available = job['metrics'] - - idle = available - running - pending = total - running - - return { - '{0}_pending'.format(job['job_name']): pending, - '{0}_idle'.format(job['job_name']): idle, - '{0}_running'.format(job['job_name']): running, - } - - -def parse_worker_data(job): - job_name = job[0] - job_metrics = job[1:] - - return { - 'job_name': job_name, - 'metrics': job_metrics, - } - - -class GearmanReadException(BaseException): - pass - - -class Service(SocketService): - def __init__(self, configuration=None, name=None): - super(Service, self).__init__(configuration=configuration, name=name) - self.request = "status\n" - self._keep_alive = True - - self.host = self.configuration.get('host', 'localhost') - self.port = self.configuration.get('port', 4730) - - self.tls = self.configuration.get('tls', False) - self.cert = self.configuration.get('cert', None) - self.key = self.configuration.get('key', None) - - self.active_jobs = set() - self.definitions = deepcopy(CHARTS) - self.order = ['total_workers'] - - def _get_data(self): - """ - Format data received from socket - :return: dict - """ - - try: - active_jobs = self.get_active_jobs() - except GearmanReadException: - return None - - found_jobs, job_data = self.process_jobs(active_jobs) - self.remove_stale_jobs(found_jobs) - return job_data - - def get_active_jobs(self): - active_jobs = [] - - for job in self.get_worker_data(): - parsed_job = parse_worker_data(job) - - # Gearman does not clean up old jobs - # We only care about jobs that have - # some relevant data - if not any(parsed_job['metrics']): - continue - - active_jobs.append(parsed_job) - - return active_jobs - - def get_worker_data(self): - """ - Split the data returned from Gearman - into a list of lists - - This returns the same output that you - would get from a gearadmin --status - command. 
- - Example output returned from - _get_raw_data(): - prefix generic_worker4 78 78 500 - generic_worker2 78 78 500 - generic_worker3 0 0 760 - generic_worker1 0 0 500 - - :return: list - """ - - try: - raw = self._get_raw_data() - except (ValueError, AttributeError): - raise GearmanReadException() - - if raw is None: - self.debug("Gearman returned no data") - raise GearmanReadException() - - workers = list() - - for line in raw.splitlines()[:-1]: - parts = line.split() - if not parts: - continue - - name = '_'.join(parts[:-3]) - try: - values = [int(w) for w in parts[-3:]] - except ValueError: - continue - - w = [name] - w.extend(values) - workers.append(w) - - return workers - - def process_jobs(self, active_jobs): - - output = { - 'total_pending': 0, - 'total_idle': 0, - 'total_running': 0, - } - found_jobs = set() - - for parsed_job in active_jobs: - - job_name = self.add_job(parsed_job) - found_jobs.add(job_name) - job_data = build_result_dict(parsed_job) - - for sum_value in ('pending', 'running', 'idle'): - output['total_{0}'.format(sum_value)] += job_data['{0}_{1}'.format(job_name, sum_value)] - - output.update(job_data) - - return found_jobs, output - - def remove_stale_jobs(self, active_job_list): - """ - Removes jobs that have no workers, pending jobs, - or running jobs - :param active_job_list: The latest list of active jobs - :type active_job_list: iterable - :return: None - """ - - for to_remove in self.active_jobs - active_job_list: - self.remove_job(to_remove) - - def add_job(self, parsed_job): - """ - Adds a job to the list of active jobs - :param parsed_job: A parsed job dict - :type parsed_job: dict - :return: None - """ - - def add_chart(job_name): - """ - Adds a new job chart - :param job_name: The name of the job to add - :type job_name: string - :return: None - """ - - job_key = 'job_{0}'.format(job_name) - template = job_chart_template(job_name) - new_chart = self.charts.add_chart([job_key] + template['options']) - for dimension in template['lines']: - new_chart.add_dimension(dimension) - - if parsed_job['job_name'] not in self.active_jobs: - add_chart(parsed_job['job_name']) - self.active_jobs.add(parsed_job['job_name']) - - return parsed_job['job_name'] - - def remove_job(self, job_name): - """ - Removes a job to the list of active jobs - :param job_name: The name of the job to remove - :type job_name: string - :return: None - """ - - def remove_chart(job_name): - """ - Removes a job chart - :param job_name: The name of the job to remove - :type job_name: string - :return: None - """ - - job_key = 'job_{0}'.format(job_name) - self.charts[job_key].obsolete() - del self.charts[job_key] - - remove_chart(job_name) - self.active_jobs.remove(job_name) diff --git a/src/collectors/python.d.plugin/gearman/gearman.conf b/src/collectors/python.d.plugin/gearman/gearman.conf deleted file mode 100644 index 635e893ef..000000000 --- a/src/collectors/python.d.plugin/gearman/gearman.conf +++ /dev/null @@ -1,75 +0,0 @@ -# netdata python.d.plugin configuration for gearman -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). 
- -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, gearman also supports the following: -# -# host: localhost # The host running the Gearman server -# port: 4730 # Port of the Gearman server -# tls: no # Whether to use TLS or not -# cert: /path/to/cert # Path to cert if using TLS -# key: /path/to/key # Path to key if using TLS -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOB - -localhost: - name : 'local' - host : 'localhost' - port : 4730
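
The gearman.conf above lists `host`/`port` (and optional TLS) options; the collector reads gearmand's plain-text admin protocol on that socket. As a quick way to see exactly what it will receive, here is a minimal sketch (assuming gearmand answers the `status` admin command on localhost:4730; TLS is ignored):

```python
# Sends the admin "status" command and parses the reply; each line is
# "<function> <queued> <running> <available workers>", terminated by a "." line.
import socket


def gearman_status(host="localhost", port=4730, timeout=5):
    with socket.create_connection((host, port), timeout=timeout) as sock:
        sock.sendall(b"status\n")
        data = b""
        while not data.endswith(b".\n"):
            chunk = sock.recv(4096)
            if not chunk:
                break
            data += chunk
    rows = []
    for line in data.decode().splitlines():
        if line == ".":
            break
        parts = line.split()
        # function names may contain spaces; the last three fields are counters
        rows.append(("_".join(parts[:-3]), *map(int, parts[-3:])))
    return rows


if __name__ == "__main__":
    for name, queued, running, workers in gearman_status():
        print(name, queued, running, workers)
```
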
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/gearman/integrations/gearman.md b/src/collectors/python.d.plugin/gearman/integrations/gearman.md deleted file mode 100644 index 717b0dcad..000000000 --- a/src/collectors/python.d.plugin/gearman/integrations/gearman.md +++ /dev/null @@ -1,210 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/gearman/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/gearman/metadata.yaml" -sidebar_label: "Gearman" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Distributed Computing Systems" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Gearman - - -<img src="https://netdata.cloud/img/gearman.png" width="150"/> - - -Plugin: python.d.plugin -Module: gearman - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Monitor Gearman metrics for proficient system task distribution. Track job counts, worker statuses, and queue lengths for effective distributed task management. - -This collector connects to a Gearman instance via either TCP or unix socket. - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -When no configuration file is found, the collector tries to connect to TCP/IP socket: localhost:4730. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Gearman instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| gearman.total_jobs | Pending, Running | Jobs | - -### Per gearman job - -Metrics related to Gearman jobs. Each job produces its own set of the following metrics. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| gearman.single_job | Pending, Idle, Runnning | Jobs | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ gearman_workers_queued ](https://github.com/netdata/netdata/blob/master/src/health/health.d/gearman.conf) | gearman.single_job | average number of queued jobs over the last 10 minutes | - - -## Setup - -### Prerequisites - -#### Socket permissions - -The gearman UNIX socket should have read permission for user netdata. - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/gearman.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
- -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/gearman.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | -| host | URL or IP where gearman is running. | localhost | no | -| port | Port of URL or IP where gearman is running. | 4730 | no | -| tls | Use tls to connect to gearman. | false | no | -| cert | Provide a certificate file if needed to connect to a TLS gearman instance. | | no | -| key | Provide a key file if needed to connect to a TLS gearman instance. | | no | - -</details> - -#### Examples - -##### Local gearman service - -A basic host and port gearman configuration for localhost. - -```yaml -localhost: - name: 'local' - host: 'localhost' - port: 4730 - -``` -##### Multi-instance - -> **Note**: When you define multiple jobs, their names must be unique. - -Collecting metrics from local and remote instances. - - -<details open><summary>Config</summary> - -```yaml -localhost: - name: 'local' - host: 'localhost' - port: 4730 - -remote: - name: 'remote' - host: '192.0.2.1' - port: 4730 - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `gearman` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. 
- - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin gearman debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/gearman/metadata.yaml b/src/collectors/python.d.plugin/gearman/metadata.yaml deleted file mode 100644 index 4ab9c12ef..000000000 --- a/src/collectors/python.d.plugin/gearman/metadata.yaml +++ /dev/null @@ -1,168 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: gearman - monitored_instance: - name: Gearman - link: "http://gearman.org/" - categories: - - data-collection.distributed-computing-systems - icon_filename: "gearman.png" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - gearman - - gearman job server - most_popular: false - overview: - data_collection: - metrics_description: "Monitor Gearman metrics for proficient system task distribution. Track job counts, worker statuses, and queue lengths for effective distributed task management." - method_description: "This collector connects to a Gearman instance via either TCP or unix socket." - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "When no configuration file is found, the collector tries to connect to TCP/IP socket: localhost:4730." - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: "Socket permissions" - description: The gearman UNIX socket should have read permission for user netdata. - configuration: - file: - name: python.d/gearman.conf - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: "" - required: false - - name: host - description: URL or IP where gearman is running. - default_value: "localhost" - required: false - - name: port - description: Port of URL or IP where gearman is running. - default_value: "4730" - required: false - - name: tls - description: Use tls to connect to gearman. 
- default_value: "false" - required: false - - name: cert - description: Provide a certificate file if needed to connect to a TLS gearman instance. - default_value: "" - required: false - - name: key - description: Provide a key file if needed to connect to a TLS gearman instance. - default_value: "" - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Local gearman service - description: A basic host and port gearman configuration for localhost. - folding: - enabled: false - config: | - localhost: - name: 'local' - host: 'localhost' - port: 4730 - - name: Multi-instance - description: | - > **Note**: When you define multiple jobs, their names must be unique. - - Collecting metrics from local and remote instances. - config: | - localhost: - name: 'local' - host: 'localhost' - port: 4730 - - remote: - name: 'remote' - host: '192.0.2.1' - port: 4730 - troubleshooting: - problems: - list: [] - alerts: - - name: gearman_workers_queued - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/gearman.conf - metric: gearman.single_job - info: average number of queued jobs over the last 10 minutes - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: gearman.total_jobs - description: Total Jobs - unit: "Jobs" - chart_type: line - dimensions: - - name: Pending - - name: Running - - name: gearman job - description: "Metrics related to Gearman jobs. Each job produces its own set of the following metrics." - labels: [] - metrics: - - name: gearman.single_job - description: "{job_name}" - unit: "Jobs" - chart_type: stacked - dimensions: - - name: Pending - - name: Idle - - name: Runnning diff --git a/src/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md b/src/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md index cbe7f265f..8f086765e 100644 --- a/src/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md +++ b/src/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md @@ -310,6 +310,7 @@ app1: ### Debug Mode + To troubleshoot issues with the `go_expvar` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -332,4 +333,37 @@ should give you clues as to why the collector isn't working. ./python.d.plugin go_expvar debug trace ``` +### Getting Logs + +If you're encountering problems with the `go_expvar` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep go_expvar +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep go_expvar /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. 
Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep go_expvar +``` + diff --git a/src/collectors/python.d.plugin/haproxy/metadata.yaml b/src/collectors/python.d.plugin/haproxy/metadata.yaml index f389b066e..e748a294c 100644 --- a/src/collectors/python.d.plugin/haproxy/metadata.yaml +++ b/src/collectors/python.d.plugin/haproxy/metadata.yaml @@ -1,5 +1,5 @@ # This collector will not appear in documentation, as the go version is preferred, -# /src/go/collectors/go.d.plugin/modules/haproxy/README.md +# /src/go/plugin/go.d/modules/haproxy/README.md # # # meta: diff --git a/src/collectors/python.d.plugin/icecast/README.md b/src/collectors/python.d.plugin/icecast/README.md deleted file mode 120000 index db3c1b572..000000000 --- a/src/collectors/python.d.plugin/icecast/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/icecast.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/icecast/icecast.chart.py b/src/collectors/python.d.plugin/icecast/icecast.chart.py deleted file mode 100644 index a967d1779..000000000 --- a/src/collectors/python.d.plugin/icecast/icecast.chart.py +++ /dev/null @@ -1,94 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: icecast netdata python.d module -# Author: Ilya Mashchenko (ilyam8) -# SPDX-License-Identifier: GPL-3.0-or-later - -import json - -from bases.FrameworkServices.UrlService import UrlService - -ORDER = [ - 'listeners', -] - -CHARTS = { - 'listeners': { - 'options': [None, 'Number Of Listeners', 'listeners', 'listeners', 'icecast.listeners', 'line'], - 'lines': [ - ] - } -} - - -class Source: - def __init__(self, idx, data): - self.name = 'source_{0}'.format(idx) - self.is_active = data.get('stream_start') and data.get('server_name') - self.listeners = data['listeners'] - - -class Service(UrlService): - def __init__(self, configuration=None, name=None): - UrlService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.url = self.configuration.get('url') - self._manager = self._build_manager() - - def check(self): - """ - Add active sources to the "listeners" chart - :return: bool - """ - sources = self.get_sources() - if not sources: - return None - - active_sources = 0 - for idx, raw_source in enumerate(sources): - if Source(idx, raw_source).is_active: - active_sources += 1 - dim_id = 'source_{0}'.format(idx) - dim = 'source {0}'.format(idx) - self.definitions['listeners']['lines'].append([dim_id, dim]) - - return bool(active_sources) - - def _get_data(self): - """ - Get number of listeners for every source - :return: dict - """ - sources = self.get_sources() - if not sources: - return None - - data = dict() - - for idx, raw_source in enumerate(sources): - source = Source(idx, raw_source) - data[source.name] = source.listeners - - return data - - def get_sources(self): - """ - Format data received from http request and return list of sources - :return: list - """ - - raw_data = self._get_raw_data() - if not raw_data: - return None - - try: - data = json.loads(raw_data) - except ValueError as error: - self.error('JSON decode error:', error) - return None - - sources = data['icestats'].get('source') - if not sources: - return None - - return sources if isinstance(sources, list) else [sources] diff --git a/src/collectors/python.d.plugin/icecast/icecast.conf b/src/collectors/python.d.plugin/icecast/icecast.conf deleted file mode 100644 index a33074aef..000000000 --- a/src/collectors/python.d.plugin/icecast/icecast.conf +++ /dev/null @@ -1,81 +0,0 @@ -# netdata python.d.plugin configuration for icecast -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. 
-# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, icecast also supports the following: -# -# url: 'URL' # the URL to fetch icecast's stats -# -# if the URL is password protected, the following are supported: -# -# user: 'username' -# pass: 'password' - -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) - -localhost: - name : 'local' - url : 'http://localhost:8443/status-json.xsl' - -localipv4: - name : 'local' - url : 'http://127.0.0.1:8443/status-json.xsl'
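
The icecast.conf above points the job at `status-json.xsl`; the collector simply decodes that JSON and counts listeners per active source. A minimal sketch of the same request (assuming Icecast >= 2.4.0 serving the default endpoint on localhost:8443 with no authentication):

```python
# Fetches status-json.xsl and returns listeners per source; Icecast returns a
# single source as an object rather than a list, so both shapes are normalized.
import json
from urllib.request import urlopen


def icecast_listeners(url="http://localhost:8443/status-json.xsl", timeout=5):
    with urlopen(url, timeout=timeout) as resp:
        stats = json.load(resp)
    sources = stats["icestats"].get("source") or []
    if isinstance(sources, dict):
        sources = [sources]
    return {"source_{0}".format(i): s.get("listeners", 0) for i, s in enumerate(sources)}


if __name__ == "__main__":
    print(icecast_listeners())
```
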
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/icecast/integrations/icecast.md b/src/collectors/python.d.plugin/icecast/integrations/icecast.md deleted file mode 100644 index 17316b063..000000000 --- a/src/collectors/python.d.plugin/icecast/integrations/icecast.md +++ /dev/null @@ -1,166 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/icecast/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/icecast/metadata.yaml" -sidebar_label: "Icecast" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Media Services" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Icecast - - -<img src="https://netdata.cloud/img/icecast.svg" width="150"/> - - -Plugin: python.d.plugin -Module: icecast - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors Icecast listener counts. - -It connects to an icecast URL and uses the `status-json.xsl` endpoint to retrieve statistics. - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -Without configuration, the collector attempts to connect to http://localhost:8443/status-json.xsl - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Icecast instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| icecast.listeners | a dimension for each active source | listeners | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Icecast minimum version - -Needs at least icecast version >= 2.4.0 - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/icecast.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/icecast.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. 
- - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | -| url | The URL (and port) to the icecast server. Needs to also include `/status-json.xsl` | http://localhost:8443/status-json.xsl | no | -| user | Username to use to connect to `url` if it's password protected. | | no | -| pass | Password to use to connect to `url` if it's password protected. | | no | - -</details> - -#### Examples - -##### Remote Icecast server - -Configure a remote icecast server - -```yaml -remote: - url: 'http://1.2.3.4:8443/status-json.xsl' - -``` - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `icecast` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin icecast debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/icecast/metadata.yaml b/src/collectors/python.d.plugin/icecast/metadata.yaml deleted file mode 100644 index 4bcf5e39f..000000000 --- a/src/collectors/python.d.plugin/icecast/metadata.yaml +++ /dev/null @@ -1,127 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: icecast - monitored_instance: - name: Icecast - link: 'https://icecast.org/' - categories: - - data-collection.media-streaming-servers - icon_filename: 'icecast.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: - - icecast - - streaming - - media - most_popular: false - overview: - data_collection: - metrics_description: 'This collector monitors Icecast listener counts.' - method_description: 'It connects to an icecast URL and uses the `status-json.xsl` endpoint to retrieve statistics.' - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: 'Without configuration, the collector attempts to connect to http://localhost:8443/status-json.xsl' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: - - title: 'Icecast minimum version' - description: 'Needs at least icecast version >= 2.4.0' - configuration: - file: - name: python.d/icecast.conf - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. 
- - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: '' - required: false - - name: url - description: The URL (and port) to the icecast server. Needs to also include `/status-json.xsl` - default_value: 'http://localhost:8443/status-json.xsl' - required: false - - name: user - description: Username to use to connect to `url` if it's password protected. - default_value: '' - required: false - - name: pass - description: Password to use to connect to `url` if it's password protected. - default_value: '' - required: false - examples: - folding: - enabled: false - title: "Config" - list: - - name: Remote Icecast server - description: Configure a remote icecast server - folding: - enabled: false - config: | - remote: - url: 'http://1.2.3.4:8443/status-json.xsl' - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: icecast.listeners - description: Number Of Listeners - unit: "listeners" - chart_type: line - dimensions: - - name: a dimension for each active source diff --git a/src/collectors/python.d.plugin/ipfs/README.md b/src/collectors/python.d.plugin/ipfs/README.md deleted file mode 120000 index eee6a07b2..000000000 --- a/src/collectors/python.d.plugin/ipfs/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/ipfs.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/ipfs/integrations/ipfs.md b/src/collectors/python.d.plugin/ipfs/integrations/ipfs.md deleted file mode 100644 index 71e8e28a5..000000000 --- a/src/collectors/python.d.plugin/ipfs/integrations/ipfs.md +++ /dev/null @@ -1,203 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/ipfs/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/ipfs/metadata.yaml" -sidebar_label: "IPFS" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Storage, Mount Points and Filesystems" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# IPFS - - -<img src="https://netdata.cloud/img/ipfs.svg" width="150"/> - - -Plugin: python.d.plugin -Module: ipfs - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors IPFS server metrics about its quality and performance. - -It connects to an http endpoint of the IPFS server to collect the metrics - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -If the endpoint is accessible by the Agent, netdata will autodetect it - -#### Limits - -Calls to the following endpoints are disabled due to IPFS bugs: - -/api/v0/stats/repo (https://github.com/ipfs/go-ipfs/issues/3874) -/api/v0/pin/ls (https://github.com/ipfs/go-ipfs/issues/7528) - - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per IPFS instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| ipfs.bandwidth | in, out | kilobits/s | -| ipfs.peers | peers | peers | -| ipfs.repo_size | avail, size | GiB | -| ipfs.repo_objects | objects, pinned, recursive_pins | objects | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ ipfs_datastore_usage ](https://github.com/netdata/netdata/blob/master/src/health/health.d/ipfs.conf) | ipfs.repo_size | IPFS datastore utilization | - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/ipfs.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/ipfs.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. 
- -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary></summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | The JOB's name as it will appear at the dashboard (by default is the job_name) | job_name | no | -| url | URL to the IPFS API | no | yes | -| repoapi | Collect repo metrics. | no | no | -| pinapi | Set status of IPFS pinned object polling. | no | no | - -</details> - -#### Examples - -##### Basic (default out-of-the-box) - -A basic example configuration, one job will run at a time. Autodetect mechanism uses it by default. - -```yaml -localhost: - name: 'local' - url: 'http://localhost:5001' - repoapi: no - pinapi: no - -``` -##### Multi-instance - -> **Note**: When you define multiple jobs, their names must be unique. - -Collecting metrics from local and remote instances. - - -<details open><summary>Config</summary> - -```yaml -localhost: - name: 'local' - url: 'http://localhost:5001' - repoapi: no - pinapi: no - -remote_host: - name: 'remote' - url: 'http://192.0.2.1:5001' - repoapi: no - pinapi: no - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `ipfs` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. 
- - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin ipfs debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/ipfs/ipfs.chart.py b/src/collectors/python.d.plugin/ipfs/ipfs.chart.py deleted file mode 100644 index abfc9c492..000000000 --- a/src/collectors/python.d.plugin/ipfs/ipfs.chart.py +++ /dev/null @@ -1,149 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: IPFS netdata python.d module -# Authors: davidak -# SPDX-License-Identifier: GPL-3.0-or-later - -import json - -from bases.FrameworkServices.UrlService import UrlService - -ORDER = [ - 'bandwidth', - 'peers', - 'repo_size', - 'repo_objects', -] - -CHARTS = { - 'bandwidth': { - 'options': [None, 'IPFS Bandwidth', 'kilobits/s', 'Bandwidth', 'ipfs.bandwidth', 'line'], - 'lines': [ - ['in', None, 'absolute', 8, 1000], - ['out', None, 'absolute', -8, 1000] - ] - }, - 'peers': { - 'options': [None, 'IPFS Peers', 'peers', 'Peers', 'ipfs.peers', 'line'], - 'lines': [ - ['peers', None, 'absolute'] - ] - }, - 'repo_size': { - 'options': [None, 'IPFS Repo Size', 'GiB', 'Size', 'ipfs.repo_size', 'area'], - 'lines': [ - ['avail', None, 'absolute', 1, 1 << 30], - ['size', None, 'absolute', 1, 1 << 30], - ] - }, - 'repo_objects': { - 'options': [None, 'IPFS Repo Objects', 'objects', 'Objects', 'ipfs.repo_objects', 'line'], - 'lines': [ - ['objects', None, 'absolute', 1, 1], - ['pinned', None, 'absolute', 1, 1], - ['recursive_pins', None, 'absolute', 1, 1] - ] - } -} - -SI_zeroes = { - 'k': 3, - 'm': 6, - 'g': 9, - 't': 12, - 'p': 15, - 'e': 18, - 'z': 21, - 'y': 24 -} - - -class Service(UrlService): - def __init__(self, configuration=None, name=None): - UrlService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.baseurl = self.configuration.get('url', 'http://localhost:5001') - self.method = "POST" - self.do_pinapi = self.configuration.get('pinapi') - self.do_repoapi = self.configuration.get('repoapi') - self.__storage_max = None - - def _get_json(self, sub_url): - """ - :return: json decoding of the specified url - """ - self.url = self.baseurl + sub_url - try: - return json.loads(self._get_raw_data()) - except (TypeError, ValueError): - return dict() - - @staticmethod - def _recursive_pins(keys): - return sum(1 for k in keys if keys[k]['Type'] == b'recursive') - - @staticmethod - def _dehumanize(store_max): - # convert from '10Gb' to 10000000000 - if not isinstance(store_max, int): - store_max = store_max.lower() - if store_max.endswith('b'): - val, units = store_max[:-2], store_max[-2] - if units in SI_zeroes: - val += '0' * SI_zeroes[units] - store_max = val - try: - store_max = int(store_max) - except (TypeError, ValueError): - store_max = None - return store_max - - def _storagemax(self, store_cfg): - if self.__storage_max is None: - self.__storage_max = self._dehumanize(store_cfg) - return self.__storage_max - - def _get_data(self): - """ - Get data from API - :return: dict - """ - # suburl : List of (result-key, original-key, transform-func) - cfg = { - '/api/v0/stats/bw': - [ - ('in', 'RateIn', int), - ('out', 'RateOut', int), - ], - '/api/v0/swarm/peers': - [ - ('peers', 'Peers', len), - ], - } - if self.do_repoapi: - cfg.update({ - '/api/v0/stats/repo': - [ - ('size', 'RepoSize', int), - ('objects', 'NumObjects', int), - ('avail', 'StorageMax', self._storagemax), - ], - }) - - if self.do_pinapi: - cfg.update({ - '/api/v0/pin/ls': - [ - ('pinned', 'Keys', len), - ('recursive_pins', 'Keys', 
self._recursive_pins), - ] - }) - r = dict() - for suburl in cfg: - in_json = self._get_json(suburl) - for new_key, orig_key, xmute in cfg[suburl]: - try: - r[new_key] = xmute(in_json[orig_key]) - except Exception as error: - self.debug(error) - return r or None diff --git a/src/collectors/python.d.plugin/ipfs/ipfs.conf b/src/collectors/python.d.plugin/ipfs/ipfs.conf deleted file mode 100644 index 8b167b399..000000000 --- a/src/collectors/python.d.plugin/ipfs/ipfs.conf +++ /dev/null @@ -1,82 +0,0 @@ -# netdata python.d.plugin configuration for ipfs -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. 
These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, ipfs also supports the following: -# -# url: 'URL' # URL to the IPFS API -# repoapi: no # Collect repo metrics -# # Currently defaults to disabled due to IPFS Bug -# # https://github.com/ipfs/go-ipfs/issues/7528 -# # resulting in very high CPU Usage -# pinapi: no # Set status of IPFS pinned object polling -# # Currently defaults to disabled due to IPFS Bug -# # https://github.com/ipfs/go-ipfs/issues/3874 -# # resulting in very high CPU Usage -# -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) - -localhost: - name: 'local' - url: 'http://localhost:5001' - repoapi: no - pinapi: no diff --git a/src/collectors/python.d.plugin/ipfs/metadata.yaml b/src/collectors/python.d.plugin/ipfs/metadata.yaml deleted file mode 100644 index 55c39e31e..000000000 --- a/src/collectors/python.d.plugin/ipfs/metadata.yaml +++ /dev/null @@ -1,172 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: ipfs - monitored_instance: - name: IPFS - link: "https://ipfs.tech/" - categories: - - data-collection.storage-mount-points-and-filesystems - icon_filename: "ipfs.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: [] - most_popular: false - overview: - data_collection: - metrics_description: "This collector monitors IPFS server metrics about its quality and performance." - method_description: "It connects to an http endpoint of the IPFS server to collect the metrics" - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "If the endpoint is accessible by the Agent, netdata will autodetect it" - limits: - description: | - Calls to the following endpoints are disabled due to IPFS bugs: - - /api/v0/stats/repo (https://github.com/ipfs/go-ipfs/issues/3874) - /api/v0/pin/ls (https://github.com/ipfs/go-ipfs/issues/7528) - performance_impact: - description: "" - setup: - prerequisites: - list: [] - configuration: - file: - name: "python.d/ipfs.conf" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. 
- default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: The JOB's name as it will appear at the dashboard (by default is the job_name) - default_value: job_name - required: false - - name: url - description: URL to the IPFS API - default_value: no - required: true - - name: repoapi - description: Collect repo metrics. - default_value: no - required: false - - name: pinapi - description: Set status of IPFS pinned object polling. - default_value: no - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Basic (default out-of-the-box) - description: A basic example configuration, one job will run at a time. Autodetect mechanism uses it by default. - folding: - enabled: false - config: | - localhost: - name: 'local' - url: 'http://localhost:5001' - repoapi: no - pinapi: no - - name: Multi-instance - description: | - > **Note**: When you define multiple jobs, their names must be unique. - - Collecting metrics from local and remote instances. - config: | - localhost: - name: 'local' - url: 'http://localhost:5001' - repoapi: no - pinapi: no - - remote_host: - name: 'remote' - url: 'http://192.0.2.1:5001' - repoapi: no - pinapi: no - troubleshooting: - problems: - list: [] - alerts: - - name: ipfs_datastore_usage - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/ipfs.conf - metric: ipfs.repo_size - info: IPFS datastore utilization - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: ipfs.bandwidth - description: IPFS Bandwidth - unit: "kilobits/s" - chart_type: line - dimensions: - - name: in - - name: out - - name: ipfs.peers - description: IPFS Peers - unit: "peers" - chart_type: line - dimensions: - - name: peers - - name: ipfs.repo_size - description: IPFS Repo Size - unit: "GiB" - chart_type: area - dimensions: - - name: avail - - name: size - - name: ipfs.repo_objects - description: IPFS Repo Objects - unit: "objects" - chart_type: line - dimensions: - - name: objects - - name: pinned - - name: recursive_pins diff --git a/src/collectors/python.d.plugin/memcached/README.md b/src/collectors/python.d.plugin/memcached/README.md deleted file mode 120000 index 2cb76d33c..000000000 --- a/src/collectors/python.d.plugin/memcached/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/memcached.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/memcached/integrations/memcached.md b/src/collectors/python.d.plugin/memcached/integrations/memcached.md deleted file mode 100644 index 5e813eac2..000000000 --- a/src/collectors/python.d.plugin/memcached/integrations/memcached.md +++ /dev/null @@ -1,215 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/memcached/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/memcached/metadata.yaml" -sidebar_label: "Memcached" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Databases" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Memcached - - -<img src="https://netdata.cloud/img/memcached.svg" width="150"/> - - -Plugin: python.d.plugin -Module: memcached - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Monitor Memcached metrics for proficient in-memory key-value store operations. Track cache hits, misses, and memory usage for efficient data caching. - -It reads server response to stats command ([stats interface](https://github.com/memcached/memcached/wiki/Commands#stats)). - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -If no configuration is given, collector will attempt to connect to memcached instance on `127.0.0.1:11211` address. - - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Memcached instance - -These metrics refer to the entire monitored application. - -This scope has no labels. 
- -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| memcached.cache | available, used | MiB | -| memcached.net | in, out | kilobits/s | -| memcached.connections | current, rejected, total | connections/s | -| memcached.items | current, total | items | -| memcached.evicted_reclaimed | reclaimed, evicted | items | -| memcached.get | hints, misses | requests | -| memcached.get_rate | rate | requests/s | -| memcached.set_rate | rate | requests/s | -| memcached.delete | hits, misses | requests | -| memcached.cas | hits, misses, bad value | requests | -| memcached.increment | hits, misses | requests | -| memcached.decrement | hits, misses | requests | -| memcached.touch | hits, misses | requests | -| memcached.touch_rate | rate | requests/s | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ memcached_cache_memory_usage ](https://github.com/netdata/netdata/blob/master/src/health/health.d/memcached.conf) | memcached.cache | cache memory utilization | -| [ memcached_cache_fill_rate ](https://github.com/netdata/netdata/blob/master/src/health/health.d/memcached.conf) | memcached.cache | average rate the cache fills up (positive), or frees up (negative) space over the last hour | -| [ memcached_out_of_cache_space_time ](https://github.com/netdata/netdata/blob/master/src/health/health.d/memcached.conf) | memcached.cache | estimated time the cache will run out of space if the system continues to add data at the same rate as the past hour | - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/memcached.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/memcached.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| host | the host to connect to. | 127.0.0.1 | no | -| port | the port to connect to. | 11211 | no | -| update_every | Sets the default data collection frequency. | 10 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | - -</details> - -#### Examples - -##### localhost - -An example configuration for localhost. 
- -```yaml -localhost: - name: 'local' - host: 'localhost' - port: 11211 - -``` -##### localipv4 - -An example configuration for localipv4. - -<details open><summary>Config</summary> - -```yaml -localhost: - name: 'local' - host: '127.0.0.1' - port: 11211 - -``` -</details> - -##### localipv6 - -An example configuration for localipv6. - -<details open><summary>Config</summary> - -```yaml -localhost: - name: 'local' - host: '::1' - port: 11211 - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `memcached` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin memcached debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/memcached/memcached.chart.py b/src/collectors/python.d.plugin/memcached/memcached.chart.py deleted file mode 100644 index adb9560b7..000000000 --- a/src/collectors/python.d.plugin/memcached/memcached.chart.py +++ /dev/null @@ -1,197 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: memcached netdata python.d module -# Author: Pawel Krupa (paulfantom) -# SPDX-License-Identifier: GPL-3.0-or-later - -from bases.FrameworkServices.SocketService import SocketService - -ORDER = [ - 'cache', - 'net', - 'connections', - 'items', - 'evicted_reclaimed', - 'get', - 'get_rate', - 'set_rate', - 'cas', - 'delete', - 'increment', - 'decrement', - 'touch', - 'touch_rate', -] - -CHARTS = { - 'cache': { - 'options': [None, 'Cache Size', 'MiB', 'cache', 'memcached.cache', 'stacked'], - 'lines': [ - ['avail', 'available', 'absolute', 1, 1 << 20], - ['used', 'used', 'absolute', 1, 1 << 20] - ] - }, - 'net': { - 'options': [None, 'Network', 'kilobits/s', 'network', 'memcached.net', 'area'], - 'lines': [ - ['bytes_read', 'in', 'incremental', 8, 1000], - ['bytes_written', 'out', 'incremental', -8, 1000], - ] - }, - 'connections': { - 'options': [None, 'Connections', 'connections/s', 'connections', 'memcached.connections', 'line'], - 'lines': [ - ['curr_connections', 'current', 'incremental'], - ['rejected_connections', 'rejected', 'incremental'], - ['total_connections', 'total', 'incremental'] - ] - }, - 'items': { - 'options': [None, 'Items', 'items', 'items', 'memcached.items', 'line'], - 'lines': [ - ['curr_items', 'current', 'absolute'], - ['total_items', 'total', 'absolute'] - ] - }, - 'evicted_reclaimed': { - 'options': [None, 'Evicted and Reclaimed Items', 'items', 'items', 'memcached.evicted_reclaimed', 'line'], - 'lines': [ - ['reclaimed', 'reclaimed', 'absolute'], - ['evictions', 'evicted', 'absolute'] - ] - }, - 'get': { - 'options': [None, 'Get Requests', 'requests', 'get ops', 'memcached.get', 'stacked'], - 'lines': [ - ['get_hits', 'hits', 'percent-of-absolute-row'], - ['get_misses', 'misses', 'percent-of-absolute-row'] - ] - }, - 'get_rate': { - 'options': [None, 'Get Request Rate', 'requests/s', 'get ops', 'memcached.get_rate', 'line'], - 'lines': [ - ['cmd_get', 'rate', 'incremental'] - ] - }, - 'set_rate': { - 'options': [None, 'Set Request Rate', 'requests/s', 'set ops', 'memcached.set_rate', 'line'], - 'lines': [ - 
['cmd_set', 'rate', 'incremental'] - ] - }, - 'delete': { - 'options': [None, 'Delete Requests', 'requests', 'delete ops', 'memcached.delete', 'stacked'], - 'lines': [ - ['delete_hits', 'hits', 'percent-of-absolute-row'], - ['delete_misses', 'misses', 'percent-of-absolute-row'], - ] - }, - 'cas': { - 'options': [None, 'Check and Set Requests', 'requests', 'check and set ops', 'memcached.cas', 'stacked'], - 'lines': [ - ['cas_hits', 'hits', 'percent-of-absolute-row'], - ['cas_misses', 'misses', 'percent-of-absolute-row'], - ['cas_badval', 'bad value', 'percent-of-absolute-row'] - ] - }, - 'increment': { - 'options': [None, 'Increment Requests', 'requests', 'increment ops', 'memcached.increment', 'stacked'], - 'lines': [ - ['incr_hits', 'hits', 'percent-of-absolute-row'], - ['incr_misses', 'misses', 'percent-of-absolute-row'] - ] - }, - 'decrement': { - 'options': [None, 'Decrement Requests', 'requests', 'decrement ops', 'memcached.decrement', 'stacked'], - 'lines': [ - ['decr_hits', 'hits', 'percent-of-absolute-row'], - ['decr_misses', 'misses', 'percent-of-absolute-row'] - ] - }, - 'touch': { - 'options': [None, 'Touch Requests', 'requests', 'touch ops', 'memcached.touch', 'stacked'], - 'lines': [ - ['touch_hits', 'hits', 'percent-of-absolute-row'], - ['touch_misses', 'misses', 'percent-of-absolute-row'] - ] - }, - 'touch_rate': { - 'options': [None, 'Touch Request Rate', 'requests/s', 'touch ops', 'memcached.touch_rate', 'line'], - 'lines': [ - ['cmd_touch', 'rate', 'incremental'] - ] - } -} - - -class Service(SocketService): - def __init__(self, configuration=None, name=None): - SocketService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.request = 'stats\r\n' - self.host = 'localhost' - self.port = 11211 - self._keep_alive = True - self.unix_socket = None - - def _get_data(self): - """ - Get data from socket - :return: dict - """ - response = self._get_raw_data() - if response is None: - # error has already been logged - return None - - if response.startswith('ERROR'): - self.error('received ERROR') - return None - - try: - parsed = response.split('\n') - except AttributeError: - self.error('response is invalid/empty') - return None - - # split the response - data = {} - for line in parsed: - if line.startswith('STAT'): - try: - t = line[5:].split(' ') - data[t[0]] = t[1] - except (IndexError, ValueError): - self.debug('invalid line received: ' + str(line)) - - if not data: - self.error("received data doesn't have any records") - return None - - # custom calculations - try: - data['avail'] = int(data['limit_maxbytes']) - int(data['bytes']) - data['used'] = int(data['bytes']) - except (KeyError, ValueError, TypeError): - pass - - return data - - def _check_raw_data(self, data): - if data.endswith('END\r\n'): - self.debug('received full response from memcached') - return True - - self.debug('waiting more data from memcached') - return False - - def check(self): - """ - Parse configuration, check if memcached is available - :return: boolean - """ - self._parse_config() - data = self._get_data() - if data is None: - return False - return True diff --git a/src/collectors/python.d.plugin/memcached/memcached.conf b/src/collectors/python.d.plugin/memcached/memcached.conf deleted file mode 100644 index 3286b4623..000000000 --- a/src/collectors/python.d.plugin/memcached/memcached.conf +++ /dev/null @@ -1,90 +0,0 @@ -# netdata python.d.plugin configuration for memcached -# -# This file is in YaML format. 
Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, memcached also supports the following: -# -# socket: 'path/to/memcached.sock' -# -# or -# host: 'IP or HOSTNAME' # the host to connect to -# port: PORT # the port to connect to -# -# - -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) - -localhost: - name : 'local' - host : 'localhost' - port : 11211 - -localipv4: - name : 'local' - host : '127.0.0.1' - port : 11211 - -localipv6: - name : 'local' - host : '::1' - port : 11211 - diff --git a/src/collectors/python.d.plugin/memcached/metadata.yaml b/src/collectors/python.d.plugin/memcached/metadata.yaml deleted file mode 100644 index ae420f1c1..000000000 --- a/src/collectors/python.d.plugin/memcached/metadata.yaml +++ /dev/null @@ -1,247 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: memcached - monitored_instance: - name: Memcached - link: https://memcached.org/ - categories: - - data-collection.database-servers - icon_filename: "memcached.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - memcached - - memcache - - cache - - database - most_popular: false - overview: - data_collection: - metrics_description: "Monitor Memcached metrics for proficient 
in-memory key-value store operations. Track cache hits, misses, and memory usage for efficient data caching." - method_description: "It reads server response to stats command ([stats interface](https://github.com/memcached/memcached/wiki/Commands#stats))." - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: | - If no configuration is given, collector will attempt to connect to memcached instance on `127.0.0.1:11211` address. - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: [] - configuration: - file: - name: python.d/memcached.conf - description: "" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: Config options - enabled: true - list: - - name: host - description: the host to connect to. - default_value: "127.0.0.1" - required: false - - name: port - description: the port to connect to. - default_value: "11211" - required: false - - name: update_every - description: Sets the default data collection frequency. - default_value: 10 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: "" - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: localhost - description: An example configuration for localhost. - folding: - enabled: false - config: | - localhost: - name: 'local' - host: 'localhost' - port: 11211 - - name: localipv4 - description: An example configuration for localipv4. - folding: - enabled: true - config: | - localhost: - name: 'local' - host: '127.0.0.1' - port: 11211 - - name: localipv6 - description: An example configuration for localipv6. 
- folding: - enabled: true - config: | - localhost: - name: 'local' - host: '::1' - port: 11211 - troubleshooting: - problems: - list: [] - alerts: - - name: memcached_cache_memory_usage - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/memcached.conf - metric: memcached.cache - info: cache memory utilization - - name: memcached_cache_fill_rate - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/memcached.conf - metric: memcached.cache - info: average rate the cache fills up (positive), or frees up (negative) space over the last hour - - name: memcached_out_of_cache_space_time - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/memcached.conf - metric: memcached.cache - info: estimated time the cache will run out of space if the system continues to add data at the same rate as the past hour - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: memcached.cache - description: Cache Size - unit: "MiB" - chart_type: stacked - dimensions: - - name: available - - name: used - - name: memcached.net - description: Network - unit: "kilobits/s" - chart_type: area - dimensions: - - name: in - - name: out - - name: memcached.connections - description: Connections - unit: "connections/s" - chart_type: line - dimensions: - - name: current - - name: rejected - - name: total - - name: memcached.items - description: Items - unit: "items" - chart_type: line - dimensions: - - name: current - - name: total - - name: memcached.evicted_reclaimed - description: Evicted and Reclaimed Items - unit: "items" - chart_type: line - dimensions: - - name: reclaimed - - name: evicted - - name: memcached.get - description: Get Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hints - - name: misses - - name: memcached.get_rate - description: Get Request Rate - unit: "requests/s" - chart_type: line - dimensions: - - name: rate - - name: memcached.set_rate - description: Set Request Rate - unit: "requests/s" - chart_type: line - dimensions: - - name: rate - - name: memcached.delete - description: Delete Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hits - - name: misses - - name: memcached.cas - description: Check and Set Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hits - - name: misses - - name: bad value - - name: memcached.increment - description: Increment Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hits - - name: misses - - name: memcached.decrement - description: Decrement Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hits - - name: misses - - name: memcached.touch - description: Touch Requests - unit: "requests" - chart_type: stacked - dimensions: - - name: hits - - name: misses - - name: memcached.touch_rate - description: Touch Request Rate - unit: "requests/s" - chart_type: line - dimensions: - - name: rate diff --git a/src/collectors/python.d.plugin/monit/README.md b/src/collectors/python.d.plugin/monit/README.md deleted file mode 120000 index ac69496f4..000000000 --- a/src/collectors/python.d.plugin/monit/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/monit.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/monit/integrations/monit.md b/src/collectors/python.d.plugin/monit/integrations/monit.md deleted file mode 100644 index d14d2a963..000000000 --- a/src/collectors/python.d.plugin/monit/integrations/monit.md +++ /dev/null @@ -1,214 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/monit/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/monit/metadata.yaml" -sidebar_label: "Monit" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Synthetic Checks" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Monit - - -<img src="https://netdata.cloud/img/monit.png" width="150"/> - - -Plugin: python.d.plugin -Module: monit - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors Monit targets such as filesystems, directories, files, FIFO pipes and more. - - -It gathers data from Monit's XML interface. - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -By default, this collector will attempt to connect to Monit at `http://localhost:2812` - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Monit instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| monit.filesystems | a dimension per target | filesystems | -| monit.directories | a dimension per target | directories | -| monit.files | a dimension per target | files | -| monit.fifos | a dimension per target | pipes | -| monit.programs | a dimension per target | programs | -| monit.services | a dimension per target | processes | -| monit.process_uptime | a dimension per target | seconds | -| monit.process_threads | a dimension per target | threads | -| monit.process_childrens | a dimension per target | children | -| monit.hosts | a dimension per target | hosts | -| monit.host_latency | a dimension per target | milliseconds | -| monit.networks | a dimension per target | interfaces | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/monit.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/monit.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. 
- -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 1 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | no | -| url | The URL to fetch Monit's metrics. | http://localhost:2812 | yes | -| user | Username in case the URL is password protected. | | no | -| pass | Password in case the URL is password protected. | | no | - -</details> - -#### Examples - -##### Basic - -A basic configuration example. - -```yaml -localhost: - name : 'local' - url : 'http://localhost:2812' - -``` -##### Basic Authentication - -Example using basic username and password in order to authenticate. - -<details open><summary>Config</summary> - -```yaml -localhost: - name : 'local' - url : 'http://localhost:2812' - user: 'foo' - pass: 'bar' - -``` -</details> - -##### Multi-instance - -> **Note**: When you define multiple jobs, their names must be unique. - -Collecting metrics from local and remote instances. - - -<details open><summary>Config</summary> - -```yaml -localhost: - name: 'local' - url: 'http://localhost:2812' - -remote_job: - name: 'remote' - url: 'http://192.0.2.1:2812' - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `monit` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. 
- - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin monit debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/monit/metadata.yaml b/src/collectors/python.d.plugin/monit/metadata.yaml deleted file mode 100644 index b51273188..000000000 --- a/src/collectors/python.d.plugin/monit/metadata.yaml +++ /dev/null @@ -1,217 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: monit - monitored_instance: - name: Monit - link: https://mmonit.com/monit/ - categories: - - data-collection.synthetic-checks - icon_filename: "monit.png" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - monit - - mmonit - - supervision tool - - monitrc - most_popular: false - overview: - data_collection: - metrics_description: | - This collector monitors Monit targets such as filesystems, directories, files, FIFO pipes and more. - method_description: | - It gathers data from Monit's XML interface. - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: By default, this collector will attempt to connect to Monit at `http://localhost:2812` - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: [] - configuration: - file: - name: "python.d/monit.conf" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 1 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: > - Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: "local" - required: false - - name: url - description: The URL to fetch Monit's metrics. - default_value: http://localhost:2812 - required: true - - name: user - description: Username in case the URL is password protected. - default_value: "" - required: false - - name: pass - description: Password in case the URL is password protected. - default_value: "" - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Basic - description: A basic configuration example. 
- folding: - enabled: false - config: | - localhost: - name : 'local' - url : 'http://localhost:2812' - - name: Basic Authentication - description: Example using basic username and password in order to authenticate. - config: | - localhost: - name : 'local' - url : 'http://localhost:2812' - user: 'foo' - pass: 'bar' - - name: Multi-instance - description: | - > **Note**: When you define multiple jobs, their names must be unique. - - Collecting metrics from local and remote instances. - config: | - localhost: - name: 'local' - url: 'http://localhost:2812' - - remote_job: - name: 'remote' - url: 'http://192.0.2.1:2812' - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: monit.filesystems - description: Filesystems - unit: "filesystems" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.directories - description: Directories - unit: "directories" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.files - description: Files - unit: "files" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.fifos - description: Pipes (fifo) - unit: "pipes" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.programs - description: Programs statuses - unit: "programs" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.services - description: Processes statuses - unit: "processes" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.process_uptime - description: Processes uptime - unit: "seconds" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.process_threads - description: Processes threads - unit: "threads" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.process_childrens - description: Child processes - unit: "children" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.hosts - description: Hosts - unit: "hosts" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.host_latency - description: Hosts latency - unit: "milliseconds" - chart_type: line - dimensions: - - name: a dimension per target - - name: monit.networks - description: Network interfaces and addresses - unit: "interfaces" - chart_type: line - dimensions: - - name: a dimension per target diff --git a/src/collectors/python.d.plugin/monit/monit.chart.py b/src/collectors/python.d.plugin/monit/monit.chart.py deleted file mode 100644 index 5d926961b..000000000 --- a/src/collectors/python.d.plugin/monit/monit.chart.py +++ /dev/null @@ -1,360 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: monit netdata python.d module -# Author: Evgeniy K. 
(n0guest) -# SPDX-License-Identifier: GPL-3.0-or-later - -import xml.etree.ElementTree as ET -from collections import namedtuple - -from bases.FrameworkServices.UrlService import UrlService - -MonitType = namedtuple('MonitType', ('index', 'name')) - -# see enum Service_Type from monit.h (https://bitbucket.org/tildeslash/monit/src/master/src/monit.h) -# typedef enum { -# Service_Filesystem = 0, -# Service_Directory, -# Service_File, -# Service_Process, -# Service_Host, -# Service_System, -# Service_Fifo, -# Service_Program, -# Service_Net, -# Service_Last = Service_Net -# } __attribute__((__packed__)) Service_Type; - -TYPE_FILESYSTEM = MonitType(0, 'filesystem') -TYPE_DIRECTORY = MonitType(1, 'directory') -TYPE_FILE = MonitType(2, 'file') -TYPE_PROCESS = MonitType(3, 'process') -TYPE_HOST = MonitType(4, 'host') -TYPE_SYSTEM = MonitType(5, 'system') -TYPE_FIFO = MonitType(6, 'fifo') -TYPE_PROGRAM = MonitType(7, 'program') -TYPE_NET = MonitType(8, 'net') - -TYPES = ( - TYPE_FILESYSTEM, - TYPE_DIRECTORY, - TYPE_FILE, - TYPE_PROCESS, - TYPE_HOST, - TYPE_SYSTEM, - TYPE_FIFO, - TYPE_PROGRAM, - TYPE_NET, -) - -# charts order (can be overridden if you want less charts, or different order) -ORDER = [ - 'filesystem', - 'directory', - 'file', - 'process', - 'process_uptime', - 'process_threads', - 'process_children', - 'host', - 'host_latency', - 'system', - 'fifo', - 'program', - 'net' -] - -CHARTS = { - 'filesystem': { - 'options': ['filesystems', 'Filesystems', 'filesystems', 'filesystem', 'monit.filesystems', 'line'], - 'lines': [] - }, - 'directory': { - 'options': ['directories', 'Directories', 'directories', 'filesystem', 'monit.directories', 'line'], - 'lines': [] - }, - 'file': { - 'options': ['files', 'Files', 'files', 'filesystem', 'monit.files', 'line'], - 'lines': [] - }, - 'fifo': { - 'options': ['fifos', 'Pipes (fifo)', 'pipes', 'filesystem', 'monit.fifos', 'line'], - 'lines': [] - }, - 'program': { - 'options': ['programs', 'Programs statuses', 'programs', 'applications', 'monit.programs', 'line'], - 'lines': [] - }, - 'process': { - 'options': ['processes', 'Processes statuses', 'processes', 'applications', 'monit.services', 'line'], - 'lines': [] - }, - 'process_uptime': { - 'options': ['processes uptime', 'Processes uptime', 'seconds', 'applications', - 'monit.process_uptime', 'line', 'hidden'], - 'lines': [] - }, - 'process_threads': { - 'options': ['processes threads', 'Processes threads', 'threads', 'applications', - 'monit.process_threads', 'line'], - 'lines': [] - }, - 'process_children': { - 'options': ['processes childrens', 'Child processes', 'children', 'applications', - 'monit.process_childrens', 'line'], - 'lines': [] - }, - 'host': { - 'options': ['hosts', 'Hosts', 'hosts', 'network', 'monit.hosts', 'line'], - 'lines': [] - }, - 'host_latency': { - 'options': ['hosts latency', 'Hosts latency', 'milliseconds', 'network', 'monit.host_latency', 'line'], - 'lines': [] - }, - 'net': { - 'options': ['interfaces', 'Network interfaces and addresses', 'interfaces', 'network', - 'monit.networks', 'line'], - 'lines': [] - }, -} - - -class BaseMonitService(object): - def __init__(self, typ, name, status, monitor): - self.type = typ - self.name = name - self.status = status - self.monitor = monitor - - def __repr__(self): - return 'MonitService({0}:{1})'.format(self.type.name, self.name) - - def __eq__(self, other): - if not isinstance(other, BaseMonitService): - return False - return self.type == other.type and self.name == other.name - - def __ne__(self, other): - return not self 
== other - - def __hash__(self): - return hash(repr(self)) - - def is_running(self): - return self.status == '0' and self.monitor == '1' - - def key(self): - return '{0}_{1}'.format(self.type.name, self.name) - - def data(self): - return {self.key(): int(self.is_running())} - - -class ProcessMonitService(BaseMonitService): - def __init__(self, typ, name, status, monitor): - super(ProcessMonitService, self).__init__(typ, name, status, monitor) - self.uptime = None - self.threads = None - self.children = None - - def __eq__(self, other): - return super(ProcessMonitService, self).__eq__(other) - - def __ne__(self, other): - return super(ProcessMonitService, self).__ne__(other) - - def __hash__(self): - return super(ProcessMonitService, self).__hash__() - - def uptime_key(self): - return 'process_uptime_{0}'.format(self.name) - - def threads_key(self): - return 'process_threads_{0}'.format(self.name) - - def children_key(self): - return 'process_children_{0}'.format(self.name) - - def data(self): - base_data = super(ProcessMonitService, self).data() - # skipping bugged metrics with negative uptime (monit before v5.16) - uptime = self.uptime if self.uptime and int(self.uptime) >= 0 else None - data = { - self.uptime_key(): uptime, - self.threads_key(): self.threads, - self.children_key(): self.children, - } - data.update(base_data) - - return data - - -class HostMonitService(BaseMonitService): - def __init__(self, typ, name, status, monitor): - super(HostMonitService, self).__init__(typ, name, status, monitor) - self.latency = None - - def __eq__(self, other): - return super(HostMonitService, self).__eq__(other) - - def __ne__(self, other): - return super(HostMonitService, self).__ne__(other) - - def __hash__(self): - return super(HostMonitService, self).__hash__() - - def latency_key(self): - return 'host_latency_{0}'.format(self.name) - - def data(self): - base_data = super(HostMonitService, self).data() - latency = float(self.latency) * 1000000 if self.latency else None - data = {self.latency_key(): latency} - data.update(base_data) - - return data - - -class Service(UrlService): - def __init__(self, configuration=None, name=None): - UrlService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - base_url = self.configuration.get('url', "http://localhost:2812") - self.url = '{0}/_status?format=xml&level=full'.format(base_url) - self.active_services = list() - - def parse(self, raw): - try: - root = ET.fromstring(raw) - except ET.ParseError: - self.error("URL {0} didn't return a valid XML page. 
Please check your settings.".format(self.url)) - return None - return root - - def _get_data(self): - raw = self._get_raw_data() - if not raw: - return None - - root = self.parse(raw) - if root is None: - return None - - services = self.get_services(root) - if not services: - return None - - if len(self.charts) > 0: - self.update_charts(services) - - data = dict() - - for svc in services: - data.update(svc.data()) - - return data - - def get_services(self, root): - services = list() - - for typ in TYPES: - if typ == TYPE_SYSTEM: - self.debug("skipping service from '{0}' category, it's useless in graphs".format(TYPE_SYSTEM.name)) - continue - - xpath_query = "./service[@type='{0}']".format(typ.index) - self.debug('Searching for {0} as {1}'.format(typ.name, xpath_query)) - - for svc_root in root.findall(xpath_query): - svc = create_service(svc_root, typ) - self.debug('=> found {0} with type={1}, status={2}, monitoring={3}'.format( - svc.name, svc.type.name, svc.status, svc.monitor)) - - services.append(svc) - - return services - - def update_charts(self, services): - remove = [svc for svc in self.active_services if svc not in services] - add = [svc for svc in services if svc not in self.active_services] - - self.remove_services_from_charts(remove) - self.add_services_to_charts(add) - - self.active_services = services - - def add_services_to_charts(self, services): - for svc in services: - if svc.type == TYPE_HOST: - self.charts['host_latency'].add_dimension([svc.latency_key(), svc.name, 'absolute', 1000, 1000000]) - if svc.type == TYPE_PROCESS: - self.charts['process_uptime'].add_dimension([svc.uptime_key(), svc.name]) - self.charts['process_threads'].add_dimension([svc.threads_key(), svc.name]) - self.charts['process_children'].add_dimension([svc.children_key(), svc.name]) - self.charts[svc.type.name].add_dimension([svc.key(), svc.name]) - - def remove_services_from_charts(self, services): - for svc in services: - if svc.type == TYPE_HOST: - self.charts['host_latency'].del_dimension(svc.latency_key(), False) - if svc.type == TYPE_PROCESS: - self.charts['process_uptime'].del_dimension(svc.uptime_key(), False) - self.charts['process_threads'].del_dimension(svc.threads_key(), False) - self.charts['process_children'].del_dimension(svc.children_key(), False) - self.charts[svc.type.name].del_dimension(svc.key(), False) - - -def create_service(root, typ): - if typ == TYPE_HOST: - return create_host_service(root) - elif typ == TYPE_PROCESS: - return create_process_service(root) - return create_base_service(root, typ) - - -def create_host_service(root): - svc = HostMonitService( - TYPE_HOST, - root.find('name').text, - root.find('status').text, - root.find('monitor').text, - ) - - latency = root.find('./icmp/responsetime') - if latency is not None: - svc.latency = latency.text - - return svc - - -def create_process_service(root): - svc = ProcessMonitService( - TYPE_PROCESS, - root.find('name').text, - root.find('status').text, - root.find('monitor').text, - ) - - uptime = root.find('uptime') - if uptime is not None: - svc.uptime = uptime.text - - threads = root.find('threads') - if threads is not None: - svc.threads = threads.text - - children = root.find('children') - if children is not None: - svc.children = children.text - - return svc - - -def create_base_service(root, typ): - return BaseMonitService( - typ, - root.find('name').text, - root.find('status').text, - root.find('monitor').text, - ) diff --git a/src/collectors/python.d.plugin/monit/monit.conf 
b/src/collectors/python.d.plugin/monit/monit.conf deleted file mode 100644 index 9a3fb6938..000000000 --- a/src/collectors/python.d.plugin/monit/monit.conf +++ /dev/null @@ -1,86 +0,0 @@ -# netdata python.d.plugin configuration for monit -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, this plugin also supports the following: -# -# url: 'URL' # the URL to fetch monit's status stats -# -# if the URL is password protected, the following are supported: -# -# user: 'username' -# pass: 'password' -# -# Example -# -# local: -# name : 'Local Monit' -# url : 'http://localhost:2812' -# -# "local" will show up in Netdata logs. "Reverse Proxy" will show up in the menu -# in the monit section. - -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) - -localhost: - name : 'local' - url : 'http://localhost:2812' diff --git a/src/collectors/python.d.plugin/nsd/README.md b/src/collectors/python.d.plugin/nsd/README.md deleted file mode 120000 index 59fcfe491..000000000 --- a/src/collectors/python.d.plugin/nsd/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/name_server_daemon.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md b/src/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md deleted file mode 100644 index 357812d3d..000000000 --- a/src/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md +++ /dev/null @@ -1,199 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/nsd/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/nsd/metadata.yaml" -sidebar_label: "Name Server Daemon" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/DNS and DHCP Servers" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Name Server Daemon - - -<img src="https://netdata.cloud/img/nsd.svg" width="150"/> - - -Plugin: python.d.plugin -Module: nsd - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors NSD statistics like queries, zones, protocols, query types and more. - - -It uses the `nsd-control stats_noreset` command to gather metrics. - - -This collector is supported on all platforms. - -This collector only supports collecting metrics from a single instance of this integration. - - -### Default Behavior - -#### Auto-Detection - -If permissions are satisfied, the collector will be able to run `nsd-control stats_noreset`, thus collecting metrics. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Name Server Daemon instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| nsd.queries | queries | queries/s | -| nsd.zones | master, slave | zones | -| nsd.protocols | udp, udp6, tcp, tcp6 | queries/s | -| nsd.type | A, NS, CNAME, SOA, PTR, HINFO, MX, NAPTR, TXT, AAAA, SRV, ANY | queries/s | -| nsd.transfer | NOTIFY, AXFR | queries/s | -| nsd.rcode | NOERROR, FORMERR, SERVFAIL, NXDOMAIN, NOTIMP, REFUSED, YXDOMAIN | queries/s | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### NSD version - -The version of `nsd` must be 4.0+. - - -#### Provide Netdata the permissions to run the command - -Netdata must have permissions to run the `nsd-control stats_noreset` command. - -You can: - -- Add "netdata" user to "nsd" group: - ``` - usermod -aG nsd netdata - ``` -- Add Netdata to sudoers - 1. Edit the sudoers file: - ``` - visudo -f /etc/sudoers.d/netdata - ``` - 2. Add the entry: - ``` - Defaults:netdata !requiretty - netdata ALL=(ALL) NOPASSWD: /usr/sbin/nsd-control stats_noreset - ``` - - > Note that you will need to set the `command` option to `sudo /usr/sbin/nsd-control stats_noreset` if you use this method. - - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/nsd.conf`. 
- - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/nsd.conf -``` -#### Options - -This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize it's data collection behavior. - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 30 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | -| command | The command to run | nsd-control stats_noreset | no | - -</details> - -#### Examples - -##### Basic - -A basic configuration example. - -```yaml -local: - name: 'nsd_local' - command: 'nsd-control stats_noreset' - -``` - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `nsd` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin nsd debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/nsd/metadata.yaml b/src/collectors/python.d.plugin/nsd/metadata.yaml deleted file mode 100644 index f5e2c46b0..000000000 --- a/src/collectors/python.d.plugin/nsd/metadata.yaml +++ /dev/null @@ -1,201 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: nsd - monitored_instance: - name: Name Server Daemon - link: https://nsd.docs.nlnetlabs.nl/en/latest/# - categories: - - data-collection.dns-and-dhcp-servers - icon_filename: "nsd.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - nsd - - name server daemon - most_popular: false - overview: - data_collection: - metrics_description: | - This collector monitors NSD statistics like queries, zones, protocols, query types and more. 
- method_description: | - It uses the `nsd-control stats_noreset` command to gather metrics. - supported_platforms: - include: [] - exclude: [] - multi_instance: false - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: If permissions are satisfied, the collector will be able to run `nsd-control stats_noreset`, thus collecting metrics. - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: NSD version - description: | - The version of `nsd` must be 4.0+. - - title: Provide Netdata the permissions to run the command - description: | - Netdata must have permissions to run the `nsd-control stats_noreset` command. - - You can: - - - Add "netdata" user to "nsd" group: - ``` - usermod -aG nsd netdata - ``` - - Add Netdata to sudoers - 1. Edit the sudoers file: - ``` - visudo -f /etc/sudoers.d/netdata - ``` - 2. Add the entry: - ``` - Defaults:netdata !requiretty - netdata ALL=(ALL) NOPASSWD: /usr/sbin/nsd-control stats_noreset - ``` - - > Note that you will need to set the `command` option to `sudo /usr/sbin/nsd-control stats_noreset` if you use this method. - - configuration: - file: - name: "python.d/nsd.conf" - options: - description: | - This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize it's data collection behavior. - - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 30 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: > - Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed - running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: "" - required: false - - name: command - description: The command to run - default_value: "nsd-control stats_noreset" - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Basic - description: A basic configuration example. - folding: - enabled: false - config: | - local: - name: 'nsd_local' - command: 'nsd-control stats_noreset' - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." 
- labels: [] - metrics: - - name: nsd.queries - description: queries - unit: "queries/s" - chart_type: line - dimensions: - - name: queries - - name: nsd.zones - description: zones - unit: "zones" - chart_type: stacked - dimensions: - - name: master - - name: slave - - name: nsd.protocols - description: protocol - unit: "queries/s" - chart_type: stacked - dimensions: - - name: udp - - name: udp6 - - name: tcp - - name: tcp6 - - name: nsd.type - description: query type - unit: "queries/s" - chart_type: stacked - dimensions: - - name: A - - name: NS - - name: CNAME - - name: SOA - - name: PTR - - name: HINFO - - name: MX - - name: NAPTR - - name: TXT - - name: AAAA - - name: SRV - - name: ANY - - name: nsd.transfer - description: transfer - unit: "queries/s" - chart_type: stacked - dimensions: - - name: NOTIFY - - name: AXFR - - name: nsd.rcode - description: return code - unit: "queries/s" - chart_type: stacked - dimensions: - - name: NOERROR - - name: FORMERR - - name: SERVFAIL - - name: NXDOMAIN - - name: NOTIMP - - name: REFUSED - - name: YXDOMAIN diff --git a/src/collectors/python.d.plugin/nsd/nsd.chart.py b/src/collectors/python.d.plugin/nsd/nsd.chart.py deleted file mode 100644 index 6f9b2cec8..000000000 --- a/src/collectors/python.d.plugin/nsd/nsd.chart.py +++ /dev/null @@ -1,105 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: NSD `nsd-control stats_noreset` netdata python.d module -# Author: <383c57 at gmail.com> -# SPDX-License-Identifier: GPL-3.0-or-later - -import re - -from bases.FrameworkServices.ExecutableService import ExecutableService - -update_every = 30 - -NSD_CONTROL_COMMAND = 'nsd-control stats_noreset' -REGEX = re.compile(r'([A-Za-z0-9.]+)=(\d+)') - -ORDER = [ - 'queries', - 'zones', - 'protocol', - 'type', - 'transfer', - 'rcode', -] - -CHARTS = { - 'queries': { - 'options': [None, 'queries', 'queries/s', 'queries', 'nsd.queries', 'line'], - 'lines': [ - ['num_queries', 'queries', 'incremental'] - ] - }, - 'zones': { - 'options': [None, 'zones', 'zones', 'zones', 'nsd.zones', 'stacked'], - 'lines': [ - ['zone_master', 'master', 'absolute'], - ['zone_slave', 'slave', 'absolute'] - ] - }, - 'protocol': { - 'options': [None, 'protocol', 'queries/s', 'protocol', 'nsd.protocols', 'stacked'], - 'lines': [ - ['num_udp', 'udp', 'incremental'], - ['num_udp6', 'udp6', 'incremental'], - ['num_tcp', 'tcp', 'incremental'], - ['num_tcp6', 'tcp6', 'incremental'] - ] - }, - 'type': { - 'options': [None, 'query type', 'queries/s', 'query type', 'nsd.type', 'stacked'], - 'lines': [ - ['num_type_A', 'A', 'incremental'], - ['num_type_NS', 'NS', 'incremental'], - ['num_type_CNAME', 'CNAME', 'incremental'], - ['num_type_SOA', 'SOA', 'incremental'], - ['num_type_PTR', 'PTR', 'incremental'], - ['num_type_HINFO', 'HINFO', 'incremental'], - ['num_type_MX', 'MX', 'incremental'], - ['num_type_NAPTR', 'NAPTR', 'incremental'], - ['num_type_TXT', 'TXT', 'incremental'], - ['num_type_AAAA', 'AAAA', 'incremental'], - ['num_type_SRV', 'SRV', 'incremental'], - ['num_type_TYPE255', 'ANY', 'incremental'] - ] - }, - 'transfer': { - 'options': [None, 'transfer', 'queries/s', 'transfer', 'nsd.transfer', 'stacked'], - 'lines': [ - ['num_opcode_NOTIFY', 'NOTIFY', 'incremental'], - ['num_type_TYPE252', 'AXFR', 'incremental'] - ] - }, - 'rcode': { - 'options': [None, 'return code', 'queries/s', 'return code', 'nsd.rcode', 'stacked'], - 'lines': [ - ['num_rcode_NOERROR', 'NOERROR', 'incremental'], - ['num_rcode_FORMERR', 'FORMERR', 'incremental'], - ['num_rcode_SERVFAIL', 'SERVFAIL', 'incremental'], - 
['num_rcode_NXDOMAIN', 'NXDOMAIN', 'incremental'], - ['num_rcode_NOTIMP', 'NOTIMP', 'incremental'], - ['num_rcode_REFUSED', 'REFUSED', 'incremental'], - ['num_rcode_YXDOMAIN', 'YXDOMAIN', 'incremental'] - ] - } -} - - -class Service(ExecutableService): - def __init__(self, configuration=None, name=None): - ExecutableService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.command = NSD_CONTROL_COMMAND - - def _get_data(self): - lines = self._get_raw_data() - if not lines: - return None - - stats = dict( - (k.replace('.', '_'), int(v)) for k, v in REGEX.findall(''.join(lines)) - ) - stats.setdefault('num_opcode_NOTIFY', 0) - stats.setdefault('num_type_TYPE252', 0) - stats.setdefault('num_type_TYPE255', 0) - - return stats diff --git a/src/collectors/python.d.plugin/nsd/nsd.conf b/src/collectors/python.d.plugin/nsd/nsd.conf deleted file mode 100644 index 77a8a3177..000000000 --- a/src/collectors/python.d.plugin/nsd/nsd.conf +++ /dev/null @@ -1,91 +0,0 @@ -# netdata python.d.plugin configuration for nsd -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# nsd-control is slow, so once every 30 seconds -# update_every: 30 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. 
These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, nsd also supports the following: -# -# command: 'nsd-control stats_noreset' # the command to run -# - -# ---------------------------------------------------------------------- -# IMPORTANT Information -# -# Netdata must have permissions to run `nsd-control stats_noreset` command -# -# - Example-1 (use "sudo") -# 1. sudoers (e.g. visudo -f /etc/sudoers.d/netdata) -# Defaults:netdata !requiretty -# netdata ALL=(ALL) NOPASSWD: /usr/sbin/nsd-control stats_noreset -# 2. etc/netdata/python.d/nsd.conf -# local: -# update_every: 30 -# command: 'sudo /usr/sbin/nsd-control stats_noreset' -# -# - Example-2 (add "netdata" user to "nsd" group) -# usermod -aG nsd netdata -# - -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS - -local: - update_every: 30 - command: 'nsd-control stats_noreset' diff --git a/src/collectors/python.d.plugin/nvidia_smi/README.md b/src/collectors/python.d.plugin/nvidia_smi/README.md deleted file mode 100644 index 240b65af3..000000000 --- a/src/collectors/python.d.plugin/nvidia_smi/README.md +++ /dev/null @@ -1,81 +0,0 @@ -<!-- -title: "Nvidia GPU monitoring with Netdata" -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/nvidia_smi/README.md" -sidebar_label: "nvidia_smi-python.d.plugin" -learn_status: "Published" -learn_topic_type: "References" -learn_rel_path: "Integrations/Monitor/Devices" ---> - -# Nvidia GPU collector - -Monitors performance metrics (memory usage, fan speed, pcie bandwidth utilization, temperature, etc.) using `nvidia-smi` cli tool. - -## Requirements - -- The `nvidia-smi` tool installed and your NVIDIA GPU(s) must support the tool. Mostly the newer high end models used for AI / ML and Crypto or Pro range, read more about [nvidia_smi](https://developer.nvidia.com/nvidia-system-management-interface). -- Enable this plugin, as it's disabled by default due to minor performance issues: - ```bash - cd /etc/netdata # Replace this path with your Netdata config directory, if different - sudo ./edit-config python.d.conf - ``` - Remove the '#' before nvidia_smi so it reads: `nvidia_smi: yes`. -- On some systems when the GPU is idle the `nvidia-smi` tool unloads and there is added latency again when it is next queried. If you are running GPUs under constant workload this isn't likely to be an issue. - -If using Docker, see [Netdata Docker container with NVIDIA GPUs monitoring](https://github.com/netdata/netdata/tree/master/packaging/docker#with-nvidia-gpus-monitoring). 
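Before enabling the module, it can help to confirm that the XML query the collector relies on works on your host. A minimal sketch (assuming `nvidia-smi` is on the `PATH` and Python 3 is available; this mirrors the module's `nvidia-smi -x -q` probe rather than replacing it):

```python
# Minimal check: `nvidia-smi -x -q` should return XML with one <gpu> element per device.
import subprocess
import xml.etree.ElementTree as ET

xml_out = subprocess.run(
    ["nvidia-smi", "-x", "-q"], capture_output=True, check=True
).stdout
root = ET.fromstring(xml_out)
for idx, gpu in enumerate(root.findall("gpu")):
    name = gpu.findtext("product_name", "unknown")
    temp = gpu.findtext("temperature/gpu_temp", "N/A")
    print(f"gpu{idx}: {name}, temperature: {temp}")
```

If this prints a line per GPU, the collector should be able to gather metrics once it is enabled in `python.d.conf`.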
- -## Charts - -It produces the following charts: - -- PCI Express Bandwidth Utilization in `KiB/s` -- Fan Speed in `percentage` -- GPU Utilization in `percentage` -- Memory Bandwidth Utilization in `percentage` -- Encoder/Decoder Utilization in `percentage` -- Memory Usage in `MiB` -- Temperature in `celsius` -- Clock Frequencies in `MHz` -- Power Utilization in `Watts` -- Memory Used by Each Process in `MiB` -- Memory Used by Each User in `MiB` -- Number of User on GPU in `num` - -## Configuration - -Edit the `python.d/nvidia_smi.conf` configuration file using `edit-config` from the Netdata [config -directory](/docs/netdata-agent/configuration/README.md), which is typically at `/etc/netdata`. - -```bash -cd /etc/netdata # Replace this path with your Netdata config directory, if different -sudo ./edit-config python.d/nvidia_smi.conf -``` - -Sample: - -```yaml -loop_mode : yes -poll_seconds : 1 -exclude_zero_memory_users : yes -``` - - -### Troubleshooting - -To troubleshoot issues with the `nvidia_smi` module, run the `python.d.plugin` with the debug option enabled. The -output will give you the output of the data collection job or error messages on why the collector isn't working. - -First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's -not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the -plugin's directory, switch to the `netdata` user. - -```bash -cd /usr/libexec/netdata/plugins.d/ -sudo su -s /bin/bash netdata -``` - -Now you can manually run the `nvidia_smi` module in debug mode: - -```bash -./python.d.plugin nvidia_smi debug trace -``` diff --git a/src/collectors/python.d.plugin/nvidia_smi/metadata.yaml b/src/collectors/python.d.plugin/nvidia_smi/metadata.yaml deleted file mode 100644 index 0b049d31b..000000000 --- a/src/collectors/python.d.plugin/nvidia_smi/metadata.yaml +++ /dev/null @@ -1,166 +0,0 @@ -# This collector will not appear in documentation, as the go version is preferred, -# /src/go/collectors/go.d.plugin/modules/nvidia_smi/README.md -# -# meta: -# plugin_name: python.d.plugin -# module_name: nvidia_smi -# monitored_instance: -# name: python.d nvidia_smi -# link: '' -# categories: [] -# icon_filename: '' -# related_resources: -# integrations: -# list: [] -# info_provided_to_referring_integrations: -# description: '' -# keywords: [] -# most_popular: false -# overview: -# data_collection: -# metrics_description: '' -# method_description: '' -# supported_platforms: -# include: [] -# exclude: [] -# multi_instance: true -# additional_permissions: -# description: '' -# default_behavior: -# auto_detection: -# description: '' -# limits: -# description: '' -# performance_impact: -# description: '' -# setup: -# prerequisites: -# list: [] -# configuration: -# file: -# name: '' -# description: '' -# options: -# description: '' -# folding: -# title: '' -# enabled: true -# list: [] -# examples: -# folding: -# enabled: true -# title: '' -# list: [] -# troubleshooting: -# problems: -# list: [] -# alerts: [] -# metrics: -# folding: -# title: Metrics -# enabled: false -# description: "" -# availability: [] -# scopes: -# - name: GPU -# description: "" -# labels: [] -# metrics: -# - name: nvidia_smi.pci_bandwidth -# description: PCI Express Bandwidth Utilization -# unit: "KiB/s" -# chart_type: area -# dimensions: -# - name: rx -# - name: tx -# - name: nvidia_smi.pci_bandwidth_percent -# description: PCI Express Bandwidth Percent -# unit: "percentage" -# 
chart_type: area -# dimensions: -# - name: rx_percent -# - name: tx_percent -# - name: nvidia_smi.fan_speed -# description: Fan Speed -# unit: "percentage" -# chart_type: line -# dimensions: -# - name: speed -# - name: nvidia_smi.gpu_utilization -# description: GPU Utilization -# unit: "percentage" -# chart_type: line -# dimensions: -# - name: utilization -# - name: nvidia_smi.mem_utilization -# description: Memory Bandwidth Utilization -# unit: "percentage" -# chart_type: line -# dimensions: -# - name: utilization -# - name: nvidia_smi.encoder_utilization -# description: Encoder/Decoder Utilization -# unit: "percentage" -# chart_type: line -# dimensions: -# - name: encoder -# - name: decoder -# - name: nvidia_smi.memory_allocated -# description: Memory Usage -# unit: "MiB" -# chart_type: stacked -# dimensions: -# - name: free -# - name: used -# - name: nvidia_smi.bar1_memory_usage -# description: Bar1 Memory Usage -# unit: "MiB" -# chart_type: stacked -# dimensions: -# - name: free -# - name: used -# - name: nvidia_smi.temperature -# description: Temperature -# unit: "celsius" -# chart_type: line -# dimensions: -# - name: temp -# - name: nvidia_smi.clocks -# description: Clock Frequencies -# unit: "MHz" -# chart_type: line -# dimensions: -# - name: graphics -# - name: video -# - name: sm -# - name: mem -# - name: nvidia_smi.power -# description: Power Utilization -# unit: "Watts" -# chart_type: line -# dimensions: -# - name: power -# - name: nvidia_smi.power_state -# description: Power State -# unit: "state" -# chart_type: line -# dimensions: -# - name: a dimension per {power_state} -# - name: nvidia_smi.processes_mem -# description: Memory Used by Each Process -# unit: "MiB" -# chart_type: stacked -# dimensions: -# - name: a dimension per process -# - name: nvidia_smi.user_mem -# description: Memory Used by Each User -# unit: "MiB" -# chart_type: stacked -# dimensions: -# - name: a dimension per user -# - name: nvidia_smi.user_num -# description: Number of User on GPU -# unit: "num" -# chart_type: line -# dimensions: -# - name: users diff --git a/src/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py b/src/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py deleted file mode 100644 index 556a61435..000000000 --- a/src/collectors/python.d.plugin/nvidia_smi/nvidia_smi.chart.py +++ /dev/null @@ -1,651 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: nvidia-smi netdata python.d module -# Original Author: Steven Noonan (tycho) -# Author: Ilya Mashchenko (ilyam8) -# User Memory Stat Author: Guido Scatena (scatenag) - -import os -import pwd -import subprocess -import threading -import xml.etree.ElementTree as et - -from bases.FrameworkServices.SimpleService import SimpleService -from bases.collection import find_binary - -disabled_by_default = True - -NVIDIA_SMI = 'nvidia-smi' - -NOT_AVAILABLE = 'N/A' - -EMPTY_ROW = '' -EMPTY_ROW_LIMIT = 500 -POLLER_BREAK_ROW = '</nvidia_smi_log>' - -PCI_BANDWIDTH = 'pci_bandwidth' -PCI_BANDWIDTH_PERCENT = 'pci_bandwidth_percent' -FAN_SPEED = 'fan_speed' -GPU_UTIL = 'gpu_utilization' -MEM_UTIL = 'mem_utilization' -ENCODER_UTIL = 'encoder_utilization' -MEM_USAGE = 'mem_usage' -BAR_USAGE = 'bar1_mem_usage' -TEMPERATURE = 'temperature' -CLOCKS = 'clocks' -POWER = 'power' -POWER_STATE = 'power_state' -PROCESSES_MEM = 'processes_mem' -USER_MEM = 'user_mem' -USER_NUM = 'user_num' - -ORDER = [ - PCI_BANDWIDTH, - PCI_BANDWIDTH_PERCENT, - FAN_SPEED, - GPU_UTIL, - MEM_UTIL, - ENCODER_UTIL, - MEM_USAGE, - BAR_USAGE, - TEMPERATURE, - CLOCKS, - POWER, - 
POWER_STATE, - PROCESSES_MEM, - USER_MEM, - USER_NUM, -] - -# https://docs.nvidia.com/gameworks/content/gameworkslibrary/coresdk/nvapi/group__gpupstate.html -POWER_STATES = ['P' + str(i) for i in range(0, 16)] - -# PCI Transfer data rate in gigabits per second (Gb/s) per generation -PCI_SPEED = { - "1": 2.5, - "2": 5, - "3": 8, - "4": 16, - "5": 32 -} -# PCI encoding per generation -PCI_ENCODING = { - "1": 2 / 10, - "2": 2 / 10, - "3": 2 / 130, - "4": 2 / 130, - "5": 2 / 130 -} - - -def gpu_charts(gpu): - fam = gpu.full_name() - - charts = { - PCI_BANDWIDTH: { - 'options': [None, 'PCI Express Bandwidth Utilization', 'KiB/s', fam, 'nvidia_smi.pci_bandwidth', 'area'], - 'lines': [ - ['rx_util', 'rx', 'absolute', 1, 1], - ['tx_util', 'tx', 'absolute', 1, -1], - ] - }, - PCI_BANDWIDTH_PERCENT: { - 'options': [None, 'PCI Express Bandwidth Percent', 'percentage', fam, 'nvidia_smi.pci_bandwidth_percent', - 'area'], - 'lines': [ - ['rx_util_percent', 'rx_percent'], - ['tx_util_percent', 'tx_percent'], - ] - }, - FAN_SPEED: { - 'options': [None, 'Fan Speed', 'percentage', fam, 'nvidia_smi.fan_speed', 'line'], - 'lines': [ - ['fan_speed', 'speed'], - ] - }, - GPU_UTIL: { - 'options': [None, 'GPU Utilization', 'percentage', fam, 'nvidia_smi.gpu_utilization', 'line'], - 'lines': [ - ['gpu_util', 'utilization'], - ] - }, - MEM_UTIL: { - 'options': [None, 'Memory Bandwidth Utilization', 'percentage', fam, 'nvidia_smi.mem_utilization', 'line'], - 'lines': [ - ['memory_util', 'utilization'], - ] - }, - ENCODER_UTIL: { - 'options': [None, 'Encoder/Decoder Utilization', 'percentage', fam, 'nvidia_smi.encoder_utilization', - 'line'], - 'lines': [ - ['encoder_util', 'encoder'], - ['decoder_util', 'decoder'], - ] - }, - MEM_USAGE: { - 'options': [None, 'Memory Usage', 'MiB', fam, 'nvidia_smi.memory_allocated', 'stacked'], - 'lines': [ - ['fb_memory_free', 'free'], - ['fb_memory_used', 'used'], - ] - }, - BAR_USAGE: { - 'options': [None, 'Bar1 Memory Usage', 'MiB', fam, 'nvidia_smi.bar1_memory_usage', 'stacked'], - 'lines': [ - ['bar1_memory_free', 'free'], - ['bar1_memory_used', 'used'], - ] - }, - TEMPERATURE: { - 'options': [None, 'Temperature', 'celsius', fam, 'nvidia_smi.temperature', 'line'], - 'lines': [ - ['gpu_temp', 'temp'], - ] - }, - CLOCKS: { - 'options': [None, 'Clock Frequencies', 'MHz', fam, 'nvidia_smi.clocks', 'line'], - 'lines': [ - ['graphics_clock', 'graphics'], - ['video_clock', 'video'], - ['sm_clock', 'sm'], - ['mem_clock', 'mem'], - ] - }, - POWER: { - 'options': [None, 'Power Utilization', 'Watts', fam, 'nvidia_smi.power', 'line'], - 'lines': [ - ['power_draw', 'power', 'absolute', 1, 100], - ] - }, - POWER_STATE: { - 'options': [None, 'Power State', 'state', fam, 'nvidia_smi.power_state', 'line'], - 'lines': [['power_state_' + v.lower(), v, 'absolute'] for v in POWER_STATES] - }, - PROCESSES_MEM: { - 'options': [None, 'Memory Used by Each Process', 'MiB', fam, 'nvidia_smi.processes_mem', 'stacked'], - 'lines': [] - }, - USER_MEM: { - 'options': [None, 'Memory Used by Each User', 'MiB', fam, 'nvidia_smi.user_mem', 'stacked'], - 'lines': [] - }, - USER_NUM: { - 'options': [None, 'Number of User on GPU', 'num', fam, 'nvidia_smi.user_num', 'line'], - 'lines': [ - ['user_num', 'users'], - ] - }, - } - - idx = gpu.num - - order = ['gpu{0}_{1}'.format(idx, v) for v in ORDER] - charts = dict(('gpu{0}_{1}'.format(idx, k), v) for k, v in charts.items()) - - for chart in charts.values(): - for line in chart['lines']: - line[0] = 'gpu{0}_{1}'.format(idx, line[0]) - - return order, charts - - 
-class NvidiaSMI: - def __init__(self): - self.command = find_binary(NVIDIA_SMI) - self.active_proc = None - - def run_once(self): - proc = subprocess.Popen([self.command, '-x', '-q'], stdout=subprocess.PIPE) - stdout, _ = proc.communicate() - return stdout - - def run_loop(self, interval): - if self.active_proc: - self.kill() - proc = subprocess.Popen([self.command, '-x', '-q', '-l', str(interval)], stdout=subprocess.PIPE) - self.active_proc = proc - return proc.stdout - - def kill(self): - if self.active_proc: - self.active_proc.kill() - self.active_proc = None - - -class NvidiaSMIPoller(threading.Thread): - def __init__(self, poll_interval): - threading.Thread.__init__(self) - self.daemon = True - - self.smi = NvidiaSMI() - self.interval = poll_interval - - self.lock = threading.RLock() - self.last_data = str() - self.exit = False - self.empty_rows = 0 - self.rows = list() - - def has_smi(self): - return bool(self.smi.command) - - def run_once(self): - return self.smi.run_once() - - def run(self): - out = self.smi.run_loop(self.interval) - - for row in out: - if self.exit or self.empty_rows > EMPTY_ROW_LIMIT: - break - self.process_row(row) - self.smi.kill() - - def process_row(self, row): - row = row.decode() - self.empty_rows += (row == EMPTY_ROW) - self.rows.append(row) - - if POLLER_BREAK_ROW in row: - self.lock.acquire() - self.last_data = '\n'.join(self.rows) - self.lock.release() - - self.rows = list() - self.empty_rows = 0 - - def is_started(self): - return self.ident is not None - - def shutdown(self): - self.exit = True - - def data(self): - self.lock.acquire() - data = self.last_data - self.lock.release() - return data - - -def handle_attr_error(method): - def on_call(*args, **kwargs): - try: - return method(*args, **kwargs) - except AttributeError: - return None - - return on_call - - -def handle_value_error(method): - def on_call(*args, **kwargs): - try: - return method(*args, **kwargs) - except ValueError: - return None - - return on_call - - -HOST_PREFIX = os.getenv('NETDATA_HOST_PREFIX') -ETC_PASSWD_PATH = '/etc/passwd' -PROC_PATH = '/proc' - -IS_INSIDE_DOCKER = False - -if HOST_PREFIX: - ETC_PASSWD_PATH = os.path.join(HOST_PREFIX, ETC_PASSWD_PATH[1:]) - PROC_PATH = os.path.join(HOST_PREFIX, PROC_PATH[1:]) - IS_INSIDE_DOCKER = True - - -def read_passwd_file(): - data = dict() - with open(ETC_PASSWD_PATH, 'r') as f: - for line in f: - line = line.strip() - if line.startswith("#"): - continue - fields = line.split(":") - # name, passwd, uid, gid, comment, home_dir, shell - if len(fields) != 7: - continue - # uid, guid - fields[2], fields[3] = int(fields[2]), int(fields[3]) - data[fields[2]] = fields - return data - - -def read_passwd_file_safe(): - try: - if IS_INSIDE_DOCKER: - return read_passwd_file() - return dict((k[2], k) for k in pwd.getpwall()) - except (OSError, IOError): - return dict() - - -def get_username_by_pid_safe(pid, passwd_file): - path = os.path.join(PROC_PATH, pid) - try: - uid = os.stat(path).st_uid - except (OSError, IOError): - return '' - try: - if IS_INSIDE_DOCKER: - return passwd_file[uid][0] - return pwd.getpwuid(uid)[0] - except KeyError: - return str(uid) - - -class GPU: - def __init__(self, num, root, exclude_zero_memory_users=False): - self.num = num - self.root = root - self.exclude_zero_memory_users = exclude_zero_memory_users - - def id(self): - return self.root.get('id') - - def name(self): - return self.root.find('product_name').text - - def full_name(self): - return 'gpu{0} {1}'.format(self.num, self.name()) - - @handle_attr_error - def 
pci_link_gen(self): - return self.root.find('pci').find('pci_gpu_link_info').find('pcie_gen').find('max_link_gen').text - - @handle_attr_error - def pci_link_width(self): - info = self.root.find('pci').find('pci_gpu_link_info') - return info.find('link_widths').find('max_link_width').text.split('x')[0] - - def pci_bw_max(self): - link_gen = self.pci_link_gen() - link_width = int(self.pci_link_width()) - if link_gen not in PCI_SPEED or link_gen not in PCI_ENCODING or not link_width: - return None - # Maximum PCIe Bandwidth = SPEED * WIDTH * (1 - ENCODING) - 1Gb/s. - # see details https://enterprise-support.nvidia.com/s/article/understanding-pcie-configuration-for-maximum-performance - # return max bandwidth in kilobytes per second (kB/s) - return (PCI_SPEED[link_gen] * link_width * (1 - PCI_ENCODING[link_gen]) - 1) * 1000 * 1000 / 8 - - @handle_attr_error - def rx_util(self): - return self.root.find('pci').find('rx_util').text.split()[0] - - @handle_attr_error - def tx_util(self): - return self.root.find('pci').find('tx_util').text.split()[0] - - @handle_attr_error - def fan_speed(self): - return self.root.find('fan_speed').text.split()[0] - - @handle_attr_error - def gpu_util(self): - return self.root.find('utilization').find('gpu_util').text.split()[0] - - @handle_attr_error - def memory_util(self): - return self.root.find('utilization').find('memory_util').text.split()[0] - - @handle_attr_error - def encoder_util(self): - return self.root.find('utilization').find('encoder_util').text.split()[0] - - @handle_attr_error - def decoder_util(self): - return self.root.find('utilization').find('decoder_util').text.split()[0] - - @handle_attr_error - def fb_memory_used(self): - return self.root.find('fb_memory_usage').find('used').text.split()[0] - - @handle_attr_error - def fb_memory_free(self): - return self.root.find('fb_memory_usage').find('free').text.split()[0] - - @handle_attr_error - def bar1_memory_used(self): - return self.root.find('bar1_memory_usage').find('used').text.split()[0] - - @handle_attr_error - def bar1_memory_free(self): - return self.root.find('bar1_memory_usage').find('free').text.split()[0] - - @handle_attr_error - def temperature(self): - return self.root.find('temperature').find('gpu_temp').text.split()[0] - - @handle_attr_error - def graphics_clock(self): - return self.root.find('clocks').find('graphics_clock').text.split()[0] - - @handle_attr_error - def video_clock(self): - return self.root.find('clocks').find('video_clock').text.split()[0] - - @handle_attr_error - def sm_clock(self): - return self.root.find('clocks').find('sm_clock').text.split()[0] - - @handle_attr_error - def mem_clock(self): - return self.root.find('clocks').find('mem_clock').text.split()[0] - - @handle_attr_error - def power_readings(self): - elem = self.root.find('power_readings') - return elem if elem else self.root.find('gpu_power_readings') - - @handle_attr_error - def power_state(self): - return str(self.power_readings().find('power_state').text.split()[0]) - - @handle_value_error - @handle_attr_error - def power_draw(self): - return float(self.power_readings().find('power_draw').text.split()[0]) * 100 - - @handle_attr_error - def processes(self): - processes_info = self.root.find('processes').findall('process_info') - if not processes_info: - return list() - - passwd_file = read_passwd_file_safe() - processes = list() - - for info in processes_info: - pid = info.find('pid').text - processes.append({ - 'pid': int(pid), - 'process_name': info.find('process_name').text, - 'used_memory': 
int(info.find('used_memory').text.split()[0]), - 'username': get_username_by_pid_safe(pid, passwd_file), - }) - return processes - - def data(self): - data = { - 'rx_util': self.rx_util(), - 'tx_util': self.tx_util(), - 'fan_speed': self.fan_speed(), - 'gpu_util': self.gpu_util(), - 'memory_util': self.memory_util(), - 'encoder_util': self.encoder_util(), - 'decoder_util': self.decoder_util(), - 'fb_memory_used': self.fb_memory_used(), - 'fb_memory_free': self.fb_memory_free(), - 'bar1_memory_used': self.bar1_memory_used(), - 'bar1_memory_free': self.bar1_memory_free(), - 'gpu_temp': self.temperature(), - 'graphics_clock': self.graphics_clock(), - 'video_clock': self.video_clock(), - 'sm_clock': self.sm_clock(), - 'mem_clock': self.mem_clock(), - 'power_draw': self.power_draw(), - } - - if self.rx_util() != NOT_AVAILABLE and self.tx_util() != NOT_AVAILABLE: - pci_bw_max = self.pci_bw_max() - if not pci_bw_max: - data['rx_util_percent'] = 0 - data['tx_util_percent'] = 0 - else: - data['rx_util_percent'] = str(int(int(self.rx_util()) * 100 / self.pci_bw_max())) - data['tx_util_percent'] = str(int(int(self.tx_util()) * 100 / self.pci_bw_max())) - - for v in POWER_STATES: - data['power_state_' + v.lower()] = 0 - p_state = self.power_state() - if p_state: - data['power_state_' + p_state.lower()] = 1 - - processes = self.processes() or [] - users = set() - for p in processes: - data['process_mem_{0}'.format(p['pid'])] = p['used_memory'] - if p['username']: - if self.exclude_zero_memory_users and p['used_memory'] == 0: - continue - users.add(p['username']) - key = 'user_mem_{0}'.format(p['username']) - if key in data: - data[key] += p['used_memory'] - else: - data[key] = p['used_memory'] - data['user_num'] = len(users) - - return dict(('gpu{0}_{1}'.format(self.num, k), v) for k, v in data.items()) - - -class Service(SimpleService): - def __init__(self, configuration=None, name=None): - super(Service, self).__init__(configuration=configuration, name=name) - self.order = list() - self.definitions = dict() - self.loop_mode = configuration.get('loop_mode', True) - poll = int(configuration.get('poll_seconds', self.get_update_every())) - self.exclude_zero_memory_users = configuration.get('exclude_zero_memory_users', False) - self.poller = NvidiaSMIPoller(poll) - - def get_data_loop_mode(self): - if not self.poller.is_started(): - self.poller.start() - - if not self.poller.is_alive(): - self.debug('poller is off') - return None - - return self.poller.data() - - def get_data_normal_mode(self): - return self.poller.run_once() - - def get_data(self): - if self.loop_mode: - last_data = self.get_data_loop_mode() - else: - last_data = self.get_data_normal_mode() - - if not last_data: - return None - - parsed = self.parse_xml(last_data) - if parsed is None: - return None - - data = dict() - for idx, root in enumerate(parsed.findall('gpu')): - gpu = GPU(idx, root, self.exclude_zero_memory_users) - gpu_data = gpu.data() - # self.debug(gpu_data) - gpu_data = dict((k, v) for k, v in gpu_data.items() if is_gpu_data_value_valid(v)) - data.update(gpu_data) - self.update_processes_mem_chart(gpu) - self.update_processes_user_mem_chart(gpu) - - return data or None - - def update_processes_mem_chart(self, gpu): - ps = gpu.processes() - if not ps: - return - chart = self.charts['gpu{0}_{1}'.format(gpu.num, PROCESSES_MEM)] - active_dim_ids = [] - for p in ps: - dim_id = 'gpu{0}_process_mem_{1}'.format(gpu.num, p['pid']) - active_dim_ids.append(dim_id) - if dim_id not in chart: - chart.add_dimension([dim_id, '{0} 
{1}'.format(p['pid'], p['process_name'])]) - for dim in chart: - if dim.id not in active_dim_ids: - chart.del_dimension(dim.id, hide=False) - - def update_processes_user_mem_chart(self, gpu): - ps = gpu.processes() - if not ps: - return - chart = self.charts['gpu{0}_{1}'.format(gpu.num, USER_MEM)] - active_dim_ids = [] - for p in ps: - if not p.get('username'): - continue - dim_id = 'gpu{0}_user_mem_{1}'.format(gpu.num, p['username']) - active_dim_ids.append(dim_id) - if dim_id not in chart: - chart.add_dimension([dim_id, '{0}'.format(p['username'])]) - - for dim in chart: - if dim.id not in active_dim_ids: - chart.del_dimension(dim.id, hide=False) - - def check(self): - if not self.poller.has_smi(): - self.error("couldn't find '{0}' binary".format(NVIDIA_SMI)) - return False - - raw_data = self.poller.run_once() - if not raw_data: - self.error("failed to invoke '{0}' binary".format(NVIDIA_SMI)) - return False - - parsed = self.parse_xml(raw_data) - if parsed is None: - return False - - gpus = parsed.findall('gpu') - if not gpus: - return False - - self.create_charts(gpus) - - return True - - def parse_xml(self, data): - try: - return et.fromstring(data) - except et.ParseError as error: - self.error('xml parse failed: "{0}", error: {1}'.format(data, error)) - - return None - - def create_charts(self, gpus): - for idx, root in enumerate(gpus): - order, charts = gpu_charts(GPU(idx, root)) - self.order.extend(order) - self.definitions.update(charts) - - -def is_gpu_data_value_valid(value): - try: - int(value) - except (TypeError, ValueError): - return False - return True diff --git a/src/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf b/src/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf deleted file mode 100644 index 3d2a30d41..000000000 --- a/src/collectors/python.d.plugin/nvidia_smi/nvidia_smi.conf +++ /dev/null @@ -1,68 +0,0 @@ -# netdata python.d.plugin configuration for nvidia_smi -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. 
This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, example also supports the following: -# -# loop_mode: yes/no # default is yes. If set to yes `nvidia-smi` is executed in a separate thread using `-l` option. -# poll_seconds: SECONDS # default is 1. Sets the frequency of seconds the nvidia-smi tool is polled in loop mode. -# exclude_zero_memory_users: yes/no # default is no. Whether to collect users metrics with 0Mb memory allocation. -# -# ---------------------------------------------------------------------- diff --git a/src/collectors/python.d.plugin/openldap/integrations/openldap.md b/src/collectors/python.d.plugin/openldap/integrations/openldap.md index 97199f7dd..3f363343a 100644 --- a/src/collectors/python.d.plugin/openldap/integrations/openldap.md +++ b/src/collectors/python.d.plugin/openldap/integrations/openldap.md @@ -190,6 +190,7 @@ timeout: 1 ### Debug Mode + To troubleshoot issues with the `openldap` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -212,4 +213,37 @@ should give you clues as to why the collector isn't working. ./python.d.plugin openldap debug trace ``` +### Getting Logs + +If you're encountering problems with the `openldap` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep openldap +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep openldap /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep openldap +``` + diff --git a/src/collectors/python.d.plugin/oracledb/integrations/oracle_db.md b/src/collectors/python.d.plugin/oracledb/integrations/oracle_db.md index 5b98fbd20..4cf1b54a4 100644 --- a/src/collectors/python.d.plugin/oracledb/integrations/oracle_db.md +++ b/src/collectors/python.d.plugin/oracledb/integrations/oracle_db.md @@ -201,6 +201,7 @@ remote: ### Debug Mode + To troubleshoot issues with the `oracledb` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. 
@@ -223,4 +224,37 @@ should give you clues as to why the collector isn't working. ./python.d.plugin oracledb debug trace ``` +### Getting Logs + +If you're encountering problems with the `oracledb` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep oracledb +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep oracledb /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep oracledb +``` + diff --git a/src/collectors/python.d.plugin/pandas/integrations/pandas.md b/src/collectors/python.d.plugin/pandas/integrations/pandas.md index 898e23f0a..e0b5418c5 100644 --- a/src/collectors/python.d.plugin/pandas/integrations/pandas.md +++ b/src/collectors/python.d.plugin/pandas/integrations/pandas.md @@ -340,6 +340,7 @@ sql: ### Debug Mode + To troubleshoot issues with the `pandas` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -362,4 +363,37 @@ should give you clues as to why the collector isn't working. ./python.d.plugin pandas debug trace ``` +### Getting Logs + +If you're encountering problems with the `pandas` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep pandas +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep pandas /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep pandas +``` + diff --git a/src/collectors/python.d.plugin/postfix/README.md b/src/collectors/python.d.plugin/postfix/README.md deleted file mode 120000 index c62eb5c24..000000000 --- a/src/collectors/python.d.plugin/postfix/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/postfix.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/postfix/integrations/postfix.md b/src/collectors/python.d.plugin/postfix/integrations/postfix.md deleted file mode 100644 index 32cc52fbb..000000000 --- a/src/collectors/python.d.plugin/postfix/integrations/postfix.md +++ /dev/null @@ -1,151 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/postfix/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/postfix/metadata.yaml" -sidebar_label: "Postfix" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Mail Servers" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Postfix - - -<img src="https://netdata.cloud/img/postfix.svg" width="150"/> - - -Plugin: python.d.plugin -Module: postfix - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -Keep an eye on Postfix metrics for efficient mail server operations. -Improve your mail server performance with Netdata's real-time metrics and built-in alerts. - - -Monitors MTA email queue statistics using [postqueue](http://www.postfix.org/postqueue.1.html) tool. - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - -Postfix has internal access controls that limit activities on the mail queue. By default, all users are allowed to view the queue. If your system is configured with stricter access controls, you need to grant the `netdata` user access to view the mail queue. In order to do it, add `netdata` to `authorized_mailq_users` in the `/etc/postfix/main.cf` file. -See the `authorized_mailq_users` setting in the [Postfix documentation](https://www.postfix.org/postconf.5.html) for more details. - - -### Default Behavior - -#### Auto-Detection - -The collector executes `postqueue -p` to get Postfix queue statistics. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Postfix instance - -These metrics refer to the entire monitored application. - - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| postfix.qemails | emails | emails | -| postfix.qsize | size | KiB | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -There is no configuration file. -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. 
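Before the option reference below, a note on where the documented metrics (`postfix.qemails`, `postfix.qsize`) come from: the queue totals correspond to the summary line printed by `postqueue -p`. A rough, illustrative sketch of that mapping (the `-- N Kbytes in M Requests.` trailer is an assumption about typical `postqueue` output, and this is not the module's actual implementation):

```python
# Illustrative only: derive queued email count and queue size (KiB) from `postqueue -p`.
import re
import subprocess

out = subprocess.run(["postqueue", "-p"], capture_output=True, text=True, check=True).stdout
match = re.search(r"--\s+(\d+)\s+Kbytes\s+in\s+(\d+)\s+Requests?", out)
if match:
    qsize_kib, qemails = int(match.group(1)), int(match.group(2))
else:
    # e.g. "Mail queue is empty" or unexpected output
    qsize_kib, qemails = 0, 0
print({"postfix.qsize": qsize_kib, "postfix.qemails": qemails})
```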
- - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 1 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | - -</details> - -#### Examples -There are no configuration examples. - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `postfix` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin postfix debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/postfix/metadata.yaml b/src/collectors/python.d.plugin/postfix/metadata.yaml deleted file mode 100644 index 1bbb61164..000000000 --- a/src/collectors/python.d.plugin/postfix/metadata.yaml +++ /dev/null @@ -1,124 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: postfix - monitored_instance: - name: Postfix - link: https://www.postfix.org/ - categories: - - data-collection.mail-servers - icon_filename: "postfix.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - postfix - - mail - - mail server - most_popular: false - overview: - data_collection: - metrics_description: > - Keep an eye on Postfix metrics for efficient mail server operations. - - Improve your mail server performance with Netdata's real-time metrics and built-in alerts. - method_description: > - Monitors MTA email queue statistics using [postqueue](http://www.postfix.org/postqueue.1.html) tool. - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: > - Postfix has internal access controls that limit activities on the mail queue. By default, all users are allowed to view - the queue. If your system is configured with stricter access controls, you need to grant the `netdata` user access to - view the mail queue. In order to do it, add `netdata` to `authorized_mailq_users` in the `/etc/postfix/main.cf` file. - - See the `authorized_mailq_users` setting in - the [Postfix documentation](https://www.postfix.org/postconf.5.html) for more details. - default_behavior: - auto_detection: - description: "The collector executes `postqueue -p` to get Postfix queue statistics." 
- limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: [] - configuration: - file: - name: "" - description: "" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: Config options - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 1 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: "" - required: false - examples: - folding: - enabled: true - title: "" - list: [] - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: | - These metrics refer to the entire monitored application. 
- labels: [] - metrics: - - name: postfix.qemails - description: Postfix Queue Emails - unit: "emails" - chart_type: line - dimensions: - - name: emails - - name: postfix.qsize - description: Postfix Queue Emails Size - unit: "KiB" - chart_type: area - dimensions: - - name: size diff --git a/src/collectors/python.d.plugin/postfix/postfix.chart.py b/src/collectors/python.d.plugin/postfix/postfix.chart.py deleted file mode 100644 index b650514ee..000000000 --- a/src/collectors/python.d.plugin/postfix/postfix.chart.py +++ /dev/null @@ -1,52 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: postfix netdata python.d module -# Author: Pawel Krupa (paulfantom) -# SPDX-License-Identifier: GPL-3.0-or-later - -from bases.FrameworkServices.ExecutableService import ExecutableService - -POSTQUEUE_COMMAND = 'postqueue -p' - -ORDER = [ - 'qemails', - 'qsize', -] - -CHARTS = { - 'qemails': { - 'options': [None, 'Postfix Queue Emails', 'emails', 'queue', 'postfix.qemails', 'line'], - 'lines': [ - ['emails', None, 'absolute'] - ] - }, - 'qsize': { - 'options': [None, 'Postfix Queue Emails Size', 'KiB', 'queue', 'postfix.qsize', 'area'], - 'lines': [ - ['size', None, 'absolute'] - ] - } -} - - -class Service(ExecutableService): - def __init__(self, configuration=None, name=None): - ExecutableService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.command = POSTQUEUE_COMMAND - - def _get_data(self): - """ - Format data received from shell command - :return: dict - """ - try: - raw = self._get_raw_data()[-1].split(' ') - if raw[0] == 'Mail' and raw[1] == 'queue': - return {'emails': 0, - 'size': 0} - - return {'emails': raw[4], - 'size': raw[1]} - except (ValueError, AttributeError): - return None diff --git a/src/collectors/python.d.plugin/postfix/postfix.conf b/src/collectors/python.d.plugin/postfix/postfix.conf deleted file mode 100644 index a4d2472ee..000000000 --- a/src/collectors/python.d.plugin/postfix/postfix.conf +++ /dev/null @@ -1,72 +0,0 @@ -# netdata python.d.plugin configuration for postfix -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# postfix is slow, so once every 10 seconds -update_every: 10 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. 
-# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, postfix also supports the following: -# -# command: 'postqueue -p' # the command to run -# - -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS - -local: - command: 'postqueue -p' diff --git a/src/collectors/python.d.plugin/puppet/README.md b/src/collectors/python.d.plugin/puppet/README.md deleted file mode 120000 index b6c4c83f9..000000000 --- a/src/collectors/python.d.plugin/puppet/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/puppet.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/puppet/integrations/puppet.md b/src/collectors/python.d.plugin/puppet/integrations/puppet.md deleted file mode 100644 index 438f9bdc9..000000000 --- a/src/collectors/python.d.plugin/puppet/integrations/puppet.md +++ /dev/null @@ -1,215 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/puppet/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/puppet/metadata.yaml" -sidebar_label: "Puppet" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/CICD Platforms" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Puppet - - -<img src="https://netdata.cloud/img/puppet.svg" width="150"/> - - -Plugin: python.d.plugin -Module: puppet - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors Puppet metrics about JVM Heap, Non-Heap, CPU usage and file descriptors.' - - -It uses Puppet's metrics API endpoint to gather the metrics. - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -By default, this collector will use `https://fqdn.example.com:8140` as the URL to look for metrics. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Puppet instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| puppet.jvm_heap | committed, used | MiB | -| puppet.jvm_nonheap | committed, used | MiB | -| puppet.cpu | execution, GC | percentage | -| puppet.fdopen | used | descriptors | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -No action required. - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/puppet.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/puppet.conf -``` -#### Options - -This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize it's data collection behavior. - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. 
- -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - -> Notes: -> - Exact Fully Qualified Domain Name of the node should be used. -> - Usually Puppet Server/DB startup time is VERY long. So, there should be quite reasonable retry count. -> - A secured PuppetDB config may require a client certificate. This does not apply to the default PuppetDB configuration though. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| url | HTTP or HTTPS URL, exact Fully Qualified Domain Name of the node should be used. | https://fqdn.example.com:8081 | yes | -| tls_verify | Control HTTPS server certificate verification. | False | no | -| tls_ca_file | Optional CA (bundle) file to use | | no | -| tls_cert_file | Optional client certificate file | | no | -| tls_key_file | Optional client key file | | no | -| update_every | Sets the default data collection frequency. | 30 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | - -</details> - -#### Examples - -##### Basic - -A basic example configuration - -```yaml -puppetserver: - url: 'https://fqdn.example.com:8140' - autodetection_retry: 1 - -``` -##### TLS Certificate - -An example using a TLS certificate - -<details open><summary>Config</summary> - -```yaml -puppetdb: - url: 'https://fqdn.example.com:8081' - tls_cert_file: /path/to/client.crt - tls_key_file: /path/to/client.key - autodetection_retry: 1 - -``` -</details> - -##### Multi-instance - -> **Note**: When you define multiple jobs, their names must be unique. - -Collecting metrics from local and remote instances. - - -<details open><summary>Config</summary> - -```yaml -puppetserver1: - url: 'https://fqdn.example.com:8140' - autodetection_retry: 1 - -puppetserver2: - url: 'https://fqdn.example2.com:8140' - autodetection_retry: 1 - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `puppet` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. 
- - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin puppet debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/puppet/metadata.yaml b/src/collectors/python.d.plugin/puppet/metadata.yaml deleted file mode 100644 index 5f68dca7f..000000000 --- a/src/collectors/python.d.plugin/puppet/metadata.yaml +++ /dev/null @@ -1,185 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: puppet - monitored_instance: - name: Puppet - link: "https://www.puppet.com/" - categories: - - data-collection.ci-cd-systems - icon_filename: "puppet.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - puppet - - jvm heap - most_popular: false - overview: - data_collection: - metrics_description: | - This collector monitors Puppet metrics about JVM Heap, Non-Heap, CPU usage and file descriptors.' - method_description: | - It uses Puppet's metrics API endpoint to gather the metrics. - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: By default, this collector will use `https://fqdn.example.com:8140` as the URL to look for metrics. - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: [] - configuration: - file: - name: "python.d/puppet.conf" - options: - description: | - This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize it's data collection behavior. - - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - > Notes: - > - Exact Fully Qualified Domain Name of the node should be used. - > - Usually Puppet Server/DB startup time is VERY long. So, there should be quite reasonable retry count. - > - A secured PuppetDB config may require a client certificate. This does not apply to the default PuppetDB configuration though. - folding: - title: "Config options" - enabled: true - list: - - name: url - description: HTTP or HTTPS URL, exact Fully Qualified Domain Name of the node should be used. - default_value: https://fqdn.example.com:8081 - required: true - - name: tls_verify - description: Control HTTPS server certificate verification. - default_value: "False" - required: false - - name: tls_ca_file - description: Optional CA (bundle) file to use - default_value: "" - required: false - - name: tls_cert_file - description: Optional client certificate file - default_value: "" - required: false - - name: tls_key_file - description: Optional client key file - default_value: "" - required: false - - name: update_every - description: Sets the default data collection frequency. - default_value: 30 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. 
- default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: > - Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: "" - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Basic - description: A basic example configuration - folding: - enabled: false - config: | - puppetserver: - url: 'https://fqdn.example.com:8140' - autodetection_retry: 1 - - name: TLS Certificate - description: An example using a TLS certificate - config: | - puppetdb: - url: 'https://fqdn.example.com:8081' - tls_cert_file: /path/to/client.crt - tls_key_file: /path/to/client.key - autodetection_retry: 1 - - name: Multi-instance - description: | - > **Note**: When you define multiple jobs, their names must be unique. - - Collecting metrics from local and remote instances. - config: | - puppetserver1: - url: 'https://fqdn.example.com:8140' - autodetection_retry: 1 - - puppetserver2: - url: 'https://fqdn.example2.com:8140' - autodetection_retry: 1 - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: puppet.jvm_heap - description: JVM Heap - unit: "MiB" - chart_type: area - dimensions: - - name: committed - - name: used - - name: puppet.jvm_nonheap - description: JVM Non-Heap - unit: "MiB" - chart_type: area - dimensions: - - name: committed - - name: used - - name: puppet.cpu - description: CPU usage - unit: "percentage" - chart_type: stacked - dimensions: - - name: execution - - name: GC - - name: puppet.fdopen - description: File Descriptors - unit: "descriptors" - chart_type: line - dimensions: - - name: used diff --git a/src/collectors/python.d.plugin/puppet/puppet.chart.py b/src/collectors/python.d.plugin/puppet/puppet.chart.py deleted file mode 100644 index 0e5b781f5..000000000 --- a/src/collectors/python.d.plugin/puppet/puppet.chart.py +++ /dev/null @@ -1,121 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: puppet netdata python.d module -# Author: Andrey Galkin <andrey@futoin.org> (andvgal) -# SPDX-License-Identifier: GPL-3.0-or-later -# -# This module should work both with OpenSource and PE versions -# of PuppetServer and PuppetDB. -# -# NOTE: PuppetDB may be configured to require proper TLS -# client certificate for security reasons. Use tls_key_file -# and tls_cert_file options then. 
-# - -import socket -from json import loads - -from bases.FrameworkServices.UrlService import UrlService - -update_every = 5 - -MiB = 1 << 20 -CPU_SCALE = 1000 - -ORDER = [ - 'jvm_heap', - 'jvm_nonheap', - 'cpu', - 'fd_open', -] - -CHARTS = { - 'jvm_heap': { - 'options': [None, 'JVM Heap', 'MiB', 'resources', 'puppet.jvm_heap', 'area'], - 'lines': [ - ['jvm_heap_committed', 'committed', 'absolute', 1, MiB], - ['jvm_heap_used', 'used', 'absolute', 1, MiB], - ], - 'variables': [ - ['jvm_heap_max'], - ['jvm_heap_init'], - ], - }, - 'jvm_nonheap': { - 'options': [None, 'JVM Non-Heap', 'MiB', 'resources', 'puppet.jvm_nonheap', 'area'], - 'lines': [ - ['jvm_nonheap_committed', 'committed', 'absolute', 1, MiB], - ['jvm_nonheap_used', 'used', 'absolute', 1, MiB], - ], - 'variables': [ - ['jvm_nonheap_max'], - ['jvm_nonheap_init'], - ], - }, - 'cpu': { - 'options': [None, 'CPU usage', 'percentage', 'resources', 'puppet.cpu', 'stacked'], - 'lines': [ - ['cpu_time', 'execution', 'absolute', 1, CPU_SCALE], - ['gc_time', 'GC', 'absolute', 1, CPU_SCALE], - ] - }, - 'fd_open': { - 'options': [None, 'File Descriptors', 'descriptors', 'resources', 'puppet.fdopen', 'line'], - 'lines': [ - ['fd_used', 'used', 'absolute'], - ], - 'variables': [ - ['fd_max'], - ], - }, -} - - -class Service(UrlService): - def __init__(self, configuration=None, name=None): - UrlService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.url = 'https://{0}:8140'.format(socket.getfqdn()) - - def _get_data(self): - # NOTE: there are several ways to retrieve data - # 1. Only PE versions: - # https://puppet.com/docs/pe/2018.1/api_status/status_api_metrics_endpoints.html - # 2. Individual Metrics API (JMX): - # https://puppet.com/docs/pe/2018.1/api_status/metrics_api.html - # 3. Extended status at debug level: - # https://puppet.com/docs/pe/2018.1/api_status/status_api_json_endpoints.html - # - # For sake of simplicity and efficiency the status one is used.. - - raw_data = self._get_raw_data(self.url + '/status/v1/services?level=debug') - - if raw_data is None: - return None - - raw_data = loads(raw_data) - data = {} - - try: - try: - jvm_metrics = raw_data['status-service']['status']['experimental']['jvm-metrics'] - except KeyError: - jvm_metrics = raw_data['status-service']['status']['jvm-metrics'] - - heap_mem = jvm_metrics['heap-memory'] - non_heap_mem = jvm_metrics['non-heap-memory'] - - for k in ['max', 'committed', 'used', 'init']: - data['jvm_heap_' + k] = heap_mem[k] - data['jvm_nonheap_' + k] = non_heap_mem[k] - - fd_open = jvm_metrics['file-descriptors'] - data['fd_max'] = fd_open['max'] - data['fd_used'] = fd_open['used'] - - data['cpu_time'] = int(jvm_metrics['cpu-usage'] * CPU_SCALE) - data['gc_time'] = int(jvm_metrics['gc-cpu-usage'] * CPU_SCALE) - except KeyError: - pass - - return data or None diff --git a/src/collectors/python.d.plugin/puppet/puppet.conf b/src/collectors/python.d.plugin/puppet/puppet.conf deleted file mode 100644 index ff5c3d020..000000000 --- a/src/collectors/python.d.plugin/puppet/puppet.conf +++ /dev/null @@ -1,94 +0,0 @@ -# netdata python.d.plugin configuration for Puppet Server and Puppet DB -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). 
- -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# These configuration comes from UrlService base: -# url: # HTTP or HTTPS URL -# tls_verify: False # Control HTTPS server certificate verification -# tls_ca_file: # Optional CA (bundle) file to use -# tls_cert_file: # Optional client certificate file -# tls_key_file: # Optional client key file -# -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) -# puppet: -# url: 'https://<FQDN>:8140' -# - -# -# Production configuration should look like below. -# -# NOTE: usually Puppet Server/DB startup time is VERY long. So, there should -# be quite reasonable retry count. -# -# NOTE: secure PuppetDB config may require client certificate. -# Not applies to default PuppetDB configuration though. -# -# puppetdb: -# url: 'https://fqdn.example.com:8081' -# tls_cert_file: /path/to/client.crt -# tls_key_file: /path/to/client.key -# autodetection_retry: 1 -# -# puppetserver: -# url: 'https://fqdn.example.com:8140' -# autodetection_retry: 1 -# diff --git a/src/collectors/python.d.plugin/python.d.conf b/src/collectors/python.d.plugin/python.d.conf index 470b4bbb7..4fcecc75d 100644 --- a/src/collectors/python.d.plugin/python.d.conf +++ b/src/collectors/python.d.plugin/python.d.conf @@ -25,43 +25,21 @@ gc_run: yes ## Garbage collection interval in seconds. Default is 300. 
gc_interval: 300 -# alarms: yes # am2320: yes # anomalies: no -# beanstalk: yes -# bind_rndc: yes # boinc: yes # ceph: yes -# changefinder: no -# dovecot: yes # this is just an example -example: no -# exim: yes -# gearman: yes go_expvar: no # haproxy: yes -# icecast: yes -# ipfs: yes -# memcached: yes -# monit: yes -# nvidia_smi: yes -# nsd: yes # openldap: yes # oracledb: yes # pandas: yes -# postfix: yes -# puppet: yes -# rethinkdbs: yes # retroshare: yes -# riakkv: yes # samba: yes # smartd_log: yes # spigotmc: yes -# squid: yes # traefik: yes -# tomcat: yes -# tor: yes -# uwsgi: yes # varnish: yes # w1sensor: yes # zscores: no @@ -70,17 +48,35 @@ go_expvar: no ## Disabled for existing installations. adaptec_raid: no # Removed (replaced with go.d/adaptercraid). apache: no # Removed (replaced with go.d/apache). +beanstalk: no # Removed (replaced with go.d/beanstalk). +dovecot: no # Removed (replaced with go.d/dovecot). elasticsearch: no # Removed (replaced with go.d/elasticsearch). +exim: no # Removed (replaced with go.d/exim). fail2ban: no # Removed (replaced with go.d/fail2ban). freeradius: no # Removed (replaced with go.d/freeradius). +gearman: no # Removed (replaced with go.d/gearman). hddtemp: no # Removed (replaced with go.d/hddtemp). hpssa: no # Removed (replaced with go.d/hpssa). +icecast: no # Removed (replaced with go.d/icecast) +ipfs: no # Removed (replaced with go.d/ipfs). litespeed: no # Removed (replaced with go.d/litespeed). megacli: no # Removed (replaced with go.d/megacli). +memcached: no # Removed (replaced with go.d/memcached). mongodb: no # Removed (replaced with go.d/mongodb). +monit: no # Removed (replaced with go.d/monit). mysql: no # Removed (replaced with go.d/mysql). nginx: no # Removed (replaced with go.d/nginx). +nsd: no # Removed (replaced with go.d/nsd). +nvidia_smi: no # Removed (replaced with go.d/nvidia_smi). +postfix: no # Removed (replaced with go.d/postfix). postgres: no # Removed (replaced with go.d/postgres). proxysql: no # Removed (replaced with go.d/proxysql). redis: no # Removed (replaced with go.d/redis). +rethinkdbs: no # Removed (replaced with go.d/rethinkdb). +riakkv: no # Removed (replaced with go.d/riak). sensors: no # Removed (replaced with go.d/sensors). +squid: no # Removed (replaced with go.d/squid). +tomcat: no # Removed (replaced with go.d/tomcat) +tor: no # Removed (replaced with go.d/tor). +puppet: no # Removed (replaced with go.d/puppet). +uwsgi: no # Removed (replaced with go.d/uwsgi). diff --git a/src/collectors/python.d.plugin/python_modules/third_party/lm_sensors.py b/src/collectors/python.d.plugin/python_modules/third_party/lm_sensors.py deleted file mode 100644 index f873eac83..000000000 --- a/src/collectors/python.d.plugin/python_modules/third_party/lm_sensors.py +++ /dev/null @@ -1,327 +0,0 @@ -# SPDX-License-Identifier: LGPL-2.1 -""" -@package sensors.py -Python Bindings for libsensors3 - -use the documentation of libsensors for the low level API. -see example.py for high level API usage. 
- -@author: Pavel Rojtberg (http://www.rojtberg.net) -@see: https://github.com/paroj/sensors.py -@copyright: LGPLv2 (same as libsensors) <http://opensource.org/licenses/LGPL-2.1> -""" - -from ctypes import * -import ctypes.util - -_libc = cdll.LoadLibrary(ctypes.util.find_library("c")) -# see https://github.com/paroj/sensors.py/issues/1 -_libc.free.argtypes = [c_void_p] - -_hdl = cdll.LoadLibrary(ctypes.util.find_library("sensors")) - -version = c_char_p.in_dll(_hdl, "libsensors_version").value.decode("ascii") - - -class SensorsError(Exception): - pass - - -class ErrorWildcards(SensorsError): - pass - - -class ErrorNoEntry(SensorsError): - pass - - -class ErrorAccessRead(SensorsError, OSError): - pass - - -class ErrorKernel(SensorsError, OSError): - pass - - -class ErrorDivZero(SensorsError, ZeroDivisionError): - pass - - -class ErrorChipName(SensorsError): - pass - - -class ErrorBusName(SensorsError): - pass - - -class ErrorParse(SensorsError): - pass - - -class ErrorAccessWrite(SensorsError, OSError): - pass - - -class ErrorIO(SensorsError, IOError): - pass - - -class ErrorRecursion(SensorsError): - pass - - -_ERR_MAP = { - 1: ErrorWildcards, - 2: ErrorNoEntry, - 3: ErrorAccessRead, - 4: ErrorKernel, - 5: ErrorDivZero, - 6: ErrorChipName, - 7: ErrorBusName, - 8: ErrorParse, - 9: ErrorAccessWrite, - 10: ErrorIO, - 11: ErrorRecursion -} - - -def raise_sensor_error(errno, message=''): - raise _ERR_MAP[abs(errno)](message) - - -class bus_id(Structure): - _fields_ = [("type", c_short), - ("nr", c_short)] - - -class chip_name(Structure): - _fields_ = [("prefix", c_char_p), - ("bus", bus_id), - ("addr", c_int), - ("path", c_char_p)] - - -class feature(Structure): - _fields_ = [("name", c_char_p), - ("number", c_int), - ("type", c_int)] - - # sensors_feature_type - IN = 0x00 - FAN = 0x01 - TEMP = 0x02 - POWER = 0x03 - ENERGY = 0x04 - CURR = 0x05 - HUMIDITY = 0x06 - MAX_MAIN = 0x7 - VID = 0x10 - INTRUSION = 0x11 - MAX_OTHER = 0x12 - BEEP_ENABLE = 0x18 - - -class subfeature(Structure): - _fields_ = [("name", c_char_p), - ("number", c_int), - ("type", c_int), - ("mapping", c_int), - ("flags", c_uint)] - - -_hdl.sensors_get_detected_chips.restype = POINTER(chip_name) -_hdl.sensors_get_features.restype = POINTER(feature) -_hdl.sensors_get_all_subfeatures.restype = POINTER(subfeature) -_hdl.sensors_get_label.restype = c_void_p # return pointer instead of str so we can free it -_hdl.sensors_get_adapter_name.restype = c_char_p # docs do not say whether to free this or not -_hdl.sensors_strerror.restype = c_char_p - -### RAW API ### -MODE_R = 1 -MODE_W = 2 -COMPUTE_MAPPING = 4 - - -def init(cfg_file=None): - file = _libc.fopen(cfg_file.encode("utf-8"), "r") if cfg_file is not None else None - - result = _hdl.sensors_init(file) - if result != 0: - raise_sensor_error(result, "sensors_init failed") - - if file is not None: - _libc.fclose(file) - - -def cleanup(): - _hdl.sensors_cleanup() - - -def parse_chip_name(orig_name): - ret = chip_name() - err = _hdl.sensors_parse_chip_name(orig_name.encode("utf-8"), byref(ret)) - - if err < 0: - raise_sensor_error(err, strerror(err)) - - return ret - - -def strerror(errnum): - return _hdl.sensors_strerror(errnum).decode("utf-8") - - -def free_chip_name(chip): - _hdl.sensors_free_chip_name(byref(chip)) - - -def get_detected_chips(match, nr): - """ - @return: (chip, next nr to query) - """ - _nr = c_int(nr) - - if match is not None: - match = byref(match) - - chip = _hdl.sensors_get_detected_chips(match, byref(_nr)) - chip = chip.contents if bool(chip) else None - 
return chip, _nr.value - - -def chip_snprintf_name(chip, buffer_size=200): - """ - @param buffer_size defaults to the size used in the sensors utility - """ - ret = create_string_buffer(buffer_size) - err = _hdl.sensors_snprintf_chip_name(ret, buffer_size, byref(chip)) - - if err < 0: - raise_sensor_error(err, strerror(err)) - - return ret.value.decode("utf-8") - - -def do_chip_sets(chip): - """ - @attention this function was not tested - """ - err = _hdl.sensors_do_chip_sets(byref(chip)) - if err < 0: - raise_sensor_error(err, strerror(err)) - - -def get_adapter_name(bus): - return _hdl.sensors_get_adapter_name(byref(bus)).decode("utf-8") - - -def get_features(chip, nr): - """ - @return: (feature, next nr to query) - """ - _nr = c_int(nr) - feature = _hdl.sensors_get_features(byref(chip), byref(_nr)) - feature = feature.contents if bool(feature) else None - return feature, _nr.value - - -def get_label(chip, feature): - ptr = _hdl.sensors_get_label(byref(chip), byref(feature)) - val = cast(ptr, c_char_p).value.decode("utf-8") - _libc.free(ptr) - return val - - -def get_all_subfeatures(chip, feature, nr): - """ - @return: (subfeature, next nr to query) - """ - _nr = c_int(nr) - subfeature = _hdl.sensors_get_all_subfeatures(byref(chip), byref(feature), byref(_nr)) - subfeature = subfeature.contents if bool(subfeature) else None - return subfeature, _nr.value - - -def get_value(chip, subfeature_nr): - val = c_double() - err = _hdl.sensors_get_value(byref(chip), subfeature_nr, byref(val)) - if err < 0: - raise_sensor_error(err, strerror(err)) - return val.value - - -def set_value(chip, subfeature_nr, value): - """ - @attention this function was not tested - """ - val = c_double(value) - err = _hdl.sensors_set_value(byref(chip), subfeature_nr, byref(val)) - if err < 0: - raise_sensor_error(err, strerror(err)) - - -### Convenience API ### -class ChipIterator: - def __init__(self, match=None): - self.match = parse_chip_name(match) if match is not None else None - self.nr = 0 - - def __iter__(self): - return self - - def __next__(self): - chip, self.nr = get_detected_chips(self.match, self.nr) - - if chip is None: - raise StopIteration - - return chip - - def __del__(self): - if self.match is not None: - free_chip_name(self.match) - - def next(self): # python2 compability - return self.__next__() - - -class FeatureIterator: - def __init__(self, chip): - self.chip = chip - self.nr = 0 - - def __iter__(self): - return self - - def __next__(self): - feature, self.nr = get_features(self.chip, self.nr) - - if feature is None: - raise StopIteration - - return feature - - def next(self): # python2 compability - return self.__next__() - - -class SubFeatureIterator: - def __init__(self, chip, feature): - self.chip = chip - self.feature = feature - self.nr = 0 - - def __iter__(self): - return self - - def __next__(self): - subfeature, self.nr = get_all_subfeatures(self.chip, self.feature, self.nr) - - if subfeature is None: - raise StopIteration - - return subfeature - - def next(self): # python2 compability - return self.__next__() diff --git a/src/collectors/python.d.plugin/rethinkdbs/README.md b/src/collectors/python.d.plugin/rethinkdbs/README.md deleted file mode 120000 index 78ddcfa18..000000000 --- a/src/collectors/python.d.plugin/rethinkdbs/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/rethinkdb.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md b/src/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md deleted file mode 100644 index f7da12dd6..000000000 --- a/src/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md +++ /dev/null @@ -1,190 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/rethinkdbs/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/rethinkdbs/metadata.yaml" -sidebar_label: "RethinkDB" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Databases" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# RethinkDB - - -<img src="https://netdata.cloud/img/rethinkdb.png" width="150"/> - - -Plugin: python.d.plugin -Module: rethinkdbs - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors metrics about RethinkDB clusters and database servers. - -It uses the `rethinkdb` python module to connect to a RethinkDB server instance and gather statistics. - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -When no configuration file is found, the collector tries to connect to 127.0.0.1:28015. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per RethinkDB instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| rethinkdb.cluster_connected_servers | connected, missing | servers | -| rethinkdb.cluster_clients_active | active | clients | -| rethinkdb.cluster_queries | queries | queries/s | -| rethinkdb.cluster_documents | reads, writes | documents/s | - -### Per database server - - - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| rethinkdb.client_connections | connections | connections | -| rethinkdb.clients_active | active | clients | -| rethinkdb.queries | queries | queries/s | -| rethinkdb.documents | reads, writes | documents/s | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Required python module - -The collector requires the `rethinkdb` python module to be installed. - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/rethinkdbs.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/rethinkdbs.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. 
- -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | -| host | Hostname or ip of the RethinkDB server. | localhost | no | -| port | Port to connect to the RethinkDB server. | 28015 | no | -| user | The username to use to connect to the RethinkDB server. | admin | no | -| password | The password to use to connect to the RethinkDB server. | | no | -| timeout | Set a connect timeout to the RethinkDB server. | 2 | no | - -</details> - -#### Examples - -##### Local RethinkDB server - -An example of a configuration for a local RethinkDB server - -```yaml -localhost: - name: 'local' - host: '127.0.0.1' - port: 28015 - user: "user" - password: "pass" - -``` - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `rethinkdbs` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin rethinkdbs debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/rethinkdbs/metadata.yaml b/src/collectors/python.d.plugin/rethinkdbs/metadata.yaml deleted file mode 100644 index bbc50eac6..000000000 --- a/src/collectors/python.d.plugin/rethinkdbs/metadata.yaml +++ /dev/null @@ -1,188 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: rethinkdbs - monitored_instance: - name: RethinkDB - link: 'https://rethinkdb.com/' - categories: - - data-collection.database-servers - icon_filename: 'rethinkdb.png' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: - - rethinkdb - - database - - db - most_popular: false - overview: - data_collection: - metrics_description: 'This collector monitors metrics about RethinkDB clusters and database servers.' - method_description: 'It uses the `rethinkdb` python module to connect to a RethinkDB server instance and gather statistics.' 
- supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: 'When no configuration file is found, the collector tries to connect to 127.0.0.1:28015.' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: - - title: 'Required python module' - description: 'The collector requires the `rethinkdb` python module to be installed.' - configuration: - file: - name: python.d/rethinkdbs.conf - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: '' - required: false - - name: host - description: Hostname or ip of the RethinkDB server. - default_value: 'localhost' - required: false - - name: port - description: Port to connect to the RethinkDB server. - default_value: '28015' - required: false - - name: user - description: The username to use to connect to the RethinkDB server. - default_value: 'admin' - required: false - - name: password - description: The password to use to connect to the RethinkDB server. - default_value: '' - required: false - - name: timeout - description: Set a connect timeout to the RethinkDB server. - default_value: '2' - required: false - examples: - folding: - enabled: false - title: "Config" - list: - - name: Local RethinkDB server - description: An example of a configuration for a local RethinkDB server - folding: - enabled: false - config: | - localhost: - name: 'local' - host: '127.0.0.1' - port: 28015 - user: "user" - password: "pass" - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." 
- labels: [] - metrics: - - name: rethinkdb.cluster_connected_servers - description: Connected Servers - unit: "servers" - chart_type: stacked - dimensions: - - name: connected - - name: missing - - name: rethinkdb.cluster_clients_active - description: Active Clients - unit: "clients" - chart_type: line - dimensions: - - name: active - - name: rethinkdb.cluster_queries - description: Queries - unit: "queries/s" - chart_type: line - dimensions: - - name: queries - - name: rethinkdb.cluster_documents - description: Documents - unit: "documents/s" - chart_type: line - dimensions: - - name: reads - - name: writes - - name: database server - description: "" - labels: [] - metrics: - - name: rethinkdb.client_connections - description: Client Connections - unit: "connections" - chart_type: line - dimensions: - - name: connections - - name: rethinkdb.clients_active - description: Active Clients - unit: "clients" - chart_type: line - dimensions: - - name: active - - name: rethinkdb.queries - description: Queries - unit: "queries/s" - chart_type: line - dimensions: - - name: queries - - name: rethinkdb.documents - description: Documents - unit: "documents/s" - chart_type: line - dimensions: - - name: reads - - name: writes diff --git a/src/collectors/python.d.plugin/rethinkdbs/rethinkdbs.chart.py b/src/collectors/python.d.plugin/rethinkdbs/rethinkdbs.chart.py deleted file mode 100644 index e3fbc3632..000000000 --- a/src/collectors/python.d.plugin/rethinkdbs/rethinkdbs.chart.py +++ /dev/null @@ -1,247 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: rethinkdb netdata python.d module -# Author: Ilya Mashchenko (ilyam8) -# SPDX-License-Identifier: GPL-3.0-or-later - -try: - import rethinkdb as rdb - - HAS_RETHINKDB = True -except ImportError: - HAS_RETHINKDB = False - -from bases.FrameworkServices.SimpleService import SimpleService - -ORDER = [ - 'cluster_connected_servers', - 'cluster_clients_active', - 'cluster_queries', - 'cluster_documents', -] - - -def cluster_charts(): - return { - 'cluster_connected_servers': { - 'options': [None, 'Connected Servers', 'servers', 'cluster', 'rethinkdb.cluster_connected_servers', - 'stacked'], - 'lines': [ - ['cluster_servers_connected', 'connected'], - ['cluster_servers_missing', 'missing'], - ] - }, - 'cluster_clients_active': { - 'options': [None, 'Active Clients', 'clients', 'cluster', 'rethinkdb.cluster_clients_active', - 'line'], - 'lines': [ - ['cluster_clients_active', 'active'], - ] - }, - 'cluster_queries': { - 'options': [None, 'Queries', 'queries/s', 'cluster', 'rethinkdb.cluster_queries', 'line'], - 'lines': [ - ['cluster_queries_per_sec', 'queries'], - ] - }, - 'cluster_documents': { - 'options': [None, 'Documents', 'documents/s', 'cluster', 'rethinkdb.cluster_documents', 'line'], - 'lines': [ - ['cluster_read_docs_per_sec', 'reads'], - ['cluster_written_docs_per_sec', 'writes'], - ] - }, - } - - -def server_charts(n): - o = [ - '{0}_client_connections'.format(n), - '{0}_clients_active'.format(n), - '{0}_queries'.format(n), - '{0}_documents'.format(n), - ] - f = 'server {0}'.format(n) - - c = { - o[0]: { - 'options': [None, 'Client Connections', 'connections', f, 'rethinkdb.client_connections', 'line'], - 'lines': [ - ['{0}_client_connections'.format(n), 'connections'], - ] - }, - o[1]: { - 'options': [None, 'Active Clients', 'clients', f, 'rethinkdb.clients_active', 'line'], - 'lines': [ - ['{0}_clients_active'.format(n), 'active'], - ] - }, - o[2]: { - 'options': [None, 'Queries', 'queries/s', f, 'rethinkdb.queries', 'line'], - 'lines': [ - 
['{0}_queries_total'.format(n), 'queries', 'incremental'], - ] - }, - o[3]: { - 'options': [None, 'Documents', 'documents/s', f, 'rethinkdb.documents', 'line'], - 'lines': [ - ['{0}_read_docs_total'.format(n), 'reads', 'incremental'], - ['{0}_written_docs_total'.format(n), 'writes', 'incremental'], - ] - }, - } - - return o, c - - -class Cluster: - def __init__(self, raw): - self.raw = raw - - def data(self): - qe = self.raw['query_engine'] - - return { - 'cluster_clients_active': qe['clients_active'], - 'cluster_queries_per_sec': qe['queries_per_sec'], - 'cluster_read_docs_per_sec': qe['read_docs_per_sec'], - 'cluster_written_docs_per_sec': qe['written_docs_per_sec'], - 'cluster_servers_connected': 0, - 'cluster_servers_missing': 0, - } - - -class Server: - def __init__(self, raw): - self.name = raw['server'] - self.raw = raw - - def error(self): - return self.raw.get('error') - - def data(self): - qe = self.raw['query_engine'] - - d = { - 'client_connections': qe['client_connections'], - 'clients_active': qe['clients_active'], - 'queries_total': qe['queries_total'], - 'read_docs_total': qe['read_docs_total'], - 'written_docs_total': qe['written_docs_total'], - } - - return dict(('{0}_{1}'.format(self.name, k), d[k]) for k in d) - - -# https://pypi.org/project/rethinkdb/2.4.0/ -# rdb.RethinkDB() can be used as rdb drop in replacement. -# https://github.com/rethinkdb/rethinkdb-python#quickstart -def get_rethinkdb(): - if hasattr(rdb, 'RethinkDB'): - return rdb.RethinkDB() - return rdb - - -class Service(SimpleService): - def __init__(self, configuration=None, name=None): - SimpleService.__init__(self, configuration=configuration, name=name) - self.order = list(ORDER) - self.definitions = cluster_charts() - self.host = self.configuration.get('host', '127.0.0.1') - self.port = self.configuration.get('port', 28015) - self.user = self.configuration.get('user', 'admin') - self.password = self.configuration.get('password') - self.timeout = self.configuration.get('timeout', 2) - self.rdb = None - self.conn = None - self.alive = True - - def check(self): - if not HAS_RETHINKDB: - self.error('"rethinkdb" module is needed to use rethinkdbs.py') - return False - - self.debug("rethinkdb driver version {0}".format(rdb.__version__)) - self.rdb = get_rethinkdb() - - if not self.connect(): - return None - - stats = self.get_stats() - - if not stats: - return None - - for v in stats[1:]: - if get_id(v) == 'server': - o, c = server_charts(v['server']) - self.order.extend(o) - self.definitions.update(c) - - return True - - def get_data(self): - if not self.is_alive(): - return None - - stats = self.get_stats() - - if not stats: - return None - - data = dict() - - # cluster - data.update(Cluster(stats[0]).data()) - - # servers - for v in stats[1:]: - if get_id(v) != 'server': - continue - - s = Server(v) - - if s.error(): - data['cluster_servers_missing'] += 1 - else: - data['cluster_servers_connected'] += 1 - data.update(s.data()) - - return data - - def get_stats(self): - try: - return list(self.rdb.db('rethinkdb').table('stats').run(self.conn).items) - except rdb.errors.ReqlError: - self.alive = False - return None - - def connect(self): - try: - self.conn = self.rdb.connect( - host=self.host, - port=self.port, - user=self.user, - password=self.password, - timeout=self.timeout, - ) - self.alive = True - return True - except rdb.errors.ReqlError as error: - self.error('Connection to {0}:{1} failed: {2}'.format(self.host, self.port, error)) - return False - - def reconnect(self): - # The connection is 
already closed after rdb.errors.ReqlError, - # so we do not need to call conn.close() - if self.connect(): - return True - return False - - def is_alive(self): - if not self.alive: - return self.reconnect() - return True - - -def get_id(v): - return v['id'][0] diff --git a/src/collectors/python.d.plugin/rethinkdbs/rethinkdbs.conf b/src/collectors/python.d.plugin/rethinkdbs/rethinkdbs.conf deleted file mode 100644 index d671acbb0..000000000 --- a/src/collectors/python.d.plugin/rethinkdbs/rethinkdbs.conf +++ /dev/null @@ -1,76 +0,0 @@ -# netdata python.d.plugin configuration for rethinkdb -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, rethinkdb also supports the following: -# -# host: IP or HOSTNAME # default is 'localhost' -# port: PORT # default is 28015 -# user: USERNAME # default is 'admin' -# password: PASSWORD # not set by default -# timeout: TIMEOUT # default is 2 - -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) - -local: - name: 'local' - host: 'localhost' diff --git a/src/collectors/python.d.plugin/retroshare/README.md b/src/collectors/python.d.plugin/retroshare/README.md deleted file mode 120000 index 4e4c2cdb7..000000000 --- a/src/collectors/python.d.plugin/retroshare/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/retroshare.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/retroshare/integrations/retroshare.md b/src/collectors/python.d.plugin/retroshare/integrations/retroshare.md deleted file mode 100644 index b045127ee..000000000 --- a/src/collectors/python.d.plugin/retroshare/integrations/retroshare.md +++ /dev/null @@ -1,191 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/retroshare/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/retroshare/metadata.yaml" -sidebar_label: "RetroShare" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Media Services" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# RetroShare - - -<img src="https://netdata.cloud/img/retroshare.png" width="150"/> - - -Plugin: python.d.plugin -Module: retroshare - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors RetroShare statistics such as application bandwidth, peers, and DHT metrics. - -It connects to the RetroShare web interface to gather metrics. - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -The collector will attempt to connect and detect a RetroShare web interface through http://localhost:9090, even without any configuration. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per RetroShare instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| retroshare.bandwidth | Upload, Download | kilobits/s | -| retroshare.peers | All friends, Connected friends | peers | -| retroshare.dht | DHT nodes estimated, RS nodes estimated | peers | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ retroshare_dht_working ](https://github.com/netdata/netdata/blob/master/src/health/health.d/retroshare.conf) | retroshare.dht | number of DHT peers | - - -## Setup - -### Prerequisites - -#### RetroShare web interface - -RetroShare needs to be configured to enable the RetroShare WEB Interface and allow access from the Netdata host. - - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/retroshare.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/retroshare.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. 
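Before defining a job, it can help to confirm that the RetroShare Web UI statistics endpoint is reachable from the Netdata host. The sketch below is illustrative only and is not part of the collector: it assumes the default `http://localhost:9090` URL plus the `/api/v2/stats` path and field names used by the collector module shown later in this diff, so adjust them for your setup.

```python
# Illustrative check of the RetroShare Web UI statistics endpoint (assumed defaults).
import json
from urllib.request import urlopen

BASE_URL = "http://localhost:9090"  # same value as the `url` job option

with urlopen(BASE_URL + "/api/v2/stats", timeout=5) as response:
    payload = json.loads(response.read().decode("utf-8"))

# The endpoint reports a return code plus a list holding one stats object.
if str(payload.get("returncode")) != "ok":
    raise SystemExit("RetroShare returned an error: {0}".format(payload))

stats = payload["data"][0]
print("upload (kb/s):", stats.get("bandwidth_up_kb"))
print("connected friends:", stats.get("peers_connected"))
```

If this prints sensible numbers, the `url` option in the job definition below is all the collector needs.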
- -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | -| url | The URL to the RetroShare Web UI. | http://localhost:9090 | no | - -</details> - -#### Examples - -##### Local RetroShare Web UI - -A basic configuration for a RetroShare server running on localhost. - -<details open><summary>Config</summary> - -```yaml -localhost: - name: 'local retroshare' - url: 'http://localhost:9090' - -``` -</details> - -##### Remote RetroShare Web UI - -A basic configuration for a remote RetroShare server. - -<details open><summary>Config</summary> - -```yaml -remote: - name: 'remote retroshare' - url: 'http://1.2.3.4:9090' - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `retroshare` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin retroshare debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/retroshare/metadata.yaml b/src/collectors/python.d.plugin/retroshare/metadata.yaml deleted file mode 100644 index e0270e1dd..000000000 --- a/src/collectors/python.d.plugin/retroshare/metadata.yaml +++ /dev/null @@ -1,144 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: retroshare - monitored_instance: - name: RetroShare - link: "https://retroshare.cc/" - categories: - - data-collection.media-streaming-servers - icon_filename: "retroshare.png" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - retroshare - - p2p - most_popular: false - overview: - data_collection: - metrics_description: "This collector monitors RetroShare statistics such as application bandwidth, peers, and DHT metrics." - method_description: "It connects to the RetroShare web interface to gather metrics." 
- supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "The collector will attempt to connect and detect a RetroShare web interface through http://localhost:9090, even without any configuration." - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: "RetroShare web interface" - description: | - RetroShare needs to be configured to enable the RetroShare WEB Interface and allow access from the Netdata host. - configuration: - file: - name: python.d/retroshare.conf - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: "" - required: false - - name: url - description: The URL to the RetroShare Web UI. - default_value: "http://localhost:9090" - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Local RetroShare Web UI - description: A basic configuration for a RetroShare server running on localhost. - config: | - localhost: - name: 'local retroshare' - url: 'http://localhost:9090' - - name: Remote RetroShare Web UI - description: A basic configuration for a remote RetroShare server. - config: | - remote: - name: 'remote retroshare' - url: 'http://1.2.3.4:9090' - - troubleshooting: - problems: - list: [] - alerts: - - name: retroshare_dht_working - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/retroshare.conf - metric: retroshare.dht - info: number of DHT peers - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." 
- labels: [] - metrics: - - name: retroshare.bandwidth - description: RetroShare Bandwidth - unit: "kilobits/s" - chart_type: area - dimensions: - - name: Upload - - name: Download - - name: retroshare.peers - description: RetroShare Peers - unit: "peers" - chart_type: line - dimensions: - - name: All friends - - name: Connected friends - - name: retroshare.dht - description: Retroshare DHT - unit: "peers" - chart_type: line - dimensions: - - name: DHT nodes estimated - - name: RS nodes estimated diff --git a/src/collectors/python.d.plugin/retroshare/retroshare.chart.py b/src/collectors/python.d.plugin/retroshare/retroshare.chart.py deleted file mode 100644 index 3f9593e94..000000000 --- a/src/collectors/python.d.plugin/retroshare/retroshare.chart.py +++ /dev/null @@ -1,78 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: RetroShare netdata python.d module -# Authors: sehraf -# SPDX-License-Identifier: GPL-3.0-or-later - -import json - -from bases.FrameworkServices.UrlService import UrlService - -ORDER = [ - 'bandwidth', - 'peers', - 'dht', -] - -CHARTS = { - 'bandwidth': { - 'options': [None, 'RetroShare Bandwidth', 'kilobits/s', 'RetroShare', 'retroshare.bandwidth', 'area'], - 'lines': [ - ['bandwidth_up_kb', 'Upload'], - ['bandwidth_down_kb', 'Download'] - ] - }, - 'peers': { - 'options': [None, 'RetroShare Peers', 'peers', 'RetroShare', 'retroshare.peers', 'line'], - 'lines': [ - ['peers_all', 'All friends'], - ['peers_connected', 'Connected friends'] - ] - }, - 'dht': { - 'options': [None, 'Retroshare DHT', 'peers', 'RetroShare', 'retroshare.dht', 'line'], - 'lines': [ - ['dht_size_all', 'DHT nodes estimated'], - ['dht_size_rs', 'RS nodes estimated'] - ] - } -} - - -class Service(UrlService): - def __init__(self, configuration=None, name=None): - UrlService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.baseurl = self.configuration.get('url', 'http://localhost:9090') - - def _get_stats(self): - """ - Format data received from http request - :return: dict - """ - try: - raw = self._get_raw_data() - parsed = json.loads(raw) - if str(parsed['returncode']) != 'ok': - return None - except (TypeError, ValueError): - return None - - return parsed['data'][0] - - def _get_data(self): - """ - Get data from API - :return: dict - """ - self.url = self.baseurl + '/api/v2/stats' - data = self._get_stats() - if data is None: - return None - - data['bandwidth_up_kb'] = data['bandwidth_up_kb'] * -1 - if data['dht_active'] is False: - data['dht_size_all'] = None - data['dht_size_rs'] = None - - return data diff --git a/src/collectors/python.d.plugin/retroshare/retroshare.conf b/src/collectors/python.d.plugin/retroshare/retroshare.conf deleted file mode 100644 index 3d0af538d..000000000 --- a/src/collectors/python.d.plugin/retroshare/retroshare.conf +++ /dev/null @@ -1,72 +0,0 @@ -# netdata python.d.plugin configuration for RetroShare -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. 
-# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, RetroShare also supports the following: -# -# - url: 'url' # the URL to the WebUI -# -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) - -localhost: - name: 'local' - url: 'http://localhost:9090' diff --git a/src/collectors/python.d.plugin/riakkv/README.md b/src/collectors/python.d.plugin/riakkv/README.md deleted file mode 120000 index f43ece09b..000000000 --- a/src/collectors/python.d.plugin/riakkv/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/riakkv.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/riakkv/integrations/riakkv.md b/src/collectors/python.d.plugin/riakkv/integrations/riakkv.md deleted file mode 100644 index a671b9c76..000000000 --- a/src/collectors/python.d.plugin/riakkv/integrations/riakkv.md +++ /dev/null @@ -1,220 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/riakkv/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/riakkv/metadata.yaml" -sidebar_label: "RiakKV" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Databases" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# RiakKV - - -<img src="https://netdata.cloud/img/riak.svg" width="150"/> - - -Plugin: python.d.plugin -Module: riakkv - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors RiakKV metrics about throughput, latency, resources and more.' - - -This collector reads the database stats from the `/stats` endpoint. - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -If the /stats endpoint is accessible, RiakKV instances on the local host running on port 8098 will be autodetected. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per RiakKV instance - -These metrics refer to the entire monitored application. - -This scope has no labels. 
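All of the charts below are built from counters exposed by Riak's `/stats` endpoint. If you want to inspect the raw values yourself, for example to confirm the endpoint is reachable before enabling the collector, a minimal sketch along the following lines can help. It is illustrative only: it assumes the default `http://localhost:8098/stats` URL and two stat names the collector reads (`node_gets_total`, `node_puts_total`); adjust them as needed.

```python
# Illustrative only: fetch Riak's /stats endpoint and print two of the
# counters the riakkv collector charts (node_gets_total, node_puts_total).
# The URL is an assumption (the documented default); match it to your `url` option.
import json
from urllib.request import urlopen

STATS_URL = "http://localhost:8098/stats"

with urlopen(STATS_URL, timeout=5) as response:
    stats = json.loads(response.read().decode("utf-8"))

print("gets coordinated by this node:", stats.get("node_gets_total"))
print("puts coordinated by this node:", stats.get("node_puts_total"))
```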
- -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| riak.kv.throughput | gets, puts | operations/s | -| riak.dt.vnode_updates | counters, sets, maps | operations/s | -| riak.search | queries | queries/s | -| riak.search.documents | indexed | documents/s | -| riak.consistent.operations | gets, puts | operations/s | -| riak.kv.latency.get | mean, median, 95, 99, 100 | ms | -| riak.kv.latency.put | mean, median, 95, 99, 100 | ms | -| riak.dt.latency.counter_merge | mean, median, 95, 99, 100 | ms | -| riak.dt.latency.set_merge | mean, median, 95, 99, 100 | ms | -| riak.dt.latency.map_merge | mean, median, 95, 99, 100 | ms | -| riak.search.latency.query | median, min, 95, 99, 999, max | ms | -| riak.search.latency.index | median, min, 95, 99, 999, max | ms | -| riak.consistent.latency.get | mean, median, 95, 99, 100 | ms | -| riak.consistent.latency.put | mean, median, 95, 99, 100 | ms | -| riak.vm | processes | total | -| riak.vm.memory.processes | allocated, used | MB | -| riak.kv.siblings_encountered.get | mean, median, 95, 99, 100 | siblings | -| riak.kv.objsize.get | mean, median, 95, 99, 100 | KB | -| riak.search.vnodeq_size | mean, median, 95, 99, 100 | messages | -| riak.search.index | errors | errors | -| riak.core.protobuf_connections | active | connections | -| riak.core.repairs | read | repairs | -| riak.core.fsm_active | get, put, secondary index, list keys | fsms | -| riak.core.fsm_rejected | get, put | fsms | -| riak.search.index | bad_entry, extract_fail | writes | - - - -## Alerts - - -The following alerts are available: - -| Alert name | On metric | Description | -|:------------|:----------|:------------| -| [ riakkv_1h_kv_get_mean_latency ](https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf) | riak.kv.latency.get | average time between reception of client GET request and subsequent response to client over the last hour | -| [ riakkv_kv_get_slow ](https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf) | riak.kv.latency.get | average time between reception of client GET request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour | -| [ riakkv_1h_kv_put_mean_latency ](https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf) | riak.kv.latency.put | average time between reception of client PUT request and subsequent response to the client over the last hour | -| [ riakkv_kv_put_slow ](https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf) | riak.kv.latency.put | average time between reception of client PUT request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour | -| [ riakkv_vm_high_process_count ](https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf) | riak.vm | number of processes running in the Erlang VM | -| [ riakkv_list_keys_active ](https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf) | riak.core.fsm_active | number of currently running list keys finite state machines | - - -## Setup - -### Prerequisites - -#### Configure RiakKV to enable /stats endpoint - -You can follow the RiakKV configuration reference documentation for how to enable this. - -Source : https://docs.riak.com/riak/kv/2.2.3/configuring/reference/#client-interfaces - - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/riakkv.conf`. 
- - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/riakkv.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| url | The url of the server | no | yes | - -</details> - -#### Examples - -##### Basic (default) - -A basic example configuration per job - -```yaml -local: -url: 'http://localhost:8098/stats' - -``` -##### Multi-instance - -> **Note**: When you define multiple jobs, their names must be unique. - -Collecting metrics from local and remote instances. - - -<details open><summary>Config</summary> - -```yaml -local: - url: 'http://localhost:8098/stats' - -remote: - url: 'http://192.0.2.1:8098/stats' - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `riakkv` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin riakkv debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/riakkv/metadata.yaml b/src/collectors/python.d.plugin/riakkv/metadata.yaml deleted file mode 100644 index d68e73053..000000000 --- a/src/collectors/python.d.plugin/riakkv/metadata.yaml +++ /dev/null @@ -1,358 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: riakkv - monitored_instance: - name: RiakKV - link: "https://riak.com/products/riak-kv/index.html" - categories: - - data-collection.database-servers - icon_filename: "riak.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - database - - nosql - - big data - most_popular: false - overview: - data_collection: - metrics_description: | - This collector monitors RiakKV metrics about throughput, latency, resources and more.' - method_description: "This collector reads the database stats from the `/stats` endpoint." 
- supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "If the /stats endpoint is accessible, RiakKV instances on the local host running on port 8098 will be autodetected." - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: Configure RiakKV to enable /stats endpoint - description: | - You can follow the RiakKV configuration reference documentation for how to enable this. - - Source : https://docs.riak.com/riak/kv/2.2.3/configuring/reference/#client-interfaces - configuration: - file: - name: "python.d/riakkv.conf" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: url - description: The url of the server - default_value: no - required: true - examples: - folding: - enabled: true - title: "Config" - list: - - name: Basic (default) - folding: - enabled: false - description: A basic example configuration per job - config: | - local: - url: 'http://localhost:8098/stats' - - name: Multi-instance - description: | - > **Note**: When you define multiple jobs, their names must be unique. - - Collecting metrics from local and remote instances. 
- config: | - local: - url: 'http://localhost:8098/stats' - - remote: - url: 'http://192.0.2.1:8098/stats' - troubleshooting: - problems: - list: [] - alerts: - - name: riakkv_1h_kv_get_mean_latency - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf - metric: riak.kv.latency.get - info: average time between reception of client GET request and subsequent response to client over the last hour - - name: riakkv_kv_get_slow - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf - metric: riak.kv.latency.get - info: average time between reception of client GET request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour - - name: riakkv_1h_kv_put_mean_latency - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf - metric: riak.kv.latency.put - info: average time between reception of client PUT request and subsequent response to the client over the last hour - - name: riakkv_kv_put_slow - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf - metric: riak.kv.latency.put - info: average time between reception of client PUT request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour - - name: riakkv_vm_high_process_count - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf - metric: riak.vm - info: number of processes running in the Erlang VM - - name: riakkv_list_keys_active - link: https://github.com/netdata/netdata/blob/master/src/health/health.d/riakkv.conf - metric: riak.core.fsm_active - info: number of currently running list keys finite state machines - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." 
- labels: [] - metrics: - - name: riak.kv.throughput - description: Reads & writes coordinated by this node - unit: "operations/s" - chart_type: line - dimensions: - - name: gets - - name: puts - - name: riak.dt.vnode_updates - description: Update operations coordinated by local vnodes by data type - unit: "operations/s" - chart_type: line - dimensions: - - name: counters - - name: sets - - name: maps - - name: riak.search - description: Search queries on the node - unit: "queries/s" - chart_type: line - dimensions: - - name: queries - - name: riak.search.documents - description: Documents indexed by search - unit: "documents/s" - chart_type: line - dimensions: - - name: indexed - - name: riak.consistent.operations - description: Consistent node operations - unit: "operations/s" - chart_type: line - dimensions: - - name: gets - - name: puts - - name: riak.kv.latency.get - description: Time between reception of a client GET request and subsequent response to client - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: "95" - - name: "99" - - name: "100" - - name: riak.kv.latency.put - description: Time between reception of a client PUT request and subsequent response to client - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: "95" - - name: "99" - - name: "100" - - name: riak.dt.latency.counter_merge - description: Time it takes to perform an Update Counter operation - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: "95" - - name: "99" - - name: "100" - - name: riak.dt.latency.set_merge - description: Time it takes to perform an Update Set operation - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: "95" - - name: "99" - - name: "100" - - name: riak.dt.latency.map_merge - description: Time it takes to perform an Update Map operation - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: "95" - - name: "99" - - name: "100" - - name: riak.search.latency.query - description: Search query latency - unit: "ms" - chart_type: line - dimensions: - - name: median - - name: min - - name: "95" - - name: "99" - - name: "999" - - name: max - - name: riak.search.latency.index - description: Time it takes Search to index a new document - unit: "ms" - chart_type: line - dimensions: - - name: median - - name: min - - name: "95" - - name: "99" - - name: "999" - - name: max - - name: riak.consistent.latency.get - description: Strongly consistent read latency - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: "95" - - name: "99" - - name: "100" - - name: riak.consistent.latency.put - description: Strongly consistent write latency - unit: "ms" - chart_type: line - dimensions: - - name: mean - - name: median - - name: "95" - - name: "99" - - name: "100" - - name: riak.vm - description: Total processes running in the Erlang VM - unit: "total" - chart_type: line - dimensions: - - name: processes - - name: riak.vm.memory.processes - description: Memory allocated & used by Erlang processes - unit: "MB" - chart_type: line - dimensions: - - name: allocated - - name: used - - name: riak.kv.siblings_encountered.get - description: Number of siblings encountered during GET operations by this node during the past minute - unit: "siblings" - chart_type: line - dimensions: - - name: mean - - name: median - - name: "95" - - name: "99" - - name: "100" - - name: riak.kv.objsize.get - description: Object size 
encountered by this node during the past minute - unit: "KB" - chart_type: line - dimensions: - - name: mean - - name: median - - name: "95" - - name: "99" - - name: "100" - - name: riak.search.vnodeq_size - description: Number of unprocessed messages in the vnode message queues of Search on this node in the past minute - unit: "messages" - chart_type: line - dimensions: - - name: mean - - name: median - - name: "95" - - name: "99" - - name: "100" - - name: riak.search.index - description: Number of document index errors encountered by Search - unit: "errors" - chart_type: line - dimensions: - - name: errors - - name: riak.core.protobuf_connections - description: Protocol buffer connections by status - unit: "connections" - chart_type: line - dimensions: - - name: active - - name: riak.core.repairs - description: Number of repair operations this node has coordinated - unit: "repairs" - chart_type: line - dimensions: - - name: read - - name: riak.core.fsm_active - description: Active finite state machines by kind - unit: "fsms" - chart_type: line - dimensions: - - name: get - - name: put - - name: secondary index - - name: list keys - - name: riak.core.fsm_rejected - description: Finite state machines being rejected by Sidejobs overload protection - unit: "fsms" - chart_type: line - dimensions: - - name: get - - name: put - - name: riak.search.index - description: Number of writes to Search failed due to bad data format by reason - unit: "writes" - chart_type: line - dimensions: - - name: bad_entry - - name: extract_fail diff --git a/src/collectors/python.d.plugin/riakkv/riakkv.chart.py b/src/collectors/python.d.plugin/riakkv/riakkv.chart.py deleted file mode 100644 index c390c8bc0..000000000 --- a/src/collectors/python.d.plugin/riakkv/riakkv.chart.py +++ /dev/null @@ -1,334 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: riak netdata python.d module -# -# See also: -# https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html - -from json import loads - -from bases.FrameworkServices.UrlService import UrlService - -# Riak updates the metrics at the /stats endpoint every 1 second. -# If we use `update_every = 1` here, that means we might get weird jitter in the graph, -# so the default is set to 2 seconds to prevent it. -update_every = 2 - -# charts order (can be overridden if you want less charts, or different order) -ORDER = [ - # Throughput metrics - # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#throughput-metrics - # Collected in totals. - "kv.node_operations", # K/V node operations. - "dt.vnode_updates", # Data type vnode updates. - "search.queries", # Search queries on the node. - "search.documents", # Documents indexed by Search. - "consistent.operations", # Consistent node operations. - - # Latency metrics - # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#throughput-metrics - # Collected for the past minute in milliseconds, - # returned from riak in microseconds. - "kv.latency.get", # K/V GET FSM traversal latency. - "kv.latency.put", # K/V PUT FSM traversal latency. - "dt.latency.counter", # Update Counter Data type latency. - "dt.latency.set", # Update Set Data type latency. - "dt.latency.map", # Update Map Data type latency. - "search.latency.query", # Search query latency. - "search.latency.index", # Time it takes for search to index a new document. - "consistent.latency.get", # Strong consistent read latency. - "consistent.latency.put", # Strong consistent write latency. 
- - # Erlang resource usage metrics - # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#erlang-resource-usage-metrics - # Processes collected as a gauge, - # memory collected as Megabytes, returned as bytes from Riak. - "vm.processes", # Number of processes currently running in the Erlang VM. - "vm.memory.processes", # Total amount of memory allocated & used for Erlang processes. - - # General Riak Load / Health metrics - # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#general-riak-load-health-metrics - # The following are collected by Riak over the past minute: - "kv.siblings_encountered.get", # Siblings encountered during GET operations by this node. - "kv.objsize.get", # Object size encountered by this node. - "search.vnodeq_size", # Number of unprocessed messages in the vnode message queues (Search). - # The following are calculated in total, or as gauges: - "search.index_errors", # Errors of the search subsystem while indexing documents. - "core.pbc", # Number of currently active protocol buffer connections. - "core.repairs", # Total read repair operations coordinated by this node. - "core.fsm_active", # Active finite state machines by kind. - "core.fsm_rejected", # Rejected finite state machines by kind. - - # General Riak Search Load / Health metrics - # https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#general-riak-search-load-health-metrics - # Reported as counters. - "search.errors", # Write and read errors of the Search subsystem. -] - -CHARTS = { - # Throughput metrics - "kv.node_operations": { - "options": [None, "Reads & writes coordinated by this node", "operations/s", "throughput", "riak.kv.throughput", - "line"], - "lines": [ - ["node_gets_total", "gets", "incremental"], - ["node_puts_total", "puts", "incremental"] - ] - }, - "dt.vnode_updates": { - "options": [None, "Update operations coordinated by local vnodes by data type", "operations/s", "throughput", - "riak.dt.vnode_updates", "line"], - "lines": [ - ["vnode_counter_update_total", "counters", "incremental"], - ["vnode_set_update_total", "sets", "incremental"], - ["vnode_map_update_total", "maps", "incremental"], - ] - }, - "search.queries": { - "options": [None, "Search queries on the node", "queries/s", "throughput", "riak.search", "line"], - "lines": [ - ["search_query_throughput_count", "queries", "incremental"] - ] - }, - "search.documents": { - "options": [None, "Documents indexed by search", "documents/s", "throughput", "riak.search.documents", "line"], - "lines": [ - ["search_index_throughput_count", "indexed", "incremental"] - ] - }, - "consistent.operations": { - "options": [None, "Consistent node operations", "operations/s", "throughput", "riak.consistent.operations", - "line"], - "lines": [ - ["consistent_gets_total", "gets", "incremental"], - ["consistent_puts_total", "puts", "incremental"], - ] - }, - - # Latency metrics - "kv.latency.get": { - "options": [None, "Time between reception of a client GET request and subsequent response to client", "ms", - "latency", "riak.kv.latency.get", "line"], - "lines": [ - ["node_get_fsm_time_mean", "mean", "absolute", 1, 1000], - ["node_get_fsm_time_median", "median", "absolute", 1, 1000], - ["node_get_fsm_time_95", "95", "absolute", 1, 1000], - ["node_get_fsm_time_99", "99", "absolute", 1, 1000], - ["node_get_fsm_time_100", "100", "absolute", 1, 1000], - ] - }, - "kv.latency.put": { - "options": [None, "Time between reception of a client PUT request and 
subsequent response to client", "ms", - "latency", "riak.kv.latency.put", "line"], - "lines": [ - ["node_put_fsm_time_mean", "mean", "absolute", 1, 1000], - ["node_put_fsm_time_median", "median", "absolute", 1, 1000], - ["node_put_fsm_time_95", "95", "absolute", 1, 1000], - ["node_put_fsm_time_99", "99", "absolute", 1, 1000], - ["node_put_fsm_time_100", "100", "absolute", 1, 1000], - ] - }, - "dt.latency.counter": { - "options": [None, "Time it takes to perform an Update Counter operation", "ms", "latency", - "riak.dt.latency.counter_merge", "line"], - "lines": [ - ["object_counter_merge_time_mean", "mean", "absolute", 1, 1000], - ["object_counter_merge_time_median", "median", "absolute", 1, 1000], - ["object_counter_merge_time_95", "95", "absolute", 1, 1000], - ["object_counter_merge_time_99", "99", "absolute", 1, 1000], - ["object_counter_merge_time_100", "100", "absolute", 1, 1000], - ] - }, - "dt.latency.set": { - "options": [None, "Time it takes to perform an Update Set operation", "ms", "latency", - "riak.dt.latency.set_merge", "line"], - "lines": [ - ["object_set_merge_time_mean", "mean", "absolute", 1, 1000], - ["object_set_merge_time_median", "median", "absolute", 1, 1000], - ["object_set_merge_time_95", "95", "absolute", 1, 1000], - ["object_set_merge_time_99", "99", "absolute", 1, 1000], - ["object_set_merge_time_100", "100", "absolute", 1, 1000], - ] - }, - "dt.latency.map": { - "options": [None, "Time it takes to perform an Update Map operation", "ms", "latency", - "riak.dt.latency.map_merge", "line"], - "lines": [ - ["object_map_merge_time_mean", "mean", "absolute", 1, 1000], - ["object_map_merge_time_median", "median", "absolute", 1, 1000], - ["object_map_merge_time_95", "95", "absolute", 1, 1000], - ["object_map_merge_time_99", "99", "absolute", 1, 1000], - ["object_map_merge_time_100", "100", "absolute", 1, 1000], - ] - }, - "search.latency.query": { - "options": [None, "Search query latency", "ms", "latency", "riak.search.latency.query", "line"], - "lines": [ - ["search_query_latency_median", "median", "absolute", 1, 1000], - ["search_query_latency_min", "min", "absolute", 1, 1000], - ["search_query_latency_95", "95", "absolute", 1, 1000], - ["search_query_latency_99", "99", "absolute", 1, 1000], - ["search_query_latency_999", "999", "absolute", 1, 1000], - ["search_query_latency_max", "max", "absolute", 1, 1000], - ] - }, - "search.latency.index": { - "options": [None, "Time it takes Search to index a new document", "ms", "latency", "riak.search.latency.index", - "line"], - "lines": [ - ["search_index_latency_median", "median", "absolute", 1, 1000], - ["search_index_latency_min", "min", "absolute", 1, 1000], - ["search_index_latency_95", "95", "absolute", 1, 1000], - ["search_index_latency_99", "99", "absolute", 1, 1000], - ["search_index_latency_999", "999", "absolute", 1, 1000], - ["search_index_latency_max", "max", "absolute", 1, 1000], - ] - }, - - # Riak Strong Consistency metrics - "consistent.latency.get": { - "options": [None, "Strongly consistent read latency", "ms", "latency", "riak.consistent.latency.get", "line"], - "lines": [ - ["consistent_get_time_mean", "mean", "absolute", 1, 1000], - ["consistent_get_time_median", "median", "absolute", 1, 1000], - ["consistent_get_time_95", "95", "absolute", 1, 1000], - ["consistent_get_time_99", "99", "absolute", 1, 1000], - ["consistent_get_time_100", "100", "absolute", 1, 1000], - ] - }, - "consistent.latency.put": { - "options": [None, "Strongly consistent write latency", "ms", "latency", 
"riak.consistent.latency.put", "line"], - "lines": [ - ["consistent_put_time_mean", "mean", "absolute", 1, 1000], - ["consistent_put_time_median", "median", "absolute", 1, 1000], - ["consistent_put_time_95", "95", "absolute", 1, 1000], - ["consistent_put_time_99", "99", "absolute", 1, 1000], - ["consistent_put_time_100", "100", "absolute", 1, 1000], - ] - }, - - # BEAM metrics - "vm.processes": { - "options": [None, "Total processes running in the Erlang VM", "total", "vm", "riak.vm", "line"], - "lines": [ - ["sys_process_count", "processes", "absolute"], - ] - }, - "vm.memory.processes": { - "options": [None, "Memory allocated & used by Erlang processes", "MB", "vm", "riak.vm.memory.processes", - "line"], - "lines": [ - ["memory_processes", "allocated", "absolute", 1, 1024 * 1024], - ["memory_processes_used", "used", "absolute", 1, 1024 * 1024] - ] - }, - - # General Riak Load/Health metrics - "kv.siblings_encountered.get": { - "options": [None, "Number of siblings encountered during GET operations by this node during the past minute", - "siblings", "load", "riak.kv.siblings_encountered.get", "line"], - "lines": [ - ["node_get_fsm_siblings_mean", "mean", "absolute"], - ["node_get_fsm_siblings_median", "median", "absolute"], - ["node_get_fsm_siblings_95", "95", "absolute"], - ["node_get_fsm_siblings_99", "99", "absolute"], - ["node_get_fsm_siblings_100", "100", "absolute"], - ] - }, - "kv.objsize.get": { - "options": [None, "Object size encountered by this node during the past minute", "KB", "load", - "riak.kv.objsize.get", "line"], - "lines": [ - ["node_get_fsm_objsize_mean", "mean", "absolute", 1, 1024], - ["node_get_fsm_objsize_median", "median", "absolute", 1, 1024], - ["node_get_fsm_objsize_95", "95", "absolute", 1, 1024], - ["node_get_fsm_objsize_99", "99", "absolute", 1, 1024], - ["node_get_fsm_objsize_100", "100", "absolute", 1, 1024], - ] - }, - "search.vnodeq_size": { - "options": [None, - "Number of unprocessed messages in the vnode message queues of Search on this node in the past minute", - "messages", "load", "riak.search.vnodeq_size", "line"], - "lines": [ - ["riak_search_vnodeq_mean", "mean", "absolute"], - ["riak_search_vnodeq_median", "median", "absolute"], - ["riak_search_vnodeq_95", "95", "absolute"], - ["riak_search_vnodeq_99", "99", "absolute"], - ["riak_search_vnodeq_100", "100", "absolute"], - ] - }, - "search.index_errors": { - "options": [None, "Number of document index errors encountered by Search", "errors", "load", - "riak.search.index", "line"], - "lines": [ - ["search_index_fail_count", "errors", "absolute"] - ] - }, - "core.pbc": { - "options": [None, "Protocol buffer connections by status", "connections", "load", - "riak.core.protobuf_connections", "line"], - "lines": [ - ["pbc_active", "active", "absolute"], - # ["pbc_connects", "established_pastmin", "absolute"] - ] - }, - "core.repairs": { - "options": [None, "Number of repair operations this node has coordinated", "repairs", "load", - "riak.core.repairs", "line"], - "lines": [ - ["read_repairs", "read", "absolute"] - ] - }, - "core.fsm_active": { - "options": [None, "Active finite state machines by kind", "fsms", "load", "riak.core.fsm_active", "line"], - "lines": [ - ["node_get_fsm_active", "get", "absolute"], - ["node_put_fsm_active", "put", "absolute"], - ["index_fsm_active", "secondary index", "absolute"], - ["list_fsm_active", "list keys", "absolute"] - ] - }, - "core.fsm_rejected": { - # Writing "Sidejob's" here seems to cause some weird issues: it results in this chart being rendered in - # its 
own context and additionally, moves the entire Riak graph all the way up to the top of the Netdata - # dashboard for some reason. - "options": [None, "Finite state machines being rejected by Sidejobs overload protection", "fsms", "load", - "riak.core.fsm_rejected", "line"], - "lines": [ - ["node_get_fsm_rejected", "get", "absolute"], - ["node_put_fsm_rejected", "put", "absolute"] - ] - }, - - # General Riak Search Load / Health metrics - "search.errors": { - "options": [None, "Number of writes to Search failed due to bad data format by reason", "writes", "load", - "riak.search.index", "line"], - "lines": [ - ["search_index_bad_entry_count", "bad_entry", "absolute"], - ["search_index_extract_fail_count", "extract_fail", "absolute"], - ] - } -} - - -class Service(UrlService): - def __init__(self, configuration=None, name=None): - UrlService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - - def _get_data(self): - """ - Format data received from http request - :return: dict - """ - raw = self._get_raw_data() - if not raw: - return None - - try: - return loads(raw) - except (TypeError, ValueError) as err: - self.error(err) - return None diff --git a/src/collectors/python.d.plugin/riakkv/riakkv.conf b/src/collectors/python.d.plugin/riakkv/riakkv.conf deleted file mode 100644 index be01c48ac..000000000 --- a/src/collectors/python.d.plugin/riakkv/riakkv.conf +++ /dev/null @@ -1,68 +0,0 @@ -# netdata python.d.plugin configuration for riak -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. 
These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) - -local: - url : 'http://localhost:8098/stats' diff --git a/src/collectors/python.d.plugin/samba/integrations/samba.md b/src/collectors/python.d.plugin/samba/integrations/samba.md index b4a551a8e..4d6f8fcc3 100644 --- a/src/collectors/python.d.plugin/samba/integrations/samba.md +++ b/src/collectors/python.d.plugin/samba/integrations/samba.md @@ -196,6 +196,7 @@ my_job_name: ### Debug Mode + To troubleshoot issues with the `samba` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -218,4 +219,37 @@ should give you clues as to why the collector isn't working. ./python.d.plugin samba debug trace ``` +### Getting Logs + +If you're encountering problems with the `samba` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep samba +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep samba /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep samba +``` + diff --git a/src/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md b/src/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md index 8f7fdaf4d..2e5e60669 100644 --- a/src/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md +++ b/src/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md @@ -191,6 +191,7 @@ remote_server: ### Debug Mode + To troubleshoot issues with the `spigotmc` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -213,4 +214,37 @@ should give you clues as to why the collector isn't working. ./python.d.plugin spigotmc debug trace ``` +### Getting Logs + +If you're encountering problems with the `spigotmc` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. 
+ +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep spigotmc +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep spigotmc /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep spigotmc +``` + diff --git a/src/collectors/python.d.plugin/squid/README.md b/src/collectors/python.d.plugin/squid/README.md deleted file mode 120000 index c4e5a03d7..000000000 --- a/src/collectors/python.d.plugin/squid/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/squid.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/squid/integrations/squid.md b/src/collectors/python.d.plugin/squid/integrations/squid.md deleted file mode 100644 index 10f927af7..000000000 --- a/src/collectors/python.d.plugin/squid/integrations/squid.md +++ /dev/null @@ -1,199 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/squid/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/squid/metadata.yaml" -sidebar_label: "Squid" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Web Servers and Web Proxies" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Squid - - -<img src="https://netdata.cloud/img/squid.png" width="150"/> - - -Plugin: python.d.plugin -Module: squid - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors statistics about the Squid Clients and Servers, like bandwidth and requests. - - -It collects metrics from the endpoint where Squid exposes its `counters` data. - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -By default, this collector will try to autodetect where Squid presents its `counters` data, by trying various configurations. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Squid instance - -These metrics refer to each monitored Squid instance. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| squid.clients_net | in, out, hits | kilobits/s | -| squid.clients_requests | requests, hits, errors | requests/s | -| squid.servers_net | in, out | kilobits/s | -| squid.servers_requests | requests, errors | requests/s | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Configure Squid's Cache Manager - -Take a look at [Squid's official documentation](https://wiki.squid-cache.org/Features/CacheManager/Index#controlling-access-to-the-cache-manager) on how to configure access to the Cache Manager. - - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/squid.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/squid.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. 
- -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 1 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | no | -| host | The host to connect to. | | yes | -| port | The port to connect to. | | yes | -| request | The URL to request from Squid. | | yes | - -</details> - -#### Examples - -##### Basic - -A basic configuration example. - -```yaml -example_job_name: - name: 'local' - host: 'localhost' - port: 3128 - request: 'cache_object://localhost:3128/counters' - -``` -##### Multi-instance - -> **Note**: When you define multiple jobs, their names must be unique. - -Collecting metrics from local and remote instances. - - -<details open><summary>Config</summary> - -```yaml -local_job: - name: 'local' - host: '127.0.0.1' - port: 3128 - request: 'cache_object://127.0.0.1:3128/counters' - -remote_job: - name: 'remote' - host: '192.0.2.1' - port: 3128 - request: 'cache_object://192.0.2.1:3128/counters' - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `squid` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin squid debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/squid/metadata.yaml b/src/collectors/python.d.plugin/squid/metadata.yaml deleted file mode 100644 index d0c5b3ecc..000000000 --- a/src/collectors/python.d.plugin/squid/metadata.yaml +++ /dev/null @@ -1,174 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: squid - monitored_instance: - name: Squid - link: "http://www.squid-cache.org/" - categories: - - data-collection.web-servers-and-web-proxies - icon_filename: "squid.png" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - squid - - web delivery - - squid caching proxy - most_popular: false - overview: - data_collection: - metrics_description: | - This collector monitors statistics about the Squid Clients and Servers, like bandwidth and requests. - method_description: "It collects metrics from the endpoint where Squid exposes its `counters` data." 
- supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "By default, this collector will try to autodetect where Squid presents its `counters` data, by trying various configurations." - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: Configure Squid's Cache Manager - description: | - Take a look at [Squid's official documentation](https://wiki.squid-cache.org/Features/CacheManager/Index#controlling-access-to-the-cache-manager) on how to configure access to the Cache Manager. - configuration: - file: - name: "python.d/squid.conf" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 1 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: > - Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: "local" - required: false - - name: host - description: The host to connect to. - default_value: "" - required: true - - name: port - description: The port to connect to. - default_value: "" - required: true - - name: request - description: The URL to request from Squid. - default_value: "" - required: true - examples: - folding: - enabled: true - title: "Config" - list: - - name: Basic - description: A basic configuration example. - folding: - enabled: false - config: | - example_job_name: - name: 'local' - host: 'localhost' - port: 3128 - request: 'cache_object://localhost:3128/counters' - - name: Multi-instance - description: | - > **Note**: When you define multiple jobs, their names must be unique. - - Collecting metrics from local and remote instances. - config: | - local_job: - name: 'local' - host: '127.0.0.1' - port: 3128 - request: 'cache_object://127.0.0.1:3128/counters' - - remote_job: - name: 'remote' - host: '192.0.2.1' - port: 3128 - request: 'cache_object://192.0.2.1:3128/counters' - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: Squid instance - description: "These metrics refer to each monitored Squid instance." 
- labels: [] - metrics: - - name: squid.clients_net - description: Squid Client Bandwidth - unit: "kilobits/s" - chart_type: area - dimensions: - - name: in - - name: out - - name: hits - - name: squid.clients_requests - description: Squid Client Requests - unit: "requests/s" - chart_type: line - dimensions: - - name: requests - - name: hits - - name: errors - - name: squid.servers_net - description: Squid Server Bandwidth - unit: "kilobits/s" - chart_type: area - dimensions: - - name: in - - name: out - - name: squid.servers_requests - description: Squid Server Requests - unit: "requests/s" - chart_type: line - dimensions: - - name: requests - - name: errors diff --git a/src/collectors/python.d.plugin/squid/squid.chart.py b/src/collectors/python.d.plugin/squid/squid.chart.py deleted file mode 100644 index bcae2d892..000000000 --- a/src/collectors/python.d.plugin/squid/squid.chart.py +++ /dev/null @@ -1,123 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: squid netdata python.d module -# Author: Pawel Krupa (paulfantom) -# SPDX-License-Identifier: GPL-3.0-or-later - -from bases.FrameworkServices.SocketService import SocketService - -ORDER = [ - 'clients_net', - 'clients_requests', - 'servers_net', - 'servers_requests', -] - -CHARTS = { - 'clients_net': { - 'options': [None, 'Squid Client Bandwidth', 'kilobits/s', 'clients', 'squid.clients_net', 'area'], - 'lines': [ - ['client_http_kbytes_in', 'in', 'incremental', 8, 1], - ['client_http_kbytes_out', 'out', 'incremental', -8, 1], - ['client_http_hit_kbytes_out', 'hits', 'incremental', -8, 1] - ] - }, - 'clients_requests': { - 'options': [None, 'Squid Client Requests', 'requests/s', 'clients', 'squid.clients_requests', 'line'], - 'lines': [ - ['client_http_requests', 'requests', 'incremental'], - ['client_http_hits', 'hits', 'incremental'], - ['client_http_errors', 'errors', 'incremental', -1, 1] - ] - }, - 'servers_net': { - 'options': [None, 'Squid Server Bandwidth', 'kilobits/s', 'servers', 'squid.servers_net', 'area'], - 'lines': [ - ['server_all_kbytes_in', 'in', 'incremental', 8, 1], - ['server_all_kbytes_out', 'out', 'incremental', -8, 1] - ] - }, - 'servers_requests': { - 'options': [None, 'Squid Server Requests', 'requests/s', 'servers', 'squid.servers_requests', 'line'], - 'lines': [ - ['server_all_requests', 'requests', 'incremental'], - ['server_all_errors', 'errors', 'incremental', -1, 1] - ] - } -} - - -class Service(SocketService): - def __init__(self, configuration=None, name=None): - SocketService.__init__(self, configuration=configuration, name=name) - self._keep_alive = True - self.request = '' - self.host = 'localhost' - self.port = 3128 - self.order = ORDER - self.definitions = CHARTS - - def _get_data(self): - """ - Get data via http request - :return: dict - """ - response = self._get_raw_data() - - data = dict() - try: - raw = '' - for tmp in response.split('\r\n'): - if tmp.startswith('sample_time'): - raw = tmp - break - - if raw.startswith('<'): - self.error('invalid data received') - return None - - for row in raw.split('\n'): - if row.startswith(('client', 'server.all')): - tmp = row.split('=') - data[tmp[0].replace('.', '_').strip(' ')] = int(tmp[1]) - - except (ValueError, AttributeError, TypeError): - self.error('invalid data received') - return None - - if not data: - self.error('no data received') - return None - return data - - def _check_raw_data(self, data): - header = data[:1024].lower() - - if 'connection: keep-alive' in header: - self._keep_alive = True - else: - self._keep_alive = False - - if 
data[-7:] == '\r\n0\r\n\r\n' and 'transfer-encoding: chunked' in header: # HTTP/1.1 response - self.debug('received full response from squid') - return True - - self.debug('waiting more data from squid') - return False - - def check(self): - """ - Parse essential configuration, autodetect squid configuration (if needed), and check if data is available - :return: boolean - """ - self._parse_config() - # format request - req = self.request.decode() - if not req.startswith('GET'): - req = 'GET ' + req - if not req.endswith(' HTTP/1.1\r\n\r\n'): - req += ' HTTP/1.1\r\n\r\n' - self.request = req.encode() - if self._get_data() is not None: - return True - else: - return False diff --git a/src/collectors/python.d.plugin/squid/squid.conf b/src/collectors/python.d.plugin/squid/squid.conf deleted file mode 100644 index b90a52c0c..000000000 --- a/src/collectors/python.d.plugin/squid/squid.conf +++ /dev/null @@ -1,167 +0,0 @@ -# netdata python.d.plugin configuration for squid -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. 
These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, squid also supports the following: -# -# host : 'IP or HOSTNAME' # the host to connect to -# port : PORT # the port to connect to -# request: 'URL' # the URL to request from squid -# - -# ---------------------------------------------------------------------- -# SQUID CONFIGURATION -# -# See: -# http://wiki.squid-cache.org/Features/CacheManager -# -# In short, add to your squid configuration these: -# -# http_access allow localhost manager -# http_access deny manager -# -# To remotely monitor a squid: -# -# acl managerAdmin src 192.0.2.1 -# http_access allow localhost manager -# http_access allow managerAdmin manager -# http_access deny manager -# - -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) - -tcp3128old: - name : 'local' - host : 'localhost' - port : 3128 - request : 'cache_object://localhost:3128/counters' - -tcp8080old: - name : 'local' - host : 'localhost' - port : 8080 - request : 'cache_object://localhost:3128/counters' - -tcp3128new: - name : 'local' - host : 'localhost' - port : 3128 - request : '/squid-internal-mgr/counters' - -tcp8080new: - name : 'local' - host : 'localhost' - port : 8080 - request : '/squid-internal-mgr/counters' - -# IPv4 - -tcp3128oldipv4: - name : 'local' - host : '127.0.0.1' - port : 3128 - request : 'cache_object://127.0.0.1:3128/counters' - -tcp8080oldipv4: - name : 'local' - host : '127.0.0.1' - port : 8080 - request : 'cache_object://127.0.0.1:3128/counters' - -tcp3128newipv4: - name : 'local' - host : '127.0.0.1' - port : 3128 - request : '/squid-internal-mgr/counters' - -tcp8080newipv4: - name : 'local' - host : '127.0.0.1' - port : 8080 - request : '/squid-internal-mgr/counters' - -# IPv6 - -tcp3128oldipv6: - name : 'local' - host : '::1' - port : 3128 - request : 'cache_object://[::1]:3128/counters' - -tcp8080oldipv6: - name : 'local' - host : '::1' - port : 8080 - request : 'cache_object://[::1]:3128/counters' - -tcp3128newipv6: - name : 'local' - host : '::1' - port : 3128 - request : '/squid-internal-mgr/counters' - -tcp8080newipv6: - name : 'local' - host : '::1' - port : 8080 - request : '/squid-internal-mgr/counters' - diff --git a/src/collectors/python.d.plugin/tomcat/README.md b/src/collectors/python.d.plugin/tomcat/README.md deleted file mode 120000 index 997090c35..000000000 --- a/src/collectors/python.d.plugin/tomcat/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/tomcat.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/tomcat/integrations/tomcat.md b/src/collectors/python.d.plugin/tomcat/integrations/tomcat.md deleted file mode 100644 index 64938ad62..000000000 --- a/src/collectors/python.d.plugin/tomcat/integrations/tomcat.md +++ /dev/null @@ -1,203 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/tomcat/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/tomcat/metadata.yaml" -sidebar_label: "Tomcat" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Web Servers and Web Proxies" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Tomcat - - -<img src="https://netdata.cloud/img/tomcat.svg" width="150"/> - - -Plugin: python.d.plugin -Module: tomcat - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors Tomcat metrics about bandwidth, processing time, threads and more. - - -It parses the information provided by the http endpoint of the `/manager/status` in XML format - - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - -You need to provide the username and the password, to access the webserver's status page. Create a seperate user with read only rights for this particular endpoint - -### Default Behavior - -#### Auto-Detection - -If the Netdata Agent and the Tomcat webserver are in the same host, without configuration, module attempts to connect to http://localhost:8080/manager/status?XML=true, without any credentials. So it will probably fail. - -#### Limits - -This module is not supporting SSL communication. If you want a Netdata Agent to monitor a Tomcat deployment, you shouldnt try to monitor it via public network (public internet). Credentials are passed by Netdata in an unsecure port - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Tomcat instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| tomcat.accesses | accesses, errors | requests/s | -| tomcat.bandwidth | sent, received | KiB/s | -| tomcat.processing_time | processing time | seconds | -| tomcat.threads | current, busy | current threads | -| tomcat.jvm | free, eden, survivor, tenured, code cache, compressed, metaspace | MiB | -| tomcat.jvm_eden | used, committed, max | MiB | -| tomcat.jvm_survivor | used, committed, max | MiB | -| tomcat.jvm_tenured | used, committed, max | MiB | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Create a read-only `netdata` user, to monitor the `/status` endpoint. - -This is necessary for configuring the collector. - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/tomcat.conf`. 
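For reference, a hypothetical job for a password-protected `/manager/status` endpoint might look like the sketch below; the `user`/`pass` options documented further down are only needed when the endpoint requires authentication, and the `connector_name` value is purely illustrative:

```yaml
# Hypothetical job for a status endpoint protected by basic authentication.
secured:
  name: 'local'
  url: 'http://127.0.0.1:8080/manager/status?XML=true'
  user: 'netdata'                  # the read-only user from the prerequisites step
  pass: 'some_password'            # example placeholder
  connector_name: 'ajp-bio-8009'   # optional; only if you need a specific connector
```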
- - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/tomcat.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options per job</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| url | The URL of the Tomcat server's status endpoint. Always add the suffix ?XML=true. | no | yes | -| user | A valid user with read permission to access the /manager/status endpoint of the server. Required if the endpoint is password protected | no | no | -| pass | A valid password for the user in question. Required if the endpoint is password protected | no | no | -| connector_name | The connector component that communicates with a web connector via the AJP protocol, e.g ajp-bio-8009 | | no | - -</details> - -#### Examples - -##### Basic - -A basic example configuration - -```yaml -localhost: - name : 'local' - url : 'http://localhost:8080/manager/status?XML=true' - -``` -##### Using an IPv4 endpoint - -A typical configuration using an IPv4 endpoint - -<details open><summary>Config</summary> - -```yaml -local_ipv4: - name : 'local' - url : 'http://127.0.0.1:8080/manager/status?XML=true' - -``` -</details> - -##### Using an IPv6 endpoint - -A typical configuration using an IPv6 endpoint - -<details open><summary>Config</summary> - -```yaml -local_ipv6: - name : 'local' - url : 'http://[::1]:8080/manager/status?XML=true' - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `tomcat` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. 
- - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin tomcat debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/tomcat/metadata.yaml b/src/collectors/python.d.plugin/tomcat/metadata.yaml deleted file mode 100644 index e68526073..000000000 --- a/src/collectors/python.d.plugin/tomcat/metadata.yaml +++ /dev/null @@ -1,200 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: tomcat - monitored_instance: - name: Tomcat - link: "https://tomcat.apache.org/" - categories: - - data-collection.web-servers-and-web-proxies - icon_filename: "tomcat.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - apache - - tomcat - - webserver - - websocket - - jakarta - - javaEE - most_popular: false - overview: - data_collection: - metrics_description: | - This collector monitors Tomcat metrics about bandwidth, processing time, threads and more. - method_description: | - It parses the information provided by the http endpoint of the `/manager/status` in XML format - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "You need to provide the username and the password, to access the webserver's status page. Create a seperate user with read only rights for this particular endpoint" - default_behavior: - auto_detection: - description: "If the Netdata Agent and the Tomcat webserver are in the same host, without configuration, module attempts to connect to http://localhost:8080/manager/status?XML=true, without any credentials. So it will probably fail." - limits: - description: "This module is not supporting SSL communication. If you want a Netdata Agent to monitor a Tomcat deployment, you shouldnt try to monitor it via public network (public internet). Credentials are passed by Netdata in an unsecure port" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: Create a read-only `netdata` user, to monitor the `/status` endpoint. - description: This is necessary for configuring the collector. - configuration: - file: - name: "python.d/tomcat.conf" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options per job" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: url - description: The URL of the Tomcat server's status endpoint. Always add the suffix ?XML=true. 
- default_value: no - required: true - - name: user - description: A valid user with read permission to access the /manager/status endpoint of the server. Required if the endpoint is password protected - default_value: no - required: false - - name: pass - description: A valid password for the user in question. Required if the endpoint is password protected - default_value: no - required: false - - name: connector_name - description: The connector component that communicates with a web connector via the AJP protocol, e.g ajp-bio-8009 - default_value: "" - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Basic - folding: - enabled: false - description: A basic example configuration - config: | - localhost: - name : 'local' - url : 'http://localhost:8080/manager/status?XML=true' - - name: Using an IPv4 endpoint - description: A typical configuration using an IPv4 endpoint - config: | - local_ipv4: - name : 'local' - url : 'http://127.0.0.1:8080/manager/status?XML=true' - - name: Using an IPv6 endpoint - description: A typical configuration using an IPv6 endpoint - config: | - local_ipv6: - name : 'local' - url : 'http://[::1]:8080/manager/status?XML=true' - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: tomcat.accesses - description: Requests - unit: "requests/s" - chart_type: area - dimensions: - - name: accesses - - name: errors - - name: tomcat.bandwidth - description: Bandwidth - unit: "KiB/s" - chart_type: area - dimensions: - - name: sent - - name: received - - name: tomcat.processing_time - description: processing time - unit: "seconds" - chart_type: area - dimensions: - - name: processing time - - name: tomcat.threads - description: Threads - unit: "current threads" - chart_type: area - dimensions: - - name: current - - name: busy - - name: tomcat.jvm - description: JVM Memory Pool Usage - unit: "MiB" - chart_type: stacked - dimensions: - - name: free - - name: eden - - name: survivor - - name: tenured - - name: code cache - - name: compressed - - name: metaspace - - name: tomcat.jvm_eden - description: Eden Memory Usage - unit: "MiB" - chart_type: area - dimensions: - - name: used - - name: committed - - name: max - - name: tomcat.jvm_survivor - description: Survivor Memory Usage - unit: "MiB" - chart_type: area - dimensions: - - name: used - - name: committed - - name: max - - name: tomcat.jvm_tenured - description: Tenured Memory Usage - unit: "MiB" - chart_type: area - dimensions: - - name: used - - name: committed - - name: max diff --git a/src/collectors/python.d.plugin/tomcat/tomcat.chart.py b/src/collectors/python.d.plugin/tomcat/tomcat.chart.py deleted file mode 100644 index 90315f8c7..000000000 --- a/src/collectors/python.d.plugin/tomcat/tomcat.chart.py +++ /dev/null @@ -1,199 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: tomcat netdata python.d module -# Author: Pawel Krupa (paulfantom) -# Author: Wei He (Wing924) -# SPDX-License-Identifier: GPL-3.0-or-later - -import re -import xml.etree.ElementTree as ET - -from bases.FrameworkServices.UrlService import UrlService - -MiB = 1 << 20 - -# Regex fix for Tomcat single quote XML attributes -# affecting Tomcat < 8.5.24 & 9.0.2 running with Java > 9 -# cf. 
https://bz.apache.org/bugzilla/show_bug.cgi?id=61603 -single_quote_regex = re.compile(r"='([^']+)'([^']+)''") - -ORDER = [ - 'accesses', - 'bandwidth', - 'processing_time', - 'threads', - 'jvm', - 'jvm_eden', - 'jvm_survivor', - 'jvm_tenured', -] - -CHARTS = { - 'accesses': { - 'options': [None, 'Requests', 'requests/s', 'statistics', 'tomcat.accesses', 'area'], - 'lines': [ - ['requestCount', 'accesses', 'incremental'], - ['errorCount', 'errors', 'incremental'], - ] - }, - 'bandwidth': { - 'options': [None, 'Bandwidth', 'KiB/s', 'statistics', 'tomcat.bandwidth', 'area'], - 'lines': [ - ['bytesSent', 'sent', 'incremental', 1, 1024], - ['bytesReceived', 'received', 'incremental', 1, 1024], - ] - }, - 'processing_time': { - 'options': [None, 'processing time', 'seconds', 'statistics', 'tomcat.processing_time', 'area'], - 'lines': [ - ['processingTime', 'processing time', 'incremental', 1, 1000] - ] - }, - 'threads': { - 'options': [None, 'Threads', 'current threads', 'statistics', 'tomcat.threads', 'area'], - 'lines': [ - ['currentThreadCount', 'current', 'absolute'], - ['currentThreadsBusy', 'busy', 'absolute'] - ] - }, - 'jvm': { - 'options': [None, 'JVM Memory Pool Usage', 'MiB', 'memory', 'tomcat.jvm', 'stacked'], - 'lines': [ - ['free', 'free', 'absolute', 1, MiB], - ['eden_used', 'eden', 'absolute', 1, MiB], - ['survivor_used', 'survivor', 'absolute', 1, MiB], - ['tenured_used', 'tenured', 'absolute', 1, MiB], - ['code_cache_used', 'code cache', 'absolute', 1, MiB], - ['compressed_used', 'compressed', 'absolute', 1, MiB], - ['metaspace_used', 'metaspace', 'absolute', 1, MiB], - ] - }, - 'jvm_eden': { - 'options': [None, 'Eden Memory Usage', 'MiB', 'memory', 'tomcat.jvm_eden', 'area'], - 'lines': [ - ['eden_used', 'used', 'absolute', 1, MiB], - ['eden_committed', 'committed', 'absolute', 1, MiB], - ['eden_max', 'max', 'absolute', 1, MiB] - ] - }, - 'jvm_survivor': { - 'options': [None, 'Survivor Memory Usage', 'MiB', 'memory', 'tomcat.jvm_survivor', 'area'], - 'lines': [ - ['survivor_used', 'used', 'absolute', 1, MiB], - ['survivor_committed', 'committed', 'absolute', 1, MiB], - ['survivor_max', 'max', 'absolute', 1, MiB], - ] - }, - 'jvm_tenured': { - 'options': [None, 'Tenured Memory Usage', 'MiB', 'memory', 'tomcat.jvm_tenured', 'area'], - 'lines': [ - ['tenured_used', 'used', 'absolute', 1, MiB], - ['tenured_committed', 'committed', 'absolute', 1, MiB], - ['tenured_max', 'max', 'absolute', 1, MiB] - ] - } -} - - -class Service(UrlService): - def __init__(self, configuration=None, name=None): - UrlService.__init__(self, configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.url = self.configuration.get('url', 'http://127.0.0.1:8080/manager/status?XML=true') - self.connector_name = self.configuration.get('connector_name', None) - self.parse = self.xml_parse - - def xml_parse(self, data): - try: - return ET.fromstring(data) - except ET.ParseError: - self.debug('%s is not a valid XML page. Please add "?XML=true" to tomcat status page.' % self.url) - return None - - def xml_single_quote_fix_parse(self, data): - data = single_quote_regex.sub(r"='\g<1>\g<2>'", data) - return self.xml_parse(data) - - def check(self): - self._manager = self._build_manager() - - raw_data = self._get_raw_data() - if not raw_data: - return False - - if single_quote_regex.search(raw_data): - self.warning('Tomcat status page is returning invalid single quote XML, please consider upgrading ' - 'your Tomcat installation. 
See https://bz.apache.org/bugzilla/show_bug.cgi?id=61603') - self.parse = self.xml_single_quote_fix_parse - - return self.parse(raw_data) is not None - - def _get_data(self): - """ - Format data received from http request - :return: dict - """ - data = None - raw_data = self._get_raw_data() - if raw_data: - xml = self.parse(raw_data) - if xml is None: - return None - - data = {} - - jvm = xml.find('jvm') - - connector = None - if self.connector_name: - for conn in xml.findall('connector'): - if self.connector_name in conn.get('name'): - connector = conn - break - else: - connector = xml.find('connector') - - memory = jvm.find('memory') - data['free'] = memory.get('free') - data['total'] = memory.get('total') - - for pool in jvm.findall('memorypool'): - name = pool.get('name') - if 'Eden Space' in name: - data['eden_used'] = pool.get('usageUsed') - data['eden_committed'] = pool.get('usageCommitted') - data['eden_max'] = pool.get('usageMax') - elif 'Survivor Space' in name: - data['survivor_used'] = pool.get('usageUsed') - data['survivor_committed'] = pool.get('usageCommitted') - data['survivor_max'] = pool.get('usageMax') - elif 'Tenured Gen' in name or 'Old Gen' in name: - data['tenured_used'] = pool.get('usageUsed') - data['tenured_committed'] = pool.get('usageCommitted') - data['tenured_max'] = pool.get('usageMax') - elif name == 'Code Cache': - data['code_cache_used'] = pool.get('usageUsed') - data['code_cache_committed'] = pool.get('usageCommitted') - data['code_cache_max'] = pool.get('usageMax') - elif name == 'Compressed': - data['compressed_used'] = pool.get('usageUsed') - data['compressed_committed'] = pool.get('usageCommitted') - data['compressed_max'] = pool.get('usageMax') - elif name == 'Metaspace': - data['metaspace_used'] = pool.get('usageUsed') - data['metaspace_committed'] = pool.get('usageCommitted') - data['metaspace_max'] = pool.get('usageMax') - - if connector is not None: - thread_info = connector.find('threadInfo') - data['currentThreadsBusy'] = thread_info.get('currentThreadsBusy') - data['currentThreadCount'] = thread_info.get('currentThreadCount') - - request_info = connector.find('requestInfo') - data['processingTime'] = request_info.get('processingTime') - data['requestCount'] = request_info.get('requestCount') - data['errorCount'] = request_info.get('errorCount') - data['bytesReceived'] = request_info.get('bytesReceived') - data['bytesSent'] = request_info.get('bytesSent') - - return data or None diff --git a/src/collectors/python.d.plugin/tomcat/tomcat.conf b/src/collectors/python.d.plugin/tomcat/tomcat.conf deleted file mode 100644 index 009591bdf..000000000 --- a/src/collectors/python.d.plugin/tomcat/tomcat.conf +++ /dev/null @@ -1,89 +0,0 @@ -# netdata python.d.plugin configuration for tomcat -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. 
-# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, tomcat also supports the following: -# -# url: 'URL' # the URL to fetch nginx's status stats -# -# if the URL is password protected, the following are supported: -# -# user: 'username' -# pass: 'password' -# -# if you have multiple connectors, the following are supported: -# -# connector_name: 'ajp-bio-8009' # default is null, which use first connector in status XML -# -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) - -localhost: - name : 'local' - url : 'http://localhost:8080/manager/status?XML=true' - -localipv4: - name : 'local' - url : 'http://127.0.0.1:8080/manager/status?XML=true' - -localipv6: - name : 'local' - url : 'http://[::1]:8080/manager/status?XML=true' diff --git a/src/collectors/python.d.plugin/tor/README.md b/src/collectors/python.d.plugin/tor/README.md deleted file mode 120000 index 7c20cd40a..000000000 --- a/src/collectors/python.d.plugin/tor/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/tor.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/tor/integrations/tor.md b/src/collectors/python.d.plugin/tor/integrations/tor.md deleted file mode 100644 index 728245cfa..000000000 --- a/src/collectors/python.d.plugin/tor/integrations/tor.md +++ /dev/null @@ -1,197 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/tor/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/tor/metadata.yaml" -sidebar_label: "Tor" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/VPNs" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# Tor - - -<img src="https://netdata.cloud/img/tor.svg" width="150"/> - - -Plugin: python.d.plugin -Module: tor - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors Tor bandwidth traffic . - -It connects to the Tor control port to collect traffic statistics. - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -If no configuration is provided the collector will try to connect to 127.0.0.1:9051 to detect a running tor instance. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per Tor instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| tor.traffic | read, write | KiB/s | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Required python module - -The `stem` python library needs to be installed. - - -#### Required Tor configuration - -Add to /etc/tor/torrc: - -ControlPort 9051 - -For more options please read the manual. - - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/tor.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/tor.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. - -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. 
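For instance (a hypothetical fragment complementing the examples further below), `autodetection_retry` can be combined with the connection options so the job keeps re-checking until Tor's control port becomes reachable, e.g. when Tor starts after Netdata:

```yaml
local_tcp:
  name: 'local'
  control_addr: '127.0.0.1'
  control_port: 9051
  password: <password>       # only if the control port requires one
  autodetection_retry: 30    # re-check every 30 seconds until Tor is reachable
```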
- - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no | -| control_addr | Tor control IP address | 127.0.0.1 | no | -| control_port | Tor control port. Can be either a tcp port, or a path to a socket file. | 9051 | no | -| password | Tor control password | | no | - -</details> - -#### Examples - -##### Local TCP - -A basic TCP configuration. `local_addr` is ommited and will default to `127.0.0.1` - -<details open><summary>Config</summary> - -```yaml -local_tcp: - name: 'local' - control_port: 9051 - password: <password> # if required - -``` -</details> - -##### Local socket - -A basic local socket configuration - -<details open><summary>Config</summary> - -```yaml -local_socket: - name: 'local' - control_port: '/var/run/tor/control' - password: <password> # if required - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `tor` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. - - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin tor debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/tor/metadata.yaml b/src/collectors/python.d.plugin/tor/metadata.yaml deleted file mode 100644 index 8647eca23..000000000 --- a/src/collectors/python.d.plugin/tor/metadata.yaml +++ /dev/null @@ -1,143 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: tor - monitored_instance: - name: Tor - link: 'https://www.torproject.org/' - categories: - - data-collection.vpns - icon_filename: 'tor.svg' - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: '' - keywords: - - tor - - traffic - - vpn - most_popular: false - overview: - data_collection: - metrics_description: 'This collector monitors Tor bandwidth traffic .' - method_description: 'It connects to the Tor control port to collect traffic statistics.' - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: '' - default_behavior: - auto_detection: - description: 'If no configuration is provided the collector will try to connect to 127.0.0.1:9051 to detect a running tor instance.' - limits: - description: '' - performance_impact: - description: '' - setup: - prerequisites: - list: - - title: 'Required python module' - description: | - The `stem` python library needs to be installed. 
- - title: 'Required Tor configuration' - description: | - Add to /etc/tor/torrc: - - ControlPort 9051 - - For more options please read the manual. - configuration: - file: - name: python.d/tor.conf - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. - default_value: yes - required: false - - name: name - description: Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. - default_value: '' - required: false - - name: control_addr - description: Tor control IP address - default_value: 127.0.0.1 - required: false - - name: control_port - description: Tor control port. Can be either a tcp port, or a path to a socket file. - default_value: 9051 - required: false - - name: password - description: Tor control password - default_value: '' - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Local TCP - description: A basic TCP configuration. `local_addr` is ommited and will default to `127.0.0.1` - config: | - local_tcp: - name: 'local' - control_port: 9051 - password: <password> # if required - - name: Local socket - description: A basic local socket configuration - config: | - local_socket: - name: 'local' - control_port: '/var/run/tor/control' - password: <password> # if required - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." 
- labels: [] - metrics: - - name: tor.traffic - description: Tor Traffic - unit: "KiB/s" - chart_type: area - dimensions: - - name: read - - name: write diff --git a/src/collectors/python.d.plugin/tor/tor.chart.py b/src/collectors/python.d.plugin/tor/tor.chart.py deleted file mode 100644 index f7bc2d79b..000000000 --- a/src/collectors/python.d.plugin/tor/tor.chart.py +++ /dev/null @@ -1,109 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: adaptec_raid netdata python.d module -# Author: Federico Ceratto <federico.ceratto@gmail.com> -# Author: Ilya Mashchenko (ilyam8) -# SPDX-License-Identifier: GPL-3.0-or-later - - -from bases.FrameworkServices.SimpleService import SimpleService - -try: - import stem - import stem.connection - import stem.control - - STEM_AVAILABLE = True -except ImportError: - STEM_AVAILABLE = False - -DEF_PORT = 'default' -DEF_ADDR = '127.0.0.1' - -ORDER = [ - 'traffic', -] - -CHARTS = { - 'traffic': { - 'options': [None, 'Tor Traffic', 'KiB/s', 'traffic', 'tor.traffic', 'area'], - 'lines': [ - ['read', 'read', 'incremental', 1, 1024], - ['write', 'write', 'incremental', 1, -1024], - ] - } -} - - -class Service(SimpleService): - """Provide netdata service for Tor""" - - def __init__(self, configuration=None, name=None): - super(Service, self).__init__(configuration=configuration, name=name) - self.order = ORDER - self.definitions = CHARTS - self.port = self.configuration.get('control_port', DEF_PORT) - self.addr = self.configuration.get('control_addr', DEF_ADDR) - self.password = self.configuration.get('password') - self.use_socket = isinstance(self.port, str) and self.port != DEF_PORT and not self.port.isdigit() - self.conn = None - self.alive = False - - def check(self): - if not STEM_AVAILABLE: - self.error('the stem library is missing') - return False - - return self.connect() - - def get_data(self): - if not self.alive and not self.reconnect(): - return None - - data = dict() - - try: - data['read'] = self.conn.get_info('traffic/read') - data['write'] = self.conn.get_info('traffic/written') - except stem.ControllerError as error: - self.debug(error) - self.alive = False - - return data or None - - def authenticate(self): - try: - self.conn.authenticate(password=self.password) - except stem.connection.AuthenticationFailure as error: - self.error('authentication error: {0}'.format(error)) - return False - return True - - def connect_via_port(self): - try: - self.conn = stem.control.Controller.from_port(address=self.addr, port=self.port) - except (stem.SocketError, ValueError) as error: - self.error(error) - - def connect_via_socket(self): - try: - self.conn = stem.control.Controller.from_socket_file(path=self.port) - except (stem.SocketError, ValueError) as error: - self.error(error) - - def connect(self): - if self.conn: - self.conn.close() - self.conn = None - - if self.use_socket: - self.connect_via_socket() - else: - self.connect_via_port() - - if self.conn and self.authenticate(): - self.alive = True - - return self.alive - - def reconnect(self): - return self.connect() diff --git a/src/collectors/python.d.plugin/tor/tor.conf b/src/collectors/python.d.plugin/tor/tor.conf deleted file mode 100644 index c7c98dc0b..000000000 --- a/src/collectors/python.d.plugin/tor/tor.conf +++ /dev/null @@ -1,81 +0,0 @@ -# netdata python.d.plugin configuration for tor -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. 
-# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, tor plugin also supports the following: -# -# control_addr: 'address' # tor control IP address (defaults to '127.0.0.1') -# control_port: 'port' # tor control port -# password: 'password' # tor control password -# -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) -# -# local_tcp: -# name: 'local' -# control_port: 9051 -# control_addr: 127.0.0.1 -# password: <password> -# -# local_socket: -# name: 'local' -# control_port: '/var/run/tor/control' -# password: <password> diff --git a/src/collectors/python.d.plugin/traefik/metadata.yaml b/src/collectors/python.d.plugin/traefik/metadata.yaml index 1d65a3dfe..5382ad54f 100644 --- a/src/collectors/python.d.plugin/traefik/metadata.yaml +++ b/src/collectors/python.d.plugin/traefik/metadata.yaml @@ -1,5 +1,5 @@ # This collector will not appear in documentation, as the go version is preferred, -# /src/go/collectors/go.d.plugin/modules/traefik/README.md +# /src/go/plugin/go.d/modules/traefik/README.md # # meta: # plugin_name: python.d.plugin diff --git a/src/collectors/python.d.plugin/uwsgi/README.md b/src/collectors/python.d.plugin/uwsgi/README.md deleted file mode 120000 index 44b855949..000000000 --- a/src/collectors/python.d.plugin/uwsgi/README.md +++ /dev/null @@ -1 +0,0 @@ -integrations/uwsgi.md
\ No newline at end of file diff --git a/src/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md b/src/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md deleted file mode 100644 index 508d9d195..000000000 --- a/src/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md +++ /dev/null @@ -1,219 +0,0 @@ -<!--startmeta -custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/uwsgi/README.md" -meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/uwsgi/metadata.yaml" -sidebar_label: "uWSGI" -learn_status: "Published" -learn_rel_path: "Collecting Metrics/Web Servers and Web Proxies" -most_popular: False -message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" -endmeta--> - -# uWSGI - - -<img src="https://netdata.cloud/img/uwsgi.svg" width="150"/> - - -Plugin: python.d.plugin -Module: uwsgi - -<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> - -## Overview - -This collector monitors uWSGI metrics about requests, workers, memory and more. - -It collects every metric exposed from the stats server of uWSGI, either from the `stats.socket` or from the web server's TCP/IP socket. - -This collector is supported on all platforms. - -This collector supports collecting metrics from multiple instances of this integration, including remote instances. - - -### Default Behavior - -#### Auto-Detection - -This collector will auto-detect uWSGI instances deployed on the local host, running on port 1717, or exposing stats on socket `tmp/stats.socket`. - -#### Limits - -The default configuration for this integration does not impose any limits on data collection. - -#### Performance Impact - -The default configuration for this integration is not expected to impose a significant performance impact on the system. - - -## Metrics - -Metrics grouped by *scope*. - -The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. - - - -### Per uWSGI instance - -These metrics refer to the entire monitored application. - -This scope has no labels. - -Metrics: - -| Metric | Dimensions | Unit | -|:------|:----------|:----| -| uwsgi.requests | a dimension per worker | requests/s | -| uwsgi.tx | a dimension per worker | KiB/s | -| uwsgi.avg_rt | a dimension per worker | milliseconds | -| uwsgi.memory_rss | a dimension per worker | MiB | -| uwsgi.memory_vsz | a dimension per worker | MiB | -| uwsgi.exceptions | exceptions | exceptions | -| uwsgi.harakiris | harakiris | harakiris | -| uwsgi.respawns | respawns | respawns | - - - -## Alerts - -There are no alerts configured by default for this integration. - - -## Setup - -### Prerequisites - -#### Enable the uWSGI Stats server - -Make sure that you uWSGI exposes it's metrics via a Stats server. - -Source: https://uwsgi-docs.readthedocs.io/en/latest/StatsServer.html - - - -### Configuration - -#### File - -The configuration file name for this integration is `python.d/uwsgi.conf`. - - -You can edit the configuration file using the `edit-config` script from the -Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). - -```bash -cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata -sudo ./edit-config python.d/uwsgi.conf -``` -#### Options - -There are 2 sections: - -* Global variables -* One or more JOBS that can define multiple different instances to monitor. 
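As a quick illustration of a JOB entry (a hypothetical sketch — the shipped defaults shown further below cover the common cases), a job can point at a non-default stats socket and override the collection frequency:

```yaml
app_stats:
  name: 'myapp'
  socket: '/run/uwsgi/myapp-stats.socket'   # hypothetical path; match your uWSGI stats-server setting
  update_every: 2
```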
- -The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - -Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - -Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - - -<details open><summary>Config options</summary> - -| Name | Description | Default | Required | -|:----|:-----------|:-------|:--------:| -| update_every | Sets the default data collection frequency. | 5 | no | -| priority | Controls the order of charts at the netdata dashboard. | 60000 | no | -| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no | -| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no | -| name | The JOB's name as it will appear at the dashboard (by default is the job_name) | job_name | no | -| socket | The 'path/to/uwsgistats.sock' | no | no | -| host | The host to connect to | no | no | -| port | The port to connect to | no | no | - -</details> - -#### Examples - -##### Basic (default out-of-the-box) - -A basic example configuration, one job will run at a time. Autodetect mechanism uses it by default. As all JOBs have the same name, only one can run at a time. - -<details open><summary>Config</summary> - -```yaml -socket: - name : 'local' - socket : '/tmp/stats.socket' - -localhost: - name : 'local' - host : 'localhost' - port : 1717 - -localipv4: - name : 'local' - host : '127.0.0.1' - port : 1717 - -localipv6: - name : 'local' - host : '::1' - port : 1717 - -``` -</details> - -##### Multi-instance - -> **Note**: When you define multiple jobs, their names must be unique. - -Collecting metrics from local and remote instances. - - -<details open><summary>Config</summary> - -```yaml -local: - name : 'local' - host : 'localhost' - port : 1717 - -remote: - name : 'remote' - host : '192.0.2.1' - port : 1717 - -``` -</details> - - - -## Troubleshooting - -### Debug Mode - -To troubleshoot issues with the `uwsgi` collector, run the `python.d.plugin` with the debug option enabled. The output -should give you clues as to why the collector isn't working. - -- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on - your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. - - ```bash - cd /usr/libexec/netdata/plugins.d/ - ``` - -- Switch to the `netdata` user. 
- - ```bash - sudo -u netdata -s - ``` - -- Run the `python.d.plugin` to debug the collector: - - ```bash - ./python.d.plugin uwsgi debug trace - ``` - - diff --git a/src/collectors/python.d.plugin/uwsgi/metadata.yaml b/src/collectors/python.d.plugin/uwsgi/metadata.yaml deleted file mode 100644 index cdb090ac1..000000000 --- a/src/collectors/python.d.plugin/uwsgi/metadata.yaml +++ /dev/null @@ -1,201 +0,0 @@ -plugin_name: python.d.plugin -modules: - - meta: - plugin_name: python.d.plugin - module_name: uwsgi - monitored_instance: - name: uWSGI - link: "https://github.com/unbit/uwsgi/tree/2.0.21" - categories: - - data-collection.web-servers-and-web-proxies - icon_filename: "uwsgi.svg" - related_resources: - integrations: - list: [] - info_provided_to_referring_integrations: - description: "" - keywords: - - application server - - python - - web applications - most_popular: false - overview: - data_collection: - metrics_description: "This collector monitors uWSGI metrics about requests, workers, memory and more." - method_description: "It collects every metric exposed from the stats server of uWSGI, either from the `stats.socket` or from the web server's TCP/IP socket." - supported_platforms: - include: [] - exclude: [] - multi_instance: true - additional_permissions: - description: "" - default_behavior: - auto_detection: - description: "This collector will auto-detect uWSGI instances deployed on the local host, running on port 1717, or exposing stats on socket `tmp/stats.socket`." - limits: - description: "" - performance_impact: - description: "" - setup: - prerequisites: - list: - - title: Enable the uWSGI Stats server - description: | - Make sure that you uWSGI exposes it's metrics via a Stats server. - - Source: https://uwsgi-docs.readthedocs.io/en/latest/StatsServer.html - configuration: - file: - name: "python.d/uwsgi.conf" - options: - description: | - There are 2 sections: - - * Global variables - * One or more JOBS that can define multiple different instances to monitor. - - The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. - - Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition. - - Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. - folding: - title: "Config options" - enabled: true - list: - - name: update_every - description: Sets the default data collection frequency. - default_value: 5 - required: false - - name: priority - description: Controls the order of charts at the netdata dashboard. - default_value: 60000 - required: false - - name: autodetection_retry - description: Sets the job re-check interval in seconds. - default_value: 0 - required: false - - name: penalty - description: Indicates whether to apply penalty to update_every in case of failures. 
- default_value: yes - required: false - - name: name - description: The JOB's name as it will appear at the dashboard (by default is the job_name) - default_value: job_name - required: false - - name: socket - description: The 'path/to/uwsgistats.sock' - default_value: no - required: false - - name: host - description: The host to connect to - default_value: no - required: false - - name: port - description: The port to connect to - default_value: no - required: false - examples: - folding: - enabled: true - title: "Config" - list: - - name: Basic (default out-of-the-box) - description: A basic example configuration, one job will run at a time. Autodetect mechanism uses it by default. As all JOBs have the same name, only one can run at a time. - config: | - socket: - name : 'local' - socket : '/tmp/stats.socket' - - localhost: - name : 'local' - host : 'localhost' - port : 1717 - - localipv4: - name : 'local' - host : '127.0.0.1' - port : 1717 - - localipv6: - name : 'local' - host : '::1' - port : 1717 - - name: Multi-instance - description: | - > **Note**: When you define multiple jobs, their names must be unique. - - Collecting metrics from local and remote instances. - config: | - local: - name : 'local' - host : 'localhost' - port : 1717 - - remote: - name : 'remote' - host : '192.0.2.1' - port : 1717 - troubleshooting: - problems: - list: [] - alerts: [] - metrics: - folding: - title: Metrics - enabled: false - description: "" - availability: [] - scopes: - - name: global - description: "These metrics refer to the entire monitored application." - labels: [] - metrics: - - name: uwsgi.requests - description: Requests - unit: "requests/s" - chart_type: stacked - dimensions: - - name: a dimension per worker - - name: uwsgi.tx - description: Transmitted data - unit: "KiB/s" - chart_type: stacked - dimensions: - - name: a dimension per worker - - name: uwsgi.avg_rt - description: Average request time - unit: "milliseconds" - chart_type: line - dimensions: - - name: a dimension per worker - - name: uwsgi.memory_rss - description: RSS (Resident Set Size) - unit: "MiB" - chart_type: stacked - dimensions: - - name: a dimension per worker - - name: uwsgi.memory_vsz - description: VSZ (Virtual Memory Size) - unit: "MiB" - chart_type: stacked - dimensions: - - name: a dimension per worker - - name: uwsgi.exceptions - description: Exceptions - unit: "exceptions" - chart_type: line - dimensions: - - name: exceptions - - name: uwsgi.harakiris - description: Harakiris - unit: "harakiris" - chart_type: line - dimensions: - - name: harakiris - - name: uwsgi.respawns - description: Respawns - unit: "respawns" - chart_type: line - dimensions: - - name: respawns diff --git a/src/collectors/python.d.plugin/uwsgi/uwsgi.chart.py b/src/collectors/python.d.plugin/uwsgi/uwsgi.chart.py deleted file mode 100644 index e4d900005..000000000 --- a/src/collectors/python.d.plugin/uwsgi/uwsgi.chart.py +++ /dev/null @@ -1,177 +0,0 @@ -# -*- coding: utf-8 -*- -# Description: uwsgi netdata python.d module -# Author: Robbert Segeren (robbert-ef) -# SPDX-License-Identifier: GPL-3.0-or-later - -import json -from copy import deepcopy - -from bases.FrameworkServices.SocketService import SocketService - -ORDER = [ - 'requests', - 'tx', - 'avg_rt', - 'memory_rss', - 'memory_vsz', - 'exceptions', - 'harakiri', - 'respawn', -] - -DYNAMIC_CHARTS = [ - 'requests', - 'tx', - 'avg_rt', - 'memory_rss', - 'memory_vsz', -] - -# NOTE: lines are created dynamically in `check()` method -CHARTS = { - 'requests': { - 'options': [None, 
'Requests', 'requests/s', 'requests', 'uwsgi.requests', 'stacked'], - 'lines': [ - ['requests', 'requests', 'incremental'] - ] - }, - 'tx': { - 'options': [None, 'Transmitted data', 'KiB/s', 'requests', 'uwsgi.tx', 'stacked'], - 'lines': [ - ['tx', 'tx', 'incremental'] - ] - }, - 'avg_rt': { - 'options': [None, 'Average request time', 'milliseconds', 'requests', 'uwsgi.avg_rt', 'line'], - 'lines': [ - ['avg_rt', 'avg_rt', 'absolute'] - ] - }, - 'memory_rss': { - 'options': [None, 'RSS (Resident Set Size)', 'MiB', 'memory', 'uwsgi.memory_rss', 'stacked'], - 'lines': [ - ['memory_rss', 'memory_rss', 'absolute', 1, 1 << 20] - ] - }, - 'memory_vsz': { - 'options': [None, 'VSZ (Virtual Memory Size)', 'MiB', 'memory', 'uwsgi.memory_vsz', 'stacked'], - 'lines': [ - ['memory_vsz', 'memory_vsz', 'absolute', 1, 1 << 20] - ] - }, - 'exceptions': { - 'options': [None, 'Exceptions', 'exceptions', 'exceptions', 'uwsgi.exceptions', 'line'], - 'lines': [ - ['exceptions', 'exceptions', 'incremental'] - ] - }, - 'harakiri': { - 'options': [None, 'Harakiris', 'harakiris', 'harakiris', 'uwsgi.harakiris', 'line'], - 'lines': [ - ['harakiri_count', 'harakiris', 'incremental'] - ] - }, - 'respawn': { - 'options': [None, 'Respawns', 'respawns', 'respawns', 'uwsgi.respawns', 'line'], - 'lines': [ - ['respawn_count', 'respawns', 'incremental'] - ] - }, -} - - -class Service(SocketService): - def __init__(self, configuration=None, name=None): - super(Service, self).__init__(configuration=configuration, name=name) - self.order = ORDER - self.definitions = deepcopy(CHARTS) - self.url = self.configuration.get('host', 'localhost') - self.port = self.configuration.get('port', 1717) - # Clear dynamic dimensions, these are added during `_get_data()` to allow adding workers at run-time - for chart in DYNAMIC_CHARTS: - self.definitions[chart]['lines'] = [] - self.last_result = {} - self.workers = [] - - def read_data(self): - """ - Read data from socket and parse as JSON. - :return: (dict) stats - """ - raw_data = self._get_raw_data() - if not raw_data: - return None - try: - return json.loads(raw_data) - except ValueError as err: - self.error(err) - return None - - def check(self): - """ - Parse configuration and check if we can read data. - :return: boolean - """ - self._parse_config() - return bool(self.read_data()) - - def add_worker_dimensions(self, key): - """ - Helper to add dimensions for a worker. - :param key: (int or str) worker identifier - :return: - """ - for chart in DYNAMIC_CHARTS: - for line in CHARTS[chart]['lines']: - dimension_id = '{}_{}'.format(line[0], key) - dimension_name = str(key) - - dimension = [dimension_id, dimension_name] + line[2:] - self.charts[chart].add_dimension(dimension) - - @staticmethod - def _check_raw_data(data): - # The server will close the connection when it's done sending - # data, so just keep looping until that happens. 
- return False - - def _get_data(self): - """ - Read data from socket - :return: dict - """ - stats = self.read_data() - if not stats: - return None - - result = { - 'exceptions': 0, - 'harakiri_count': 0, - 'respawn_count': 0, - } - - for worker in stats['workers']: - key = worker['pid'] - - # Add dimensions for new workers - if key not in self.workers: - self.add_worker_dimensions(key) - self.workers.append(key) - - result['requests_{}'.format(key)] = worker['requests'] - result['tx_{}'.format(key)] = worker['tx'] - result['avg_rt_{}'.format(key)] = worker['avg_rt'] - - # avg_rt is not reset by uwsgi, so reset here - if self.last_result.get('requests_{}'.format(key)) == worker['requests']: - result['avg_rt_{}'.format(key)] = 0 - - result['memory_rss_{}'.format(key)] = worker['rss'] - result['memory_vsz_{}'.format(key)] = worker['vsz'] - - result['exceptions'] += worker['exceptions'] - result['harakiri_count'] += worker['harakiri_count'] - result['respawn_count'] += worker['respawn_count'] - - self.last_result = result - return result diff --git a/src/collectors/python.d.plugin/uwsgi/uwsgi.conf b/src/collectors/python.d.plugin/uwsgi/uwsgi.conf deleted file mode 100644 index 7d09e7330..000000000 --- a/src/collectors/python.d.plugin/uwsgi/uwsgi.conf +++ /dev/null @@ -1,92 +0,0 @@ -# netdata python.d.plugin configuration for uwsgi -# -# This file is in YaML format. Generally the format is: -# -# name: value -# -# There are 2 sections: -# - global variables -# - one or more JOBS -# -# JOBS allow you to collect values from multiple sources. -# Each source will have its own set of charts. -# -# JOB parameters have to be indented (using spaces only, example below). - -# ---------------------------------------------------------------------- -# Global Variables -# These variables set the defaults for all JOBs, however each JOB -# may define its own, overriding the defaults. - -# update_every sets the default data collection frequency. -# If unset, the python.d.plugin default is used. -# update_every: 1 - -# priority controls the order of charts at the netdata dashboard. -# Lower numbers move the charts towards the top of the page. -# If unset, the default for python.d.plugin is used. -# priority: 60000 - -# penalty indicates whether to apply penalty to update_every in case of failures. -# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes. -# penalty: yes - -# autodetection_retry sets the job re-check interval in seconds. -# The job is not deleted if check fails. -# Attempts to start the job are made once every autodetection_retry. -# This feature is disabled by default. -# autodetection_retry: 0 - -# ---------------------------------------------------------------------- -# JOBS (data collection sources) -# -# The default JOBS share the same *name*. JOBS with the same name -# are mutually exclusive. Only one of them will be allowed running at -# any time. This allows autodetection to try several alternatives and -# pick the one that works. -# -# Any number of jobs is supported. -# -# All python.d.plugin JOBS (for all its modules) support a set of -# predefined parameters. 
These are: -# -# job_name: -# name: myname # the JOB's name as it will appear at the -# # dashboard (by default is the job_name) -# # JOBs sharing a name are mutually exclusive -# update_every: 1 # the JOB's data collection frequency -# priority: 60000 # the JOB's order on the dashboard -# penalty: yes # the JOB's penalty -# autodetection_retry: 0 # the JOB's re-check interval in seconds -# -# Additionally to the above, uwsgi also supports the following: -# -# socket: 'path/to/uwsgistats.sock' -# -# or -# host: 'IP or HOSTNAME' # the host to connect to -# port: PORT # the port to connect to -# -# ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) -# - -socket: - name : 'local' - socket : '/tmp/stats.socket' - -localhost: - name : 'local' - host : 'localhost' - port : 1717 - -localipv4: - name : 'local' - host : '127.0.0.1' - port : 1717 - -localipv6: - name : 'local' - host : '::1' - port : 1717 diff --git a/src/collectors/python.d.plugin/varnish/integrations/varnish.md b/src/collectors/python.d.plugin/varnish/integrations/varnish.md index 64da800a3..5850dcc4c 100644 --- a/src/collectors/python.d.plugin/varnish/integrations/varnish.md +++ b/src/collectors/python.d.plugin/varnish/integrations/varnish.md @@ -188,6 +188,7 @@ job_name: ### Debug Mode + To troubleshoot issues with the `varnish` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -210,4 +211,37 @@ should give you clues as to why the collector isn't working. ./python.d.plugin varnish debug trace ``` +### Getting Logs + +If you're encountering problems with the `varnish` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep varnish +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep varnish /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep varnish +``` + diff --git a/src/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md b/src/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md index 35517aeda..15582879e 100644 --- a/src/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md +++ b/src/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md @@ -142,6 +142,7 @@ sensors: ### Debug Mode + To troubleshoot issues with the `w1sensor` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -164,4 +165,37 @@ should give you clues as to why the collector isn't working. 
./python.d.plugin w1sensor debug trace ``` +### Getting Logs + +If you're encountering problems with the `w1sensor` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep w1sensor +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep w1sensor /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. + +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep w1sensor +``` + diff --git a/src/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md b/src/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md index 1aceec67d..a5d2a7e47 100644 --- a/src/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md +++ b/src/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md @@ -170,6 +170,7 @@ local: ### Debug Mode + To troubleshoot issues with the `zscores` collector, run the `python.d.plugin` with the debug option enabled. The output should give you clues as to why the collector isn't working. @@ -192,4 +193,37 @@ should give you clues as to why the collector isn't working. ./python.d.plugin zscores debug trace ``` +### Getting Logs + +If you're encountering problems with the `zscores` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep zscores +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name: + +```bash +grep zscores /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. 
+ +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep zscores +``` + diff --git a/src/collectors/systemd-journal.plugin/systemd-journal.c b/src/collectors/systemd-journal.plugin/systemd-journal.c index 57d7ecbc4..6da9c687e 100644 --- a/src/collectors/systemd-journal.plugin/systemd-journal.c +++ b/src/collectors/systemd-journal.plugin/systemd-journal.c @@ -1037,7 +1037,7 @@ static ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_one_file( struct journal_file *jf, FUNCTION_QUERY_STATUS *fqs) { sd_journal *j = NULL; - errno = 0; + errno_clear(); fstat_cache_enable_on_thread(); diff --git a/src/collectors/tc.plugin/plugin_tc.c b/src/collectors/tc.plugin/plugin_tc.c index d2599f728..da2a39194 100644 --- a/src/collectors/tc.plugin/plugin_tc.c +++ b/src/collectors/tc.plugin/plugin_tc.c @@ -834,7 +834,7 @@ static inline void tc_split_words(char *str, char **words, int max_words) { while(i < max_words) words[i++] = NULL; } -static pid_t tc_child_pid = 0; +static POPEN_INSTANCE *tc_child_instance = NULL; static void tc_main_cleanup(void *pptr) { struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr); @@ -847,16 +847,10 @@ static void tc_main_cleanup(void *pptr) { collector_info("cleaning up..."); - if(tc_child_pid) { - collector_info("TC: killing with SIGTERM tc-qos-helper process %d", tc_child_pid); - if(killpid(tc_child_pid) != -1) { - siginfo_t info; - - collector_info("TC: waiting for tc plugin child process pid %d to exit...", tc_child_pid); - netdata_waitid(P_PID, (id_t) tc_child_pid, &info, WEXITED); - } - - tc_child_pid = 0; + if(tc_child_instance) { + collector_info("TC: stopping the running tc-qos-helper script"); + int code = spawn_popen_wait(tc_child_instance); (void)code; + tc_child_instance = NULL; } static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; @@ -921,21 +915,20 @@ void *tc_main(void *ptr) { char *tc_script = config_get("plugin:tc", "script to run to get tc values", command); while(service_running(SERVICE_COLLECTORS)) { - FILE *fp_child_input, *fp_child_output; struct tc_device *device = NULL; struct tc_class *class = NULL; snprintfz(command, TC_LINE_MAX, "exec %s %d", tc_script, localhost->rrd_update_every); netdata_log_debug(D_TC_LOOP, "executing '%s'", command); - fp_child_output = netdata_popen(command, (pid_t *)&tc_child_pid, &fp_child_input); - if(unlikely(!fp_child_output)) { + tc_child_instance = spawn_popen_run(command); + if(!tc_child_instance) { collector_error("TC: Cannot popen(\"%s\", \"r\").", command); goto cleanup; } char buffer[TC_LINE_MAX+1] = ""; - while(fgets(buffer, TC_LINE_MAX, fp_child_output) != NULL) { + while(fgets(buffer, TC_LINE_MAX, tc_child_instance->child_stdout_fp) != NULL) { if(unlikely(!service_running(SERVICE_COLLECTORS))) break; buffer[TC_LINE_MAX] = '\0'; @@ -1142,8 +1135,8 @@ void *tc_main(void *ptr) { } // fgets() failed or loop broke - int code = netdata_pclose(fp_child_input, fp_child_output, (pid_t)tc_child_pid); - tc_child_pid = 0; + int code = spawn_popen_kill(tc_child_instance); + tc_child_instance = NULL; if(unlikely(device)) { // tc_device_free(device); diff --git a/src/collectors/windows.plugin/perflib-network.c b/src/collectors/windows.plugin/perflib-network.c index 2f1bc3c53..ecadd1e87 100644 --- a/src/collectors/windows.plugin/perflib-network.c +++ b/src/collectors/windows.plugin/perflib-network.c @@ -312,7 +312,7 @@ static bool do_network_interface(PERF_DATA_BLOCK *pDataBlock, int 
update_every, d->collected_metadata = true;
}
- if(perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->traffic.received) ||
+ if(perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->traffic.received) &&
perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->traffic.sent)) {
if(d->traffic.received.current.Data == 0 && d->traffic.sent.current.Data == 0)
@@ -350,7 +350,7 @@ static bool do_network_interface(PERF_DATA_BLOCK *pDataBlock, int update_every,
        rrdset_done(d->traffic.st);
}
- if(perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->packets.received) ||
+ if(perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->packets.received) &&
perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &d->packets.sent)) {
if (unlikely(!d->packets.st)) {
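Note on the two perflib-network.c hunks above: with the original `||`, a successful read of the `received` counter short-circuits the condition, so the paired `sent` counter is never fetched for that iteration and the chart can be updated with one fresh and one stale value. With `&&`, both counters are read and the chart is only updated when both reads succeed. The sketch below only illustrates that short-circuit difference; `read_counter()` is a hypothetical stand-in for `perflibGetInstanceCounter()`, not Netdata code.

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for perflibGetInstanceCounter(): always succeeds
 * and reports which counter was actually read. */
static bool read_counter(const char *name, long *out) {
    *out = 42;
    printf("read %s\n", name);
    return true;
}

int main(void) {
    long received = 0, sent = 0;

    /* old pattern: '||' short-circuits, so "read sent" never happens here
     * and 'sent' keeps whatever value it had before */
    if (read_counter("received", &received) || read_counter("sent", &sent))
        printf("old: received=%ld sent=%ld (sent possibly stale)\n", received, sent);

    /* new pattern: both reads are attempted; the block runs only if both succeed */
    sent = 0;
    if (read_counter("received", &received) && read_counter("sent", &sent))
        printf("new: received=%ld sent=%ld\n", received, sent);

    return 0;
}
```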
diff --git a/src/collectors/windows.plugin/perflib-objects.c b/src/collectors/windows.plugin/perflib-objects.c
new file mode 100644
index 000000000..6628ff864
--- /dev/null
+++ b/src/collectors/windows.plugin/perflib-objects.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "windows_plugin.h"
+#include "windows-internals.h"
+
+#define _COMMON_PLUGIN_NAME "windows.plugin"
+#define _COMMON_PLUGIN_MODULE_NAME "PerflibObjects"
+#include "../common-contexts/common-contexts.h"
+
+static void initialize(void) {
+ ;
+}
+
+static bool do_objects(PERF_DATA_BLOCK *pDataBlock, int update_every) {
+ PERF_OBJECT_TYPE *pObjectType = perflibFindObjectTypeByName(pDataBlock, "Objects");
+ if (!pObjectType)
+ return false;
+
+ static COUNTER_DATA semaphores = { .key = "Semaphores" };
+
+ if(perflibGetObjectCounter(pDataBlock, pObjectType, &semaphores)) {
+ ULONGLONG sem = semaphores.current.Data;
+ common_semaphore_ipc(sem, WINDOWS_MAX_KERNEL_OBJECT, _COMMON_PLUGIN_MODULE_NAME, update_every);
+ }
+
+ return true;
+}
+
+int do_PerflibObjects(int update_every, usec_t dt __maybe_unused) {
+ static bool initialized = false;
+
+ if(unlikely(!initialized)) {
+ initialize();
+ initialized = true;
+ }
+
+ DWORD id = RegistryFindIDByName("Objects");
+ if(id == PERFLIB_REGISTRY_NAME_NOT_FOUND)
+ return -1;
+
+ PERF_DATA_BLOCK *pDataBlock = perflibGetPerformanceData(id);
+ if(!pDataBlock) return -1;
+
+ do_objects(pDataBlock, update_every);
+
+ return 0;
+}
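The new perflib-objects.c module above follows the existing windows.plugin pattern: find the "Objects" performance object, read its "Semaphores" counter, and feed the value to the shared `common_semaphore_ipc()` context using the `WINDOWS_MAX_KERNEL_OBJECT` limit defined later in this diff. To sanity-check the same counter outside Netdata, a small standalone PDH program can be used; the snippet below is purely illustrative and is not part of the plugin or its build.

```c
// Illustrative only: read "\Objects\Semaphores" via the PDH API, the same
// system counter that do_objects() collects through perflib.
#include <windows.h>
#include <pdh.h>
#include <stdio.h>

#pragma comment(lib, "pdh.lib")

int main(void) {
    PDH_HQUERY query = NULL;
    PDH_HCOUNTER counter = NULL;
    PDH_FMT_COUNTERVALUE value;

    if (PdhOpenQueryA(NULL, 0, &query) != ERROR_SUCCESS)
        return 1;

    // English counter path, so the lookup works regardless of the OS language
    if (PdhAddEnglishCounterA(query, "\\Objects\\Semaphores", 0, &counter) != ERROR_SUCCESS) {
        PdhCloseQuery(query);
        return 1;
    }

    if (PdhCollectQueryData(query) == ERROR_SUCCESS &&
        PdhGetFormattedCounterValue(counter, PDH_FMT_LARGE, NULL, &value) == ERROR_SUCCESS)
        printf("Semaphores: %lld\n", value.largeValue);

    PdhCloseQuery(query);
    return 0;
}
```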
diff --git a/src/collectors/windows.plugin/perflib-processor.c b/src/collectors/windows.plugin/perflib-processor.c
index d149c6aad..4c7d86c90 100644
--- a/src/collectors/windows.plugin/perflib-processor.c
+++ b/src/collectors/windows.plugin/perflib-processor.c
@@ -3,6 +3,10 @@
 #include "windows_plugin.h"
#include "windows-internals.h"
+#define _COMMON_PLUGIN_NAME "windows.plugin"
+#define _COMMON_PLUGIN_MODULE_NAME "PerflibProcesses"
+#include "../common-contexts/common-contexts.h"
+
struct processor {
bool collected_metadata;
@@ -22,6 +26,8 @@ struct processor {
    COUNTER_DATA percentDPCTime;
COUNTER_DATA percentInterruptTime;
COUNTER_DATA percentIdleTime;
+
+ COUNTER_DATA interruptsPerSec;
};
struct processor total = { 0 };
@@ -33,6 +39,7 @@ void initialize_processor_keys(struct processor *p) {
    p->percentDPCTime.key = "% DPC Time";
p->percentInterruptTime.key = "% Interrupt Time";
p->percentIdleTime.key = "% Idle Time";
+ p->interruptsPerSec.key = "Interrupts/sec";
}
void dict_processor_insert_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) {
@@ -57,6 +64,7 @@ static bool do_processors(PERF_DATA_BLOCK *pDataBlock, int update_every) {
    static const RRDVAR_ACQUIRED *cpus_var = NULL;
int cores_found = 0;
+ uint64_t totalIPC = 0;
PERF_INSTANCE_DEFINITION *pi = NULL;
for(LONG i = 0; i < pObjectType->NumInstances ; i++) {
@@ -96,6 +104,8 @@ static bool do_processors(PERF_DATA_BLOCK *pDataBlock, int update_every) {
    perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentInterruptTime);
perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->percentIdleTime);
+ perflibGetInstanceCounter(pDataBlock, pObjectType, pi, &p->interruptsPerSec);
+
if(!p->st) {
p->st = rrdset_create_localhost(
is_total ? "system" : "cpu"
@@ -130,6 +140,8 @@ static bool do_processors(PERF_DATA_BLOCK *pDataBlock, int update_every) {
    uint64_t irq = p->percentInterruptTime.current.Data;
uint64_t idle = p->percentIdleTime.current.Data;
+ totalIPC += p->interruptsPerSec.current.Data;
+
rrddim_set_by_pointer(p->st, p->rd_user, (collected_number)user);
rrddim_set_by_pointer(p->st, p->rd_system, (collected_number)system);
rrddim_set_by_pointer(p->st, p->rd_irq, (collected_number)irq);
@@ -167,6 +179,8 @@ static bool do_processors(PERF_DATA_BLOCK *pDataBlock, int update_every) {
    if(cpus_var)
rrdvar_host_variable_set(localhost, cpus_var, cores_found);
+ common_interrupts(totalIPC, update_every, NULL);
+
return true;
}
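The perflib-processor.c hunks above add the per-CPU "Interrupts/sec" counter, accumulate it into `totalIPC` while iterating the processor instances, and report the sum once per cycle through `common_interrupts()`. The sketch below only illustrates that aggregation pattern with made-up numbers; it is not the plugin code, and the actual chart/context naming is handled by the common-contexts helpers, which are not shown here.

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
    // hypothetical per-CPU "Interrupts/sec" samples for one collection cycle
    uint64_t per_cpu[] = { 1200, 980, 1105, 1010 };
    uint64_t total = 0;

    for (size_t i = 0; i < sizeof(per_cpu) / sizeof(per_cpu[0]); i++)
        total += per_cpu[i];

    // the plugin hands a total like this to common_interrupts() once per cycle
    printf("system-wide interrupts this cycle: %llu/s\n", (unsigned long long) total);
    return 0;
}
```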
diff --git a/src/collectors/windows.plugin/windows_plugin.c b/src/collectors/windows.plugin/windows_plugin.c index 2d357b9b1..35ef857be 100644 --- a/src/collectors/windows.plugin/windows_plugin.c +++ b/src/collectors/windows.plugin/windows_plugin.c @@ -24,6 +24,7 @@ static struct proc_module { {.name = "PerflibMemory", .dim = "PerflibMemory", .func = do_PerflibMemory}, {.name = "PerflibStorage", .dim = "PerflibStorage", .func = do_PerflibStorage}, {.name = "PerflibNetwork", .dim = "PerflibNetwork", .func = do_PerflibNetwork}, + {.name = "PerflibObjects", .dim = "PerflibObjects", .func = do_PerflibObjects}, // the terminator of this array {.name = NULL, .dim = NULL, .func = NULL} diff --git a/src/collectors/windows.plugin/windows_plugin.h b/src/collectors/windows.plugin/windows_plugin.h index f76b9a782..73c1ecda1 100644 --- a/src/collectors/windows.plugin/windows_plugin.h +++ b/src/collectors/windows.plugin/windows_plugin.h @@ -7,6 +7,10 @@ #define PLUGIN_WINDOWS_NAME "windows.plugin" +// https://learn.microsoft.com/es-es/windows/win32/sysinfo/kernel-objects?redirectedfrom=MSDN +// 2^24 +#define WINDOWS_MAX_KERNEL_OBJECT 16777216 + void *win_plugin_main(void *ptr); extern char windows_shared_buffer[8192]; @@ -19,6 +23,7 @@ int do_PerflibNetwork(int update_every, usec_t dt); int do_PerflibProcesses(int update_every, usec_t dt); int do_PerflibProcessor(int update_every, usec_t dt); int do_PerflibMemory(int update_every, usec_t dt); +int do_PerflibObjects(int update_every, usec_t dt); #include "perflib.h" diff --git a/src/collectors/xenstat.plugin/xenstat_plugin.c b/src/collectors/xenstat.plugin/xenstat_plugin.c index b17b746f5..e4b8a2bd0 100644 --- a/src/collectors/xenstat.plugin/xenstat_plugin.c +++ b/src/collectors/xenstat.plugin/xenstat_plugin.c @@ -986,7 +986,7 @@ int main(int argc, char **argv) { netdata_log_error("xenstat.plugin: ignoring parameter '%s'", argv[i]); } - errno = 0; + errno_clear(); if(freq >= netdata_update_every) netdata_update_every = freq; |
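Both the xenstat hunk above and the systemd-journal.c hunk earlier in this diff replace the bare `errno = 0;` with `errno_clear()`. The helper's implementation is not part of this diff; as a rough mental model only (an assumption, not the actual Netdata code, which may also reset platform-specific error state), it behaves like:

```c
#include <errno.h>

// Assumed shape of errno_clear() for illustration only; the real helper is
// defined in Netdata's libnetdata and is not shown in this diff.
static inline void errno_clear(void) {
    errno = 0;
}
```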