diff options
Diffstat (limited to '')
41 files changed, 6024 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/docker/README.md b/src/go/collectors/go.d.plugin/modules/docker/README.md new file mode 120000 index 000000000..b4804ee06 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/README.md @@ -0,0 +1 @@ +integrations/docker.md
\ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/docker/charts.go b/src/go/collectors/go.d.plugin/modules/docker/charts.go new file mode 100644 index 000000000..2dd26c0e3 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/charts.go @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker + +import ( + "fmt" + "strings" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" +) + +const ( + prioContainersState = module.Priority + iota + prioContainersHealthy + + prioContainerState + prioContainerHealthStatus + prioContainerWritableLayerSize + + prioImagesCount + prioImagesSize +) + +var summaryCharts = module.Charts{ + containersStateChart.Copy(), + containersHealthyChart.Copy(), + + imagesCountChart.Copy(), + imagesSizeChart.Copy(), +} + +var ( + containersStateChart = module.Chart{ + ID: "containers_state", + Title: "Total number of Docker containers in various states", + Units: "containers", + Fam: "containers", + Ctx: "docker.containers_state", + Priority: prioContainersState, + Type: module.Stacked, + Dims: module.Dims{ + {ID: "containers_state_running", Name: "running"}, + {ID: "containers_state_paused", Name: "paused"}, + {ID: "containers_state_exited", Name: "exited"}, + }, + } + containersHealthyChart = module.Chart{ + ID: "healthy_containers", + Title: "Total number of Docker containers in various health states", + Units: "containers", + Fam: "containers", + Ctx: "docker.containers_health_status", + Priority: prioContainersHealthy, + Dims: module.Dims{ + {ID: "containers_health_status_healthy", Name: "healthy"}, + {ID: "containers_health_status_unhealthy", Name: "unhealthy"}, + {ID: "containers_health_status_not_running_unhealthy", Name: "not_running_unhealthy"}, + {ID: "containers_health_status_starting", Name: "starting"}, + {ID: "containers_health_status_none", Name: "no_healthcheck"}, + }, + } +) + +var ( + imagesCountChart = module.Chart{ + ID: "images_count", + Title: "Total 
number of Docker images in various states", + Units: "images", + Fam: "images", + Ctx: "docker.images", + Priority: prioImagesCount, + Type: module.Stacked, + Dims: module.Dims{ + {ID: "images_active", Name: "active"}, + {ID: "images_dangling", Name: "dangling"}, + }, + } + imagesSizeChart = module.Chart{ + ID: "images_size", + Title: "Total size of all Docker images", + Units: "bytes", + Fam: "images", + Ctx: "docker.images_size", + Priority: prioImagesSize, + Dims: module.Dims{ + {ID: "images_size", Name: "size"}, + }, + } +) + +var ( + containerChartsTmpl = module.Charts{ + containerStateChartTmpl.Copy(), + containerHealthStatusChartTmpl.Copy(), + containerWritableLayerSizeChartTmpl.Copy(), + } + + containerStateChartTmpl = module.Chart{ + ID: "container_%s_state", + Title: "Docker container state", + Units: "state", + Fam: "containers", + Ctx: "docker.container_state", + Priority: prioContainerState, + Dims: module.Dims{ + {ID: "container_%s_state_running", Name: "running"}, + {ID: "container_%s_state_paused", Name: "paused"}, + {ID: "container_%s_state_exited", Name: "exited"}, + {ID: "container_%s_state_created", Name: "created"}, + {ID: "container_%s_state_restarting", Name: "restarting"}, + {ID: "container_%s_state_removing", Name: "removing"}, + {ID: "container_%s_state_dead", Name: "dead"}, + }, + } + containerHealthStatusChartTmpl = module.Chart{ + ID: "container_%s_health_status", + Title: "Docker container health status", + Units: "status", + Fam: "containers", + Ctx: "docker.container_health_status", + Priority: prioContainerHealthStatus, + Dims: module.Dims{ + {ID: "container_%s_health_status_healthy", Name: "healthy"}, + {ID: "container_%s_health_status_unhealthy", Name: "unhealthy"}, + {ID: "container_%s_health_status_not_running_unhealthy", Name: "not_running_unhealthy"}, + {ID: "container_%s_health_status_starting", Name: "starting"}, + {ID: "container_%s_health_status_none", Name: "no_healthcheck"}, + }, + } + containerWritableLayerSizeChartTmpl 
= module.Chart{ + ID: "container_%s_writable_layer_size", + Title: "Docker container writable layer size", + Units: "bytes", + Fam: "containers", + Ctx: "docker.container_writeable_layer_size", + Priority: prioContainerWritableLayerSize, + Dims: module.Dims{ + {ID: "container_%s_size_rw", Name: "writable_layer"}, + }, + } +) + +func (d *Docker) addContainerCharts(name, image string) { + charts := containerChartsTmpl.Copy() + if !d.CollectContainerSize { + _ = charts.Remove(containerWritableLayerSizeChartTmpl.ID) + } + + for _, chart := range *charts { + chart.ID = fmt.Sprintf(chart.ID, name) + chart.Labels = []module.Label{ + {Key: "container_name", Value: name}, + {Key: "image", Value: image}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, name) + } + } + + if err := d.Charts().Add(*charts...); err != nil { + d.Warning(err) + } +} + +func (d *Docker) removeContainerCharts(name string) { + px := fmt.Sprintf("container_%s", name) + + for _, chart := range *d.Charts() { + if strings.HasPrefix(chart.ID, px) { + chart.MarkRemove() + chart.MarkNotCreated() + } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/collect.go b/src/go/collectors/go.d.plugin/modules/docker/collect.go new file mode 100644 index 000000000..7b5af7cab --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/collect.go @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker + +import ( + "context" + "fmt" + "strings" + + "github.com/docker/docker/api/types" + typesContainer "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/filters" +) + +func (d *Docker) collect() (map[string]int64, error) { + if d.client == nil { + client, err := d.newClient(d.Config) + if err != nil { + return nil, err + } + d.client = client + } + + if !d.verNegotiated { + d.verNegotiated = true + d.negotiateAPIVersion() + } + + defer func() { _ = d.client.Close() }() + + mx := make(map[string]int64) + + if err := 
d.collectInfo(mx); err != nil { + return nil, err + } + if err := d.collectImages(mx); err != nil { + return nil, err + } + if err := d.collectContainers(mx); err != nil { + return nil, err + } + + return mx, nil +} + +func (d *Docker) collectInfo(mx map[string]int64) error { + ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration()) + defer cancel() + + info, err := d.client.Info(ctx) + if err != nil { + return err + } + + mx["containers_state_running"] = int64(info.ContainersRunning) + mx["containers_state_paused"] = int64(info.ContainersPaused) + mx["containers_state_exited"] = int64(info.ContainersStopped) + + return nil +} + +func (d *Docker) collectImages(mx map[string]int64) error { + ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration()) + defer cancel() + + images, err := d.client.ImageList(ctx, types.ImageListOptions{}) + if err != nil { + return err + } + + mx["images_size"] = 0 + mx["images_dangling"] = 0 + mx["images_active"] = 0 + + for _, v := range images { + mx["images_size"] += v.Size + if v.Containers == 0 { + mx["images_dangling"]++ + } else { + mx["images_active"]++ + } + } + + return nil +} + +var ( + containerHealthStatuses = []string{ + types.Healthy, + types.Unhealthy, + types.Starting, + types.NoHealthcheck, + } + containerStates = []string{ + "created", + "running", + "paused", + "restarting", + "removing", + "exited", + "dead", + } +) + +func (d *Docker) collectContainers(mx map[string]int64) error { + containerSet := make(map[string][]types.Container) + + for _, status := range containerHealthStatuses { + if err := func() error { + ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration()) + defer cancel() + + v, err := d.client.ContainerList(ctx, typesContainer.ListOptions{ + All: true, + Filters: filters.NewArgs(filters.KeyValuePair{Key: "health", Value: status}), + Size: d.CollectContainerSize, + }) + if err != nil { + return err + } + containerSet[status] = v + return 
nil + + }(); err != nil { + return err + } + } + + seen := make(map[string]bool) + + for _, s := range containerHealthStatuses { + mx["containers_health_status_"+s] = 0 + } + mx["containers_health_status_not_running_unhealthy"] = 0 + + for status, containers := range containerSet { + if status != types.Unhealthy { + mx["containers_health_status_"+status] = int64(len(containers)) + } + + for _, cntr := range containers { + if status == types.Unhealthy { + if cntr.State == "running" { + mx["containers_health_status_"+status] += 1 + } else { + mx["containers_health_status_not_running_unhealthy"] += 1 + } + } + + if len(cntr.Names) == 0 { + continue + } + + name := strings.TrimPrefix(cntr.Names[0], "/") + + seen[name] = true + + if !d.containers[name] { + d.containers[name] = true + d.addContainerCharts(name, cntr.Image) + } + + px := fmt.Sprintf("container_%s_", name) + + for _, s := range containerHealthStatuses { + mx[px+"health_status_"+s] = 0 + } + mx[px+"health_status_not_running_unhealthy"] = 0 + for _, s := range containerStates { + mx[px+"state_"+s] = 0 + } + + if status == types.Unhealthy && cntr.State != "running" { + mx[px+"health_status_not_running_unhealthy"] += 1 + } else { + mx[px+"health_status_"+status] = 1 + } + mx[px+"state_"+cntr.State] = 1 + mx[px+"size_rw"] = cntr.SizeRw + mx[px+"size_root_fs"] = cntr.SizeRootFs + } + } + + for name := range d.containers { + if !seen[name] { + delete(d.containers, name) + d.removeContainerCharts(name) + } + } + + return nil +} + +func (d *Docker) negotiateAPIVersion() { + ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration()) + defer cancel() + + d.client.NegotiateAPIVersion(ctx) +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/config_schema.json b/src/go/collectors/go.d.plugin/modules/docker/config_schema.json new file mode 100644 index 000000000..bd48c9126 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/config_schema.json @@ -0,0 +1,52 @@ +{ + "jsonSchema": { 
+ "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Docker collector configuration.", + "type": "object", + "properties": { + "update_every": { + "title": "Update every", + "description": "Data collection interval, measured in seconds.", + "type": "integer", + "minimum": 1, + "default": 1 + }, + "address": { + "title": "Address", + "description": "Docker daemon's Unix or TCP (listening address) socket.", + "type": "string", + "default": "unix:///var/run/docker.sock" + }, + "timeout": { + "title": "Timeout", + "description": "Timeout for establishing a connection and communication (reading and writing) in seconds.", + "type": "number", + "default": 2 + }, + "collect_container_size": { + "title": "Collect container size", + "description": "Collect container writable layer size.", + "type": "boolean", + "default": false + } + }, + "required": [ + "address" + ], + "additionalProperties": false, + "patternProperties": { + "^name$": {} + } + }, + "uiSchema": { + "uiOptions": { + "fullPage": true + }, + "address": { + "ui:help": "Use `unix://{path_to_socket}` for Unix socket or `tcp://{ip}:{port}` for TCP socket." + }, + "timeout": { + "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)." 
+ } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/docker.go b/src/go/collectors/go.d.plugin/modules/docker/docker.go new file mode 100644 index 000000000..7328a7ca6 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/docker.go @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker + +import ( + "context" + _ "embed" + "errors" + "time" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" + + "github.com/docker/docker/api/types" + typesContainer "github.com/docker/docker/api/types/container" + typesImage "github.com/docker/docker/api/types/image" + typesSystem "github.com/docker/docker/api/types/system" + docker "github.com/docker/docker/client" +) + +//go:embed "config_schema.json" +var configSchema string + +func init() { + module.Register("docker", module.Creator{ + JobConfigSchema: configSchema, + Create: func() module.Module { return New() }, + }) +} + +func New() *Docker { + return &Docker{ + Config: Config{ + Address: docker.DefaultDockerHost, + Timeout: web.Duration(time.Second * 2), + CollectContainerSize: false, + }, + + charts: summaryCharts.Copy(), + newClient: func(cfg Config) (dockerClient, error) { + return docker.NewClientWithOpts(docker.WithHost(cfg.Address)) + }, + containers: make(map[string]bool), + } +} + +type Config struct { + UpdateEvery int `yaml:"update_every" json:"update_every"` + Address string `yaml:"address" json:"address"` + Timeout web.Duration `yaml:"timeout" json:"timeout"` + CollectContainerSize bool `yaml:"collect_container_size" json:"collect_container_size"` +} + +type ( + Docker struct { + module.Base + Config `yaml:",inline" json:""` + + charts *module.Charts + + client dockerClient + newClient func(Config) (dockerClient, error) + + verNegotiated bool + containers map[string]bool + } + dockerClient interface { + NegotiateAPIVersion(context.Context) + Info(context.Context) (typesSystem.Info, error) + 
ImageList(context.Context, types.ImageListOptions) ([]typesImage.Summary, error) + ContainerList(context.Context, typesContainer.ListOptions) ([]types.Container, error) + Close() error + } +) + +func (d *Docker) Configuration() any { + return d.Config +} + +func (d *Docker) Init() error { + return nil +} + +func (d *Docker) Check() error { + mx, err := d.collect() + if err != nil { + d.Error(err) + return err + } + if len(mx) == 0 { + return errors.New("no metrics collected") + + } + return nil +} + +func (d *Docker) Charts() *module.Charts { + return d.charts +} + +func (d *Docker) Collect() map[string]int64 { + mx, err := d.collect() + if err != nil { + d.Error(err) + } + + if len(mx) == 0 { + return nil + } + return mx +} + +func (d *Docker) Cleanup() { + if d.client == nil { + return + } + if err := d.client.Close(); err != nil { + d.Warningf("error on closing docker client: %v", err) + } + d.client = nil +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/docker_test.go b/src/go/collectors/go.d.plugin/modules/docker/docker_test.go new file mode 100644 index 000000000..934178b9a --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/docker_test.go @@ -0,0 +1,852 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker + +import ( + "context" + "errors" + "os" + "testing" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + + "github.com/docker/docker/api/types" + typesContainer "github.com/docker/docker/api/types/container" + typesImage "github.com/docker/docker/api/types/image" + typesSystem "github.com/docker/docker/api/types/system" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + dataConfigJSON, _ = os.ReadFile("testdata/config.json") + dataConfigYAML, _ = os.ReadFile("testdata/config.yaml") +) + +func Test_testDataIsValid(t *testing.T) { + for name, data := range map[string][]byte{ + "dataConfigJSON": dataConfigJSON, + "dataConfigYAML": dataConfigYAML, + } { + 
require.NotNil(t, data, name) + } +} + +func TestDocker_ConfigurationSerialize(t *testing.T) { + module.TestConfigurationSerialize(t, &Docker{}, dataConfigJSON, dataConfigYAML) +} + +func TestDocker_Init(t *testing.T) { + tests := map[string]struct { + config Config + wantFail bool + }{ + "default config": { + wantFail: false, + config: New().Config, + }, + "unset 'address'": { + wantFail: false, + config: Config{ + Address: "", + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + d := New() + d.Config = test.config + + if test.wantFail { + assert.Error(t, d.Init()) + } else { + assert.NoError(t, d.Init()) + } + }) + } +} + +func TestDocker_Charts(t *testing.T) { + assert.Equal(t, len(summaryCharts), len(*New().Charts())) +} + +func TestDocker_Cleanup(t *testing.T) { + tests := map[string]struct { + prepare func(d *Docker) + wantClose bool + }{ + "after New": { + wantClose: false, + prepare: func(d *Docker) {}, + }, + "after Init": { + wantClose: false, + prepare: func(d *Docker) { _ = d.Init() }, + }, + "after Check": { + wantClose: true, + prepare: func(d *Docker) { _ = d.Init(); _ = d.Check() }, + }, + "after Collect": { + wantClose: true, + prepare: func(d *Docker) { _ = d.Init(); d.Collect() }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + m := &mockClient{} + d := New() + d.newClient = prepareNewClientFunc(m) + + test.prepare(d) + + require.NotPanics(t, d.Cleanup) + + if test.wantClose { + assert.True(t, m.closeCalled) + } else { + assert.False(t, m.closeCalled) + } + }) + } +} + +func TestDocker_Check(t *testing.T) { + tests := map[string]struct { + prepare func() *Docker + wantFail bool + }{ + "case success": { + wantFail: false, + prepare: func() *Docker { + return prepareCaseSuccess() + }, + }, + "case success without container size": { + wantFail: false, + prepare: func() *Docker { + return prepareCaseSuccessWithoutContainerSize() + }, + }, + "fail on case err on Info()": { + wantFail: 
true, + prepare: func() *Docker { + return prepareCaseErrOnInfo() + }, + }, + "fail on case err on ImageList()": { + wantFail: true, + prepare: func() *Docker { + return prepareCaseErrOnImageList() + }, + }, + "fail on case err on ContainerList()": { + wantFail: true, + prepare: func() *Docker { + return prepareCaseErrOnContainerList() + }, + }, + "fail on case err on creating Docker client": { + wantFail: true, + prepare: func() *Docker { + return prepareCaseErrCreatingClient() + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + d := test.prepare() + + require.NoError(t, d.Init()) + + if test.wantFail { + assert.Error(t, d.Check()) + } else { + assert.NoError(t, d.Check()) + } + }) + } +} + +func TestDocker_Collect(t *testing.T) { + tests := map[string]struct { + prepare func() *Docker + expected map[string]int64 + }{ + "case success": { + prepare: func() *Docker { + return prepareCaseSuccess() + }, + expected: map[string]int64{ + "container_container10_health_status_healthy": 0, + "container_container10_health_status_none": 0, + "container_container10_health_status_not_running_unhealthy": 1, + "container_container10_health_status_starting": 0, + "container_container10_health_status_unhealthy": 0, + "container_container10_size_root_fs": 0, + "container_container10_size_rw": 0, + "container_container10_state_created": 0, + "container_container10_state_dead": 1, + "container_container10_state_exited": 0, + "container_container10_state_paused": 0, + "container_container10_state_removing": 0, + "container_container10_state_restarting": 0, + "container_container10_state_running": 0, + "container_container11_health_status_healthy": 0, + "container_container11_health_status_none": 0, + "container_container11_health_status_not_running_unhealthy": 0, + "container_container11_health_status_starting": 1, + "container_container11_health_status_unhealthy": 0, + "container_container11_size_root_fs": 0, + "container_container11_size_rw": 0, + 
"container_container11_state_created": 0, + "container_container11_state_dead": 0, + "container_container11_state_exited": 0, + "container_container11_state_paused": 0, + "container_container11_state_removing": 1, + "container_container11_state_restarting": 0, + "container_container11_state_running": 0, + "container_container12_health_status_healthy": 0, + "container_container12_health_status_none": 0, + "container_container12_health_status_not_running_unhealthy": 0, + "container_container12_health_status_starting": 1, + "container_container12_health_status_unhealthy": 0, + "container_container12_size_root_fs": 0, + "container_container12_size_rw": 0, + "container_container12_state_created": 0, + "container_container12_state_dead": 0, + "container_container12_state_exited": 1, + "container_container12_state_paused": 0, + "container_container12_state_removing": 0, + "container_container12_state_restarting": 0, + "container_container12_state_running": 0, + "container_container13_health_status_healthy": 0, + "container_container13_health_status_none": 0, + "container_container13_health_status_not_running_unhealthy": 0, + "container_container13_health_status_starting": 1, + "container_container13_health_status_unhealthy": 0, + "container_container13_size_root_fs": 0, + "container_container13_size_rw": 0, + "container_container13_state_created": 0, + "container_container13_state_dead": 0, + "container_container13_state_exited": 1, + "container_container13_state_paused": 0, + "container_container13_state_removing": 0, + "container_container13_state_restarting": 0, + "container_container13_state_running": 0, + "container_container14_health_status_healthy": 0, + "container_container14_health_status_none": 1, + "container_container14_health_status_not_running_unhealthy": 0, + "container_container14_health_status_starting": 0, + "container_container14_health_status_unhealthy": 0, + "container_container14_size_root_fs": 0, + "container_container14_size_rw": 0, + 
"container_container14_state_created": 0, + "container_container14_state_dead": 1, + "container_container14_state_exited": 0, + "container_container14_state_paused": 0, + "container_container14_state_removing": 0, + "container_container14_state_restarting": 0, + "container_container14_state_running": 0, + "container_container15_health_status_healthy": 0, + "container_container15_health_status_none": 1, + "container_container15_health_status_not_running_unhealthy": 0, + "container_container15_health_status_starting": 0, + "container_container15_health_status_unhealthy": 0, + "container_container15_size_root_fs": 0, + "container_container15_size_rw": 0, + "container_container15_state_created": 0, + "container_container15_state_dead": 1, + "container_container15_state_exited": 0, + "container_container15_state_paused": 0, + "container_container15_state_removing": 0, + "container_container15_state_restarting": 0, + "container_container15_state_running": 0, + "container_container16_health_status_healthy": 0, + "container_container16_health_status_none": 1, + "container_container16_health_status_not_running_unhealthy": 0, + "container_container16_health_status_starting": 0, + "container_container16_health_status_unhealthy": 0, + "container_container16_size_root_fs": 0, + "container_container16_size_rw": 0, + "container_container16_state_created": 0, + "container_container16_state_dead": 1, + "container_container16_state_exited": 0, + "container_container16_state_paused": 0, + "container_container16_state_removing": 0, + "container_container16_state_restarting": 0, + "container_container16_state_running": 0, + "container_container1_health_status_healthy": 1, + "container_container1_health_status_none": 0, + "container_container1_health_status_not_running_unhealthy": 0, + "container_container1_health_status_starting": 0, + "container_container1_health_status_unhealthy": 0, + "container_container1_size_root_fs": 0, + "container_container1_size_rw": 0, + 
"container_container1_state_created": 1, + "container_container1_state_dead": 0, + "container_container1_state_exited": 0, + "container_container1_state_paused": 0, + "container_container1_state_removing": 0, + "container_container1_state_restarting": 0, + "container_container1_state_running": 0, + "container_container2_health_status_healthy": 1, + "container_container2_health_status_none": 0, + "container_container2_health_status_not_running_unhealthy": 0, + "container_container2_health_status_starting": 0, + "container_container2_health_status_unhealthy": 0, + "container_container2_size_root_fs": 0, + "container_container2_size_rw": 0, + "container_container2_state_created": 0, + "container_container2_state_dead": 0, + "container_container2_state_exited": 0, + "container_container2_state_paused": 0, + "container_container2_state_removing": 0, + "container_container2_state_restarting": 0, + "container_container2_state_running": 1, + "container_container3_health_status_healthy": 1, + "container_container3_health_status_none": 0, + "container_container3_health_status_not_running_unhealthy": 0, + "container_container3_health_status_starting": 0, + "container_container3_health_status_unhealthy": 0, + "container_container3_size_root_fs": 0, + "container_container3_size_rw": 0, + "container_container3_state_created": 0, + "container_container3_state_dead": 0, + "container_container3_state_exited": 0, + "container_container3_state_paused": 0, + "container_container3_state_removing": 0, + "container_container3_state_restarting": 0, + "container_container3_state_running": 1, + "container_container4_health_status_healthy": 0, + "container_container4_health_status_none": 0, + "container_container4_health_status_not_running_unhealthy": 1, + "container_container4_health_status_starting": 0, + "container_container4_health_status_unhealthy": 0, + "container_container4_size_root_fs": 0, + "container_container4_size_rw": 0, + "container_container4_state_created": 1, + 
"container_container4_state_dead": 0, + "container_container4_state_exited": 0, + "container_container4_state_paused": 0, + "container_container4_state_removing": 0, + "container_container4_state_restarting": 0, + "container_container4_state_running": 0, + "container_container5_health_status_healthy": 0, + "container_container5_health_status_none": 0, + "container_container5_health_status_not_running_unhealthy": 0, + "container_container5_health_status_starting": 0, + "container_container5_health_status_unhealthy": 1, + "container_container5_size_root_fs": 0, + "container_container5_size_rw": 0, + "container_container5_state_created": 0, + "container_container5_state_dead": 0, + "container_container5_state_exited": 0, + "container_container5_state_paused": 0, + "container_container5_state_removing": 0, + "container_container5_state_restarting": 0, + "container_container5_state_running": 1, + "container_container6_health_status_healthy": 0, + "container_container6_health_status_none": 0, + "container_container6_health_status_not_running_unhealthy": 1, + "container_container6_health_status_starting": 0, + "container_container6_health_status_unhealthy": 0, + "container_container6_size_root_fs": 0, + "container_container6_size_rw": 0, + "container_container6_state_created": 0, + "container_container6_state_dead": 0, + "container_container6_state_exited": 0, + "container_container6_state_paused": 1, + "container_container6_state_removing": 0, + "container_container6_state_restarting": 0, + "container_container6_state_running": 0, + "container_container7_health_status_healthy": 0, + "container_container7_health_status_none": 0, + "container_container7_health_status_not_running_unhealthy": 1, + "container_container7_health_status_starting": 0, + "container_container7_health_status_unhealthy": 0, + "container_container7_size_root_fs": 0, + "container_container7_size_rw": 0, + "container_container7_state_created": 0, + "container_container7_state_dead": 0, + 
"container_container7_state_exited": 0, + "container_container7_state_paused": 0, + "container_container7_state_removing": 0, + "container_container7_state_restarting": 1, + "container_container7_state_running": 0, + "container_container8_health_status_healthy": 0, + "container_container8_health_status_none": 0, + "container_container8_health_status_not_running_unhealthy": 1, + "container_container8_health_status_starting": 0, + "container_container8_health_status_unhealthy": 0, + "container_container8_size_root_fs": 0, + "container_container8_size_rw": 0, + "container_container8_state_created": 0, + "container_container8_state_dead": 0, + "container_container8_state_exited": 0, + "container_container8_state_paused": 0, + "container_container8_state_removing": 1, + "container_container8_state_restarting": 0, + "container_container8_state_running": 0, + "container_container9_health_status_healthy": 0, + "container_container9_health_status_none": 0, + "container_container9_health_status_not_running_unhealthy": 1, + "container_container9_health_status_starting": 0, + "container_container9_health_status_unhealthy": 0, + "container_container9_size_root_fs": 0, + "container_container9_size_rw": 0, + "container_container9_state_created": 0, + "container_container9_state_dead": 0, + "container_container9_state_exited": 1, + "container_container9_state_paused": 0, + "container_container9_state_removing": 0, + "container_container9_state_restarting": 0, + "container_container9_state_running": 0, + "containers_health_status_healthy": 3, + "containers_health_status_none": 3, + "containers_health_status_not_running_unhealthy": 6, + "containers_health_status_starting": 3, + "containers_health_status_unhealthy": 1, + "containers_state_exited": 6, + "containers_state_paused": 5, + "containers_state_running": 4, + "images_active": 1, + "images_dangling": 1, + "images_size": 300, + }, + }, + "case success without container size": { + prepare: func() *Docker { + return 
prepareCaseSuccessWithoutContainerSize() + }, + expected: map[string]int64{ + "container_container10_health_status_healthy": 0, + "container_container10_health_status_none": 0, + "container_container10_health_status_not_running_unhealthy": 1, + "container_container10_health_status_starting": 0, + "container_container10_health_status_unhealthy": 0, + "container_container10_size_root_fs": 0, + "container_container10_size_rw": 0, + "container_container10_state_created": 0, + "container_container10_state_dead": 1, + "container_container10_state_exited": 0, + "container_container10_state_paused": 0, + "container_container10_state_removing": 0, + "container_container10_state_restarting": 0, + "container_container10_state_running": 0, + "container_container11_health_status_healthy": 0, + "container_container11_health_status_none": 0, + "container_container11_health_status_not_running_unhealthy": 0, + "container_container11_health_status_starting": 1, + "container_container11_health_status_unhealthy": 0, + "container_container11_size_root_fs": 0, + "container_container11_size_rw": 0, + "container_container11_state_created": 0, + "container_container11_state_dead": 0, + "container_container11_state_exited": 0, + "container_container11_state_paused": 0, + "container_container11_state_removing": 1, + "container_container11_state_restarting": 0, + "container_container11_state_running": 0, + "container_container12_health_status_healthy": 0, + "container_container12_health_status_none": 0, + "container_container12_health_status_not_running_unhealthy": 0, + "container_container12_health_status_starting": 1, + "container_container12_health_status_unhealthy": 0, + "container_container12_size_root_fs": 0, + "container_container12_size_rw": 0, + "container_container12_state_created": 0, + "container_container12_state_dead": 0, + "container_container12_state_exited": 1, + "container_container12_state_paused": 0, + "container_container12_state_removing": 0, + 
"container_container12_state_restarting": 0, + "container_container12_state_running": 0, + "container_container13_health_status_healthy": 0, + "container_container13_health_status_none": 0, + "container_container13_health_status_not_running_unhealthy": 0, + "container_container13_health_status_starting": 1, + "container_container13_health_status_unhealthy": 0, + "container_container13_size_root_fs": 0, + "container_container13_size_rw": 0, + "container_container13_state_created": 0, + "container_container13_state_dead": 0, + "container_container13_state_exited": 1, + "container_container13_state_paused": 0, + "container_container13_state_removing": 0, + "container_container13_state_restarting": 0, + "container_container13_state_running": 0, + "container_container14_health_status_healthy": 0, + "container_container14_health_status_none": 1, + "container_container14_health_status_not_running_unhealthy": 0, + "container_container14_health_status_starting": 0, + "container_container14_health_status_unhealthy": 0, + "container_container14_size_root_fs": 0, + "container_container14_size_rw": 0, + "container_container14_state_created": 0, + "container_container14_state_dead": 1, + "container_container14_state_exited": 0, + "container_container14_state_paused": 0, + "container_container14_state_removing": 0, + "container_container14_state_restarting": 0, + "container_container14_state_running": 0, + "container_container15_health_status_healthy": 0, + "container_container15_health_status_none": 1, + "container_container15_health_status_not_running_unhealthy": 0, + "container_container15_health_status_starting": 0, + "container_container15_health_status_unhealthy": 0, + "container_container15_size_root_fs": 0, + "container_container15_size_rw": 0, + "container_container15_state_created": 0, + "container_container15_state_dead": 1, + "container_container15_state_exited": 0, + "container_container15_state_paused": 0, + "container_container15_state_removing": 0, + 
"container_container15_state_restarting": 0, + "container_container15_state_running": 0, + "container_container16_health_status_healthy": 0, + "container_container16_health_status_none": 1, + "container_container16_health_status_not_running_unhealthy": 0, + "container_container16_health_status_starting": 0, + "container_container16_health_status_unhealthy": 0, + "container_container16_size_root_fs": 0, + "container_container16_size_rw": 0, + "container_container16_state_created": 0, + "container_container16_state_dead": 1, + "container_container16_state_exited": 0, + "container_container16_state_paused": 0, + "container_container16_state_removing": 0, + "container_container16_state_restarting": 0, + "container_container16_state_running": 0, + "container_container1_health_status_healthy": 1, + "container_container1_health_status_none": 0, + "container_container1_health_status_not_running_unhealthy": 0, + "container_container1_health_status_starting": 0, + "container_container1_health_status_unhealthy": 0, + "container_container1_size_root_fs": 0, + "container_container1_size_rw": 0, + "container_container1_state_created": 1, + "container_container1_state_dead": 0, + "container_container1_state_exited": 0, + "container_container1_state_paused": 0, + "container_container1_state_removing": 0, + "container_container1_state_restarting": 0, + "container_container1_state_running": 0, + "container_container2_health_status_healthy": 1, + "container_container2_health_status_none": 0, + "container_container2_health_status_not_running_unhealthy": 0, + "container_container2_health_status_starting": 0, + "container_container2_health_status_unhealthy": 0, + "container_container2_size_root_fs": 0, + "container_container2_size_rw": 0, + "container_container2_state_created": 0, + "container_container2_state_dead": 0, + "container_container2_state_exited": 0, + "container_container2_state_paused": 0, + "container_container2_state_removing": 0, + 
"container_container2_state_restarting": 0, + "container_container2_state_running": 1, + "container_container3_health_status_healthy": 1, + "container_container3_health_status_none": 0, + "container_container3_health_status_not_running_unhealthy": 0, + "container_container3_health_status_starting": 0, + "container_container3_health_status_unhealthy": 0, + "container_container3_size_root_fs": 0, + "container_container3_size_rw": 0, + "container_container3_state_created": 0, + "container_container3_state_dead": 0, + "container_container3_state_exited": 0, + "container_container3_state_paused": 0, + "container_container3_state_removing": 0, + "container_container3_state_restarting": 0, + "container_container3_state_running": 1, + "container_container4_health_status_healthy": 0, + "container_container4_health_status_none": 0, + "container_container4_health_status_not_running_unhealthy": 1, + "container_container4_health_status_starting": 0, + "container_container4_health_status_unhealthy": 0, + "container_container4_size_root_fs": 0, + "container_container4_size_rw": 0, + "container_container4_state_created": 1, + "container_container4_state_dead": 0, + "container_container4_state_exited": 0, + "container_container4_state_paused": 0, + "container_container4_state_removing": 0, + "container_container4_state_restarting": 0, + "container_container4_state_running": 0, + "container_container5_health_status_healthy": 0, + "container_container5_health_status_none": 0, + "container_container5_health_status_not_running_unhealthy": 0, + "container_container5_health_status_starting": 0, + "container_container5_health_status_unhealthy": 1, + "container_container5_size_root_fs": 0, + "container_container5_size_rw": 0, + "container_container5_state_created": 0, + "container_container5_state_dead": 0, + "container_container5_state_exited": 0, + "container_container5_state_paused": 0, + "container_container5_state_removing": 0, + "container_container5_state_restarting": 0, + 
"container_container5_state_running": 1, + "container_container6_health_status_healthy": 0, + "container_container6_health_status_none": 0, + "container_container6_health_status_not_running_unhealthy": 1, + "container_container6_health_status_starting": 0, + "container_container6_health_status_unhealthy": 0, + "container_container6_size_root_fs": 0, + "container_container6_size_rw": 0, + "container_container6_state_created": 0, + "container_container6_state_dead": 0, + "container_container6_state_exited": 0, + "container_container6_state_paused": 1, + "container_container6_state_removing": 0, + "container_container6_state_restarting": 0, + "container_container6_state_running": 0, + "container_container7_health_status_healthy": 0, + "container_container7_health_status_none": 0, + "container_container7_health_status_not_running_unhealthy": 1, + "container_container7_health_status_starting": 0, + "container_container7_health_status_unhealthy": 0, + "container_container7_size_root_fs": 0, + "container_container7_size_rw": 0, + "container_container7_state_created": 0, + "container_container7_state_dead": 0, + "container_container7_state_exited": 0, + "container_container7_state_paused": 0, + "container_container7_state_removing": 0, + "container_container7_state_restarting": 1, + "container_container7_state_running": 0, + "container_container8_health_status_healthy": 0, + "container_container8_health_status_none": 0, + "container_container8_health_status_not_running_unhealthy": 1, + "container_container8_health_status_starting": 0, + "container_container8_health_status_unhealthy": 0, + "container_container8_size_root_fs": 0, + "container_container8_size_rw": 0, + "container_container8_state_created": 0, + "container_container8_state_dead": 0, + "container_container8_state_exited": 0, + "container_container8_state_paused": 0, + "container_container8_state_removing": 1, + "container_container8_state_restarting": 0, + "container_container8_state_running": 0, + 
"container_container9_health_status_healthy": 0, + "container_container9_health_status_none": 0, + "container_container9_health_status_not_running_unhealthy": 1, + "container_container9_health_status_starting": 0, + "container_container9_health_status_unhealthy": 0, + "container_container9_size_root_fs": 0, + "container_container9_size_rw": 0, + "container_container9_state_created": 0, + "container_container9_state_dead": 0, + "container_container9_state_exited": 1, + "container_container9_state_paused": 0, + "container_container9_state_removing": 0, + "container_container9_state_restarting": 0, + "container_container9_state_running": 0, + "containers_health_status_healthy": 3, + "containers_health_status_none": 3, + "containers_health_status_not_running_unhealthy": 6, + "containers_health_status_starting": 3, + "containers_health_status_unhealthy": 1, + "containers_state_exited": 6, + "containers_state_paused": 5, + "containers_state_running": 4, + "images_active": 1, + "images_dangling": 1, + "images_size": 300, + }, + }, + "fail on case err on Info()": { + prepare: func() *Docker { + return prepareCaseErrOnInfo() + }, + expected: nil, + }, + "fail on case err on ImageList()": { + prepare: func() *Docker { + return prepareCaseErrOnImageList() + }, + expected: nil, + }, + "fail on case err on ContainerList()": { + prepare: func() *Docker { + return prepareCaseErrOnContainerList() + }, + expected: nil, + }, + "fail on case err on creating Docker client": { + prepare: func() *Docker { + return prepareCaseErrCreatingClient() + }, + expected: nil, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + d := test.prepare() + + require.NoError(t, d.Init()) + + mx := d.Collect() + + require.Equal(t, test.expected, mx) + + if d.client != nil { + m, ok := d.client.(*mockClient) + require.True(t, ok) + require.True(t, m.negotiateAPIVersionCalled) + } + + }) + } +} + +func prepareCaseSuccess() *Docker { + d := New() + d.CollectContainerSize = true + 
d.newClient = prepareNewClientFunc(&mockClient{}) + return d +} + +func prepareCaseSuccessWithoutContainerSize() *Docker { + d := New() + d.CollectContainerSize = false + d.newClient = prepareNewClientFunc(&mockClient{}) + return d +} + +func prepareCaseErrOnInfo() *Docker { + d := New() + d.newClient = prepareNewClientFunc(&mockClient{errOnInfo: true}) + return d +} + +func prepareCaseErrOnImageList() *Docker { + d := New() + d.newClient = prepareNewClientFunc(&mockClient{errOnImageList: true}) + return d +} + +func prepareCaseErrOnContainerList() *Docker { + d := New() + d.newClient = prepareNewClientFunc(&mockClient{errOnContainerList: true}) + return d +} + +func prepareCaseErrCreatingClient() *Docker { + d := New() + d.newClient = prepareNewClientFunc(nil) + return d +} + +func prepareNewClientFunc(m *mockClient) func(_ Config) (dockerClient, error) { + if m == nil { + return func(_ Config) (dockerClient, error) { return nil, errors.New("mock.newClient() error") } + } + return func(_ Config) (dockerClient, error) { return m, nil } +} + +type mockClient struct { + errOnInfo bool + errOnImageList bool + errOnContainerList bool + negotiateAPIVersionCalled bool + closeCalled bool +} + +func (m *mockClient) Info(_ context.Context) (typesSystem.Info, error) { + if m.errOnInfo { + return typesSystem.Info{}, errors.New("mockClient.Info() error") + } + + return typesSystem.Info{ + ContainersRunning: 4, + ContainersPaused: 5, + ContainersStopped: 6, + }, nil +} + +func (m *mockClient) ContainerList(_ context.Context, opts typesContainer.ListOptions) ([]types.Container, error) { + if m.errOnContainerList { + return nil, errors.New("mockClient.ContainerList() error") + } + + v := opts.Filters.Get("health") + + if len(v) == 0 { + return nil, errors.New("mockClient.ContainerList() error (expect 'health' filter)") + } + + var containers []types.Container + + switch v[0] { + case types.Healthy: + containers = []types.Container{ + {Names: []string{"container1"}, State: 
"created", Image: "example/example:v1"}, + {Names: []string{"container2"}, State: "running", Image: "example/example:v1"}, + {Names: []string{"container3"}, State: "running", Image: "example/example:v1"}, + } + case types.Unhealthy: + containers = []types.Container{ + {Names: []string{"container4"}, State: "created", Image: "example/example:v2"}, + {Names: []string{"container5"}, State: "running", Image: "example/example:v2"}, + {Names: []string{"container6"}, State: "paused", Image: "example/example:v2"}, + {Names: []string{"container7"}, State: "restarting", Image: "example/example:v2"}, + {Names: []string{"container8"}, State: "removing", Image: "example/example:v2"}, + {Names: []string{"container9"}, State: "exited", Image: "example/example:v2"}, + {Names: []string{"container10"}, State: "dead", Image: "example/example:v2"}, + } + case types.Starting: + containers = []types.Container{ + {Names: []string{"container11"}, State: "removing", Image: "example/example:v3"}, + {Names: []string{"container12"}, State: "exited", Image: "example/example:v3"}, + {Names: []string{"container13"}, State: "exited", Image: "example/example:v3"}, + } + case types.NoHealthcheck: + containers = []types.Container{ + {Names: []string{"container14"}, State: "dead", Image: "example/example:v4"}, + {Names: []string{"container15"}, State: "dead", Image: "example/example:v4"}, + {Names: []string{"container16"}, State: "dead", Image: "example/example:v4"}, + } + } + + if opts.Size { + for _, c := range containers { + c.SizeRw = 123 + c.SizeRootFs = 321 + } + } + + return containers, nil +} + +func (m *mockClient) ImageList(_ context.Context, _ types.ImageListOptions) ([]typesImage.Summary, error) { + if m.errOnImageList { + return nil, errors.New("mockClient.ImageList() error") + } + + return []typesImage.Summary{ + { + Containers: 0, + Size: 100, + }, + { + Containers: 1, + Size: 200, + }, + }, nil +} + +func (m *mockClient) NegotiateAPIVersion(_ context.Context) { + 
m.negotiateAPIVersionCalled = true +} + +func (m *mockClient) Close() error { + m.closeCalled = true + return nil +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/integrations/docker.md b/src/go/collectors/go.d.plugin/modules/docker/integrations/docker.md new file mode 100644 index 000000000..abe7fe438 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/integrations/docker.md @@ -0,0 +1,208 @@ +<!--startmeta +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/docker/README.md" +meta_yaml: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/docker/metadata.yaml" +sidebar_label: "Docker" +learn_status: "Published" +learn_rel_path: "Collecting Metrics/Containers and VMs" +most_popular: True +message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" +endmeta--> + +# Docker + + +<img src="https://netdata.cloud/img/docker.svg" width="150"/> + + +Plugin: go.d.plugin +Module: docker + +<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> + +## Overview + +This collector monitors Docker containers state, health status and more. + + +It connects to the Docker instance via a TCP or UNIX socket and executes the following commands: + +- [System info](https://docs.docker.com/engine/api/v1.43/#tag/System/operation/SystemInfo). +- [List images](https://docs.docker.com/engine/api/v1.43/#tag/Image/operation/ImageList). +- [List containers](https://docs.docker.com/engine/api/v1.43/#tag/Container/operation/ContainerList). + + +This collector is supported on all platforms. + +This collector supports collecting metrics from multiple instances of this integration, including remote instances. + +Requires netdata user to be in the docker group. + +### Default Behavior + +#### Auto-Detection + +It discovers instances running on localhost by attempting to connect to a known Docker UNIX socket: `/var/run/docker.sock`. 
+ + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. + +#### Performance Impact + +Enabling `collect_container_size` may result in high CPU usage depending on the version of Docker Engine. + + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per Docker instance + +These metrics refer to the entire monitored application. + +This scope has no labels. + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| docker.containers_state | running, paused, exited | containers | +| docker.containers_health_status | healthy, unhealthy, not_running_unhealthy, starting, no_healthcheck | containers | +| docker.images | active, dangling | images | +| docker.images_size | size | bytes | + +### Per container + +Metrics related to containers. Each container provides its own set of the following metrics. + +Labels: + +| Label | Description | +|:-----------|:----------------| +| container_name | The container's name | +| image | The image name the container uses | + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| docker.container_state | running, paused, exited, created, restarting, removing, dead | state | +| docker.container_health_status | healthy, unhealthy, not_running_unhealthy, starting, no_healthcheck | status | +| docker.container_writeable_layer_size | writeable_layer | size | + + + +## Alerts + + +The following alerts are available: + +| Alert name | On metric | Description | +|:------------|:----------|:------------| +| [ docker_container_unhealthy ](https://github.com/netdata/netdata/blob/master/src/health/health.d/docker.conf) | docker.container_health_status | ${label:container_name} docker container health status is unhealthy | + + +## Setup + +### Prerequisites + +No action required. 
+ +### Configuration + +#### File + +The configuration file name for this integration is `go.d/docker.conf`. + + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory). + +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/docker.conf +``` +#### Options + +The following options can be defined globally: update_every, autodetection_retry. + + +<details><summary>Config options</summary> + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 1 | no | +| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no | +| address | Docker daemon's listening address. When using a TCP socket, the format is: tcp://[ip]:[port] | unix:///var/run/docker.sock | yes | +| timeout | Request timeout in seconds. | 2 | no | +| collect_container_size | Whether to collect container writable layer size. | no | no | + +</details> + +#### Examples + +##### Basic + +An example configuration. + +```yaml +jobs: + - name: local + address: 'unix:///var/run/docker.sock' + +``` +##### Multi-instance + +> **Note**: When you define multiple jobs, their names must be unique. + +Collecting metrics from local and remote instances. + + +<details><summary>Config</summary> + +```yaml +jobs: + - name: local + address: 'unix:///var/run/docker.sock' + + - name: remote + address: 'tcp://203.0.113.10:2375' + +``` +</details> + + + +## Troubleshooting + +### Debug Mode + +To troubleshoot issues with the `docker` collector, run the `go.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. + + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m docker + ``` + + diff --git a/src/go/collectors/go.d.plugin/modules/docker/metadata.yaml b/src/go/collectors/go.d.plugin/modules/docker/metadata.yaml new file mode 100644 index 000000000..8fc6853a9 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/metadata.yaml @@ -0,0 +1,190 @@ +plugin_name: go.d.plugin +modules: + - meta: + id: collector-go.d.plugin-docker + plugin_name: go.d.plugin + module_name: docker + alternative_monitored_instances: [] + monitored_instance: + name: Docker + link: https://www.docker.com/ + categories: + - data-collection.containers-and-vms + icon_filename: docker.svg + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - container + most_popular: true + overview: + data_collection: + metrics_description: | + This collector monitors Docker containers state, health status and more. + method_description: | + It connects to the Docker instance via a TCP or UNIX socket and executes the following commands: + + - [System info](https://docs.docker.com/engine/api/v1.43/#tag/System/operation/SystemInfo). + - [List images](https://docs.docker.com/engine/api/v1.43/#tag/Image/operation/ImageList). + - [List containers](https://docs.docker.com/engine/api/v1.43/#tag/Container/operation/ContainerList). + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: Requires netdata user to be in the docker group. + default_behavior: + auto_detection: + description: | + It discovers instances running on localhost by attempting to connect to a known Docker UNIX socket: `/var/run/docker.sock`. 
+ limits: + description: "" + performance_impact: + description: | + Enabling `collect_container_size` may result in high CPU usage depending on the version of Docker Engine. + setup: + prerequisites: + list: [] + configuration: + file: + name: go.d/docker.conf + options: + description: | + The following options can be defined globally: update_every, autodetection_retry. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Data collection frequency. + default_value: 1 + required: false + - name: autodetection_retry + description: Recheck interval in seconds. Zero means no recheck will be scheduled. + default_value: 0 + required: false + - name: address + description: 'Docker daemon''s listening address. When using a TCP socket, the format is: tcp://[ip]:[port]' + default_value: unix:///var/run/docker.sock + required: true + - name: timeout + description: Request timeout in seconds. + default_value: 2 + required: false + - name: collect_container_size + description: Whether to collect container writable layer size. + default_value: "no" + required: false + examples: + folding: + enabled: true + title: Config + list: + - name: Basic + description: An example configuration. + folding: + enabled: false + config: | + jobs: + - name: local + address: 'unix:///var/run/docker.sock' + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. 
+ config: | + jobs: + - name: local + address: 'unix:///var/run/docker.sock' + + - name: remote + address: 'tcp://203.0.113.10:2375' + troubleshooting: + problems: + list: [] + alerts: + - name: docker_container_unhealthy + metric: docker.container_health_status + info: ${label:container_name} docker container health status is unhealthy + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/docker.conf + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: These metrics refer to the entire monitored application. + labels: [] + metrics: + - name: docker.containers_state + description: Total number of Docker containers in various states + unit: containers + chart_type: stacked + dimensions: + - name: running + - name: paused + - name: exited + - name: docker.containers_health_status + description: Total number of Docker containers in various health states + unit: containers + chart_type: line + dimensions: + - name: healthy + - name: unhealthy + - name: not_running_unhealthy + - name: starting + - name: no_healthcheck + - name: docker.images + description: Total number of Docker images in various states + unit: images + chart_type: stacked + dimensions: + - name: active + - name: dangling + - name: docker.images_size + description: Total size of all Docker images + unit: bytes + chart_type: line + dimensions: + - name: size + - name: container + description: Metrics related to containers. Each container provides its own set of the following metrics. 
+ labels: + - name: container_name + description: The container's name + - name: image + description: The image name the container uses + metrics: + - name: docker.container_state + description: Docker container state + unit: state + chart_type: line + dimensions: + - name: running + - name: paused + - name: exited + - name: created + - name: restarting + - name: removing + - name: dead + - name: docker.container_health_status + description: Docker container health status + unit: status + chart_type: line + dimensions: + - name: healthy + - name: unhealthy + - name: not_running_unhealthy + - name: starting + - name: no_healthcheck + - name: docker.container_writeable_layer_size + description: Docker container writable layer size + unit: size + chart_type: line + dimensions: + - name: writeable_layer diff --git a/src/go/collectors/go.d.plugin/modules/docker/testdata/config.json b/src/go/collectors/go.d.plugin/modules/docker/testdata/config.json new file mode 100644 index 000000000..5e687448c --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/testdata/config.json @@ -0,0 +1,6 @@ +{ + "update_every": 123, + "address": "ok", + "timeout": 123.123, + "collect_container_size": true +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/testdata/config.yaml b/src/go/collectors/go.d.plugin/modules/docker/testdata/config.yaml new file mode 100644 index 000000000..2b0f32225 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/testdata/config.yaml @@ -0,0 +1,4 @@ +update_every: 123 +address: "ok" +timeout: 123.123 +collect_container_size: yes diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/README.md b/src/go/collectors/go.d.plugin/modules/docker_engine/README.md new file mode 120000 index 000000000..f00a4cd97 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/README.md @@ -0,0 +1 @@ +integrations/docker_engine.md
\ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/charts.go b/src/go/collectors/go.d.plugin/modules/docker_engine/charts.go new file mode 100644 index 000000000..d23f6e780 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/charts.go @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker_engine + +import "github.com/netdata/netdata/go/go.d.plugin/agent/module" + +type ( + Charts = module.Charts + Dims = module.Dims +) + +var charts = Charts{ + { + ID: "engine_daemon_container_actions", + Title: "Container Actions", + Units: "actions/s", + Fam: "containers", + Ctx: "docker_engine.engine_daemon_container_actions", + Type: module.Stacked, + Dims: Dims{ + {ID: "container_actions_changes", Name: "changes", Algo: module.Incremental}, + {ID: "container_actions_commit", Name: "commit", Algo: module.Incremental}, + {ID: "container_actions_create", Name: "create", Algo: module.Incremental}, + {ID: "container_actions_delete", Name: "delete", Algo: module.Incremental}, + {ID: "container_actions_start", Name: "start", Algo: module.Incremental}, + }, + }, + { + ID: "engine_daemon_container_states_containers", + Title: "Containers In Various States", + Units: "containers", + Fam: "containers", + Ctx: "docker_engine.engine_daemon_container_states_containers", + Type: module.Stacked, + Dims: Dims{ + {ID: "container_states_running", Name: "running"}, + {ID: "container_states_paused", Name: "paused"}, + {ID: "container_states_stopped", Name: "stopped"}, + }, + }, + { + ID: "builder_builds_failed_total", + Title: "Builder Builds Fails By Reason", + Units: "fails/s", + Fam: "builder", + Ctx: "docker_engine.builder_builds_failed_total", + Type: module.Stacked, + Dims: Dims{ + {ID: "builder_fails_build_canceled", Name: "build_canceled", Algo: module.Incremental}, + {ID: "builder_fails_build_target_not_reachable_error", Name: "build_target_not_reachable_error", Algo: module.Incremental}, + {ID: 
"builder_fails_command_not_supported_error", Name: "command_not_supported_error", Algo: module.Incremental}, + {ID: "builder_fails_dockerfile_empty_error", Name: "dockerfile_empty_error", Algo: module.Incremental}, + {ID: "builder_fails_dockerfile_syntax_error", Name: "dockerfile_syntax_error", Algo: module.Incremental}, + {ID: "builder_fails_error_processing_commands_error", Name: "error_processing_commands_error", Algo: module.Incremental}, + {ID: "builder_fails_missing_onbuild_arguments_error", Name: "missing_onbuild_arguments_error", Algo: module.Incremental}, + {ID: "builder_fails_unknown_instruction_error", Name: "unknown_instruction_error", Algo: module.Incremental}, + }, + }, + { + ID: "engine_daemon_health_checks_failed_total", + Title: "Health Checks", + Units: "events/s", + Fam: "health checks", + Ctx: "docker_engine.engine_daemon_health_checks_failed_total", + Dims: Dims{ + {ID: "health_checks_failed", Name: "fails", Algo: module.Incremental}, + }, + }, +} + +var swarmManagerCharts = Charts{ + { + ID: "swarm_manager_leader", + Title: "Swarm Manager Leader", + Units: "bool", + Fam: "swarm", + Ctx: "docker_engine.swarm_manager_leader", + Dims: Dims{ + {ID: "swarm_manager_leader", Name: "is_leader"}, + }, + }, + { + ID: "swarm_manager_object_store", + Title: "Swarm Manager Object Store", + Units: "objects", + Fam: "swarm", + Type: module.Stacked, + Ctx: "docker_engine.swarm_manager_object_store", + Dims: Dims{ + {ID: "swarm_manager_nodes_total", Name: "nodes"}, + {ID: "swarm_manager_services_total", Name: "services"}, + {ID: "swarm_manager_tasks_total", Name: "tasks"}, + {ID: "swarm_manager_networks_total", Name: "networks"}, + {ID: "swarm_manager_secrets_total", Name: "secrets"}, + {ID: "swarm_manager_configs_total", Name: "configs"}, + }, + }, + { + ID: "swarm_manager_nodes_per_state", + Title: "Swarm Manager Nodes Per State", + Units: "nodes", + Fam: "swarm", + Ctx: "docker_engine.swarm_manager_nodes_per_state", + Type: module.Stacked, + Dims: Dims{ + 
{ID: "swarm_manager_nodes_state_ready", Name: "ready"}, + {ID: "swarm_manager_nodes_state_down", Name: "down"}, + {ID: "swarm_manager_nodes_state_unknown", Name: "unknown"}, + {ID: "swarm_manager_nodes_state_disconnected", Name: "disconnected"}, + }, + }, + { + ID: "swarm_manager_tasks_per_state", + Title: "Swarm Manager Tasks Per State", + Units: "tasks", + Fam: "swarm", + Ctx: "docker_engine.swarm_manager_tasks_per_state", + Type: module.Stacked, + Dims: Dims{ + {ID: "swarm_manager_tasks_state_running", Name: "running"}, + {ID: "swarm_manager_tasks_state_failed", Name: "failed"}, + {ID: "swarm_manager_tasks_state_ready", Name: "ready"}, + {ID: "swarm_manager_tasks_state_rejected", Name: "rejected"}, + {ID: "swarm_manager_tasks_state_starting", Name: "starting"}, + {ID: "swarm_manager_tasks_state_shutdown", Name: "shutdown"}, + {ID: "swarm_manager_tasks_state_new", Name: "new"}, + {ID: "swarm_manager_tasks_state_orphaned", Name: "orphaned"}, + {ID: "swarm_manager_tasks_state_preparing", Name: "preparing"}, + {ID: "swarm_manager_tasks_state_pending", Name: "pending"}, + {ID: "swarm_manager_tasks_state_complete", Name: "complete"}, + {ID: "swarm_manager_tasks_state_remove", Name: "remove"}, + {ID: "swarm_manager_tasks_state_accepted", Name: "accepted"}, + {ID: "swarm_manager_tasks_state_assigned", Name: "assigned"}, + }, + }, +} diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/collect.go b/src/go/collectors/go.d.plugin/modules/docker_engine/collect.go new file mode 100644 index 000000000..171d58b55 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/collect.go @@ -0,0 +1,212 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker_engine + +import ( + "fmt" + + "github.com/netdata/netdata/go/go.d.plugin/pkg/prometheus" + "github.com/netdata/netdata/go/go.d.plugin/pkg/stm" +) + +func isDockerEngineMetrics(pms prometheus.Series) bool { + return pms.FindByName("engine_daemon_engine_info").Len() > 0 +} + +func (de 
*DockerEngine) collect() (map[string]int64, error) { + pms, err := de.prom.ScrapeSeries() + if err != nil { + return nil, err + } + + if !isDockerEngineMetrics(pms) { + return nil, fmt.Errorf("'%s' returned non docker engine metrics", de.URL) + } + + mx := de.collectMetrics(pms) + return stm.ToMap(mx), nil +} + +func (de *DockerEngine) collectMetrics(pms prometheus.Series) metrics { + var mx metrics + collectHealthChecks(&mx, pms) + collectContainerActions(&mx, pms) + collectBuilderBuildsFails(&mx, pms) + if hasContainerStates(pms) { + de.hasContainerStates = true + mx.Container.States = &containerStates{} + collectContainerStates(&mx, pms) + } + if isSwarmManager(pms) { + de.isSwarmManager = true + mx.SwarmManager = &swarmManager{} + collectSwarmManager(&mx, pms) + } + return mx +} + +func isSwarmManager(pms prometheus.Series) bool { + return pms.FindByName("swarm_node_manager").Max() == 1 +} + +func hasContainerStates(pms prometheus.Series) bool { + return pms.FindByName("engine_daemon_container_states_containers").Len() > 0 +} + +func collectHealthChecks(mx *metrics, raw prometheus.Series) { + v := raw.FindByName("engine_daemon_health_checks_failed_total").Max() + mx.HealthChecks.Failed = v +} + +func collectContainerActions(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName("engine_daemon_container_actions_seconds_count") { + action := metric.Labels.Get("action") + if action == "" { + continue + } + + v := metric.Value + switch action { + default: + case "changes": + mx.Container.Actions.Changes = v + case "commit": + mx.Container.Actions.Commit = v + case "create": + mx.Container.Actions.Create = v + case "delete": + mx.Container.Actions.Delete = v + case "start": + mx.Container.Actions.Start = v + } + } +} + +func collectContainerStates(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName("engine_daemon_container_states_containers") { + state := metric.Labels.Get("state") + if state == "" { + continue + } + 
+ v := metric.Value + switch state { + default: + case "paused": + mx.Container.States.Paused = v + case "running": + mx.Container.States.Running = v + case "stopped": + mx.Container.States.Stopped = v + } + } +} + +func collectBuilderBuildsFails(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName("builder_builds_failed_total") { + reason := metric.Labels.Get("reason") + if reason == "" { + continue + } + + v := metric.Value + switch reason { + default: + case "build_canceled": + mx.Builder.FailsByReason.BuildCanceled = v + case "build_target_not_reachable_error": + mx.Builder.FailsByReason.BuildTargetNotReachableError = v + case "command_not_supported_error": + mx.Builder.FailsByReason.CommandNotSupportedError = v + case "dockerfile_empty_error": + mx.Builder.FailsByReason.DockerfileEmptyError = v + case "dockerfile_syntax_error": + mx.Builder.FailsByReason.DockerfileSyntaxError = v + case "error_processing_commands_error": + mx.Builder.FailsByReason.ErrorProcessingCommandsError = v + case "missing_onbuild_arguments_error": + mx.Builder.FailsByReason.MissingOnbuildArgumentsError = v + case "unknown_instruction_error": + mx.Builder.FailsByReason.UnknownInstructionError = v + } + } +} + +func collectSwarmManager(mx *metrics, raw prometheus.Series) { + v := raw.FindByName("swarm_manager_configs_total").Max() + mx.SwarmManager.Configs = v + + v = raw.FindByName("swarm_manager_networks_total").Max() + mx.SwarmManager.Networks = v + + v = raw.FindByName("swarm_manager_secrets_total").Max() + mx.SwarmManager.Secrets = v + + v = raw.FindByName("swarm_manager_services_total").Max() + mx.SwarmManager.Services = v + + v = raw.FindByName("swarm_manager_leader").Max() + mx.SwarmManager.IsLeader = v + + for _, metric := range raw.FindByName("swarm_manager_nodes") { + state := metric.Labels.Get("state") + if state == "" { + continue + } + + v := metric.Value + switch state { + default: + case "disconnected": + mx.SwarmManager.Nodes.PerState.Disconnected 
= v + case "down": + mx.SwarmManager.Nodes.PerState.Down = v + case "ready": + mx.SwarmManager.Nodes.PerState.Ready = v + case "unknown": + mx.SwarmManager.Nodes.PerState.Unknown = v + } + mx.SwarmManager.Nodes.Total += v + } + + for _, metric := range raw.FindByName("swarm_manager_tasks_total") { + state := metric.Labels.Get("state") + if state == "" { + continue + } + + v := metric.Value + switch state { + default: + case "accepted": + mx.SwarmManager.Tasks.PerState.Accepted = v + case "assigned": + mx.SwarmManager.Tasks.PerState.Assigned = v + case "complete": + mx.SwarmManager.Tasks.PerState.Complete = v + case "failed": + mx.SwarmManager.Tasks.PerState.Failed = v + case "new": + mx.SwarmManager.Tasks.PerState.New = v + case "orphaned": + mx.SwarmManager.Tasks.PerState.Orphaned = v + case "pending": + mx.SwarmManager.Tasks.PerState.Pending = v + case "preparing": + mx.SwarmManager.Tasks.PerState.Preparing = v + case "ready": + mx.SwarmManager.Tasks.PerState.Ready = v + case "rejected": + mx.SwarmManager.Tasks.PerState.Rejected = v + case "remove": + mx.SwarmManager.Tasks.PerState.Remove = v + case "running": + mx.SwarmManager.Tasks.PerState.Running = v + case "shutdown": + mx.SwarmManager.Tasks.PerState.Shutdown = v + case "starting": + mx.SwarmManager.Tasks.PerState.Starting = v + } + mx.SwarmManager.Tasks.Total += v + } +} diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/config_schema.json b/src/go/collectors/go.d.plugin/modules/docker_engine/config_schema.json new file mode 100644 index 000000000..e967e29cd --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/config_schema.json @@ -0,0 +1,163 @@ +{ + "jsonSchema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Docker Engine collector configuration.", + "type": "object", + "properties": { + "update_every": { + "title": "Update every", + "description": "Data collection interval, measured in seconds.", + "type": "integer", + "minimum": 1, + "default": 1 
+ }, + "url": { + "title": "URL", + "description": "The URL of the Docker Engine [metrics endpoint](https://docs.docker.com/config/daemon/prometheus/#configure-the-daemon).", + "type": "string", + "default": "http://127.0.0.1:9323/metrics", + "format": "uri" + }, + "timeout": { + "title": "Timeout", + "description": "The timeout in seconds for the HTTP request.", + "type": "number", + "minimum": 0.5, + "default": 1 + }, + "not_follow_redirects": { + "title": "Not follow redirects", + "description": "If set, the client will not follow HTTP redirects automatically.", + "type": "boolean" + }, + "username": { + "title": "Username", + "description": "The username for basic authentication.", + "type": "string", + "sensitive": true + }, + "password": { + "title": "Password", + "description": "The password for basic authentication.", + "type": "string", + "sensitive": true + }, + "proxy_url": { + "title": "Proxy URL", + "description": "The URL of the proxy server.", + "type": "string" + }, + "proxy_username": { + "title": "Proxy username", + "description": "The username for proxy authentication.", + "type": "string", + "sensitive": true + }, + "proxy_password": { + "title": "Proxy password", + "description": "The password for proxy authentication.", + "type": "string", + "sensitive": true + }, + "headers": { + "title": "Headers", + "description": "Additional HTTP headers to include in the request.", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "type": "string" + } + }, + "tls_skip_verify": { + "title": "Skip TLS verification", + "description": "If set, TLS certificate verification will be skipped.", + "type": "boolean" + }, + "tls_ca": { + "title": "TLS CA", + "description": "The path to the CA certificate file for TLS verification.", + "type": "string", + "pattern": "^$|^/" + }, + "tls_cert": { + "title": "TLS certificate", + "description": "The path to the client certificate file for TLS authentication.", + "type": "string", + "pattern": "^$|^/" + 
}, + "tls_key": { + "title": "TLS key", + "description": "The path to the client key file for TLS authentication.", + "type": "string", + "pattern": "^$|^/" + } + }, + "required": [ + "url" + ], + "additionalProperties": false, + "patternProperties": { + "^name$": {} + } + }, + "uiSchema": { + "uiOptions": { + "fullPage": true + }, + "ui:flavour": "tabs", + "ui:options": { + "tabs": [ + { + "title": "Base", + "fields": [ + "update_every", + "url", + "timeout", + "not_follow_redirects" + ] + }, + { + "title": "Auth", + "fields": [ + "username", + "password" + ] + }, + { + "title": "TLS", + "fields": [ + "tls_skip_verify", + "tls_ca", + "tls_cert", + "tls_key" + ] + }, + { + "title": "Proxy", + "fields": [ + "proxy_url", + "proxy_username", + "proxy_password" + ] + }, + { + "title": "Headers", + "fields": [ + "headers" + ] + } + ] + }, + "timeout": { + "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)." + }, + "password": { + "ui:widget": "password" + }, + "proxy_password": { + "ui:widget": "password" + } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/docker_engine.go b/src/go/collectors/go.d.plugin/modules/docker_engine/docker_engine.go new file mode 100644 index 000000000..629048dcd --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/docker_engine.go @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker_engine + +import ( + _ "embed" + "errors" + "time" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + "github.com/netdata/netdata/go/go.d.plugin/pkg/prometheus" + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" +) + +//go:embed "config_schema.json" +var configSchema string + +func init() { + module.Register("docker_engine", module.Creator{ + JobConfigSchema: configSchema, + Create: func() module.Module { return New() }, + }) +} + +func New() *DockerEngine { + return &DockerEngine{ + Config: Config{ + HTTP: web.HTTP{ + Request: web.Request{ + URL: 
"http://127.0.0.1:9323/metrics", + }, + Client: web.Client{ + Timeout: web.Duration(time.Second), + }, + }, + }, + } +} + +type Config struct { + web.HTTP `yaml:",inline" json:""` + UpdateEvery int `yaml:"update_every" json:"update_every"` +} + +type DockerEngine struct { + module.Base + Config `yaml:",inline" json:""` + + prom prometheus.Prometheus + + isSwarmManager bool + hasContainerStates bool +} + +func (de *DockerEngine) Configuration() any { + return de.Config +} + +func (de *DockerEngine) Init() error { + if err := de.validateConfig(); err != nil { + de.Errorf("config validation: %v", err) + return err + } + + prom, err := de.initPrometheusClient() + if err != nil { + de.Error(err) + return err + } + de.prom = prom + + return nil +} + +func (de *DockerEngine) Check() error { + mx, err := de.collect() + if err != nil { + de.Error(err) + return err + } + if len(mx) == 0 { + return errors.New("no metrics collected") + + } + return nil +} + +func (de *DockerEngine) Charts() *Charts { + cs := charts.Copy() + if !de.hasContainerStates { + if err := cs.Remove("engine_daemon_container_states_containers"); err != nil { + de.Warning(err) + } + } + + if !de.isSwarmManager { + return cs + } + + if err := cs.Add(*swarmManagerCharts.Copy()...); err != nil { + de.Warning(err) + } + + return cs +} + +func (de *DockerEngine) Collect() map[string]int64 { + mx, err := de.collect() + if err != nil { + de.Error(err) + return nil + } + + if len(mx) == 0 { + return nil + } + return mx +} + +func (de *DockerEngine) Cleanup() { + if de.prom != nil && de.prom.HTTPClient() != nil { + de.prom.HTTPClient().CloseIdleConnections() + } +} diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/docker_engine_test.go b/src/go/collectors/go.d.plugin/modules/docker_engine/docker_engine_test.go new file mode 100644 index 000000000..193214274 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/docker_engine_test.go @@ -0,0 +1,372 @@ +// SPDX-License-Identifier: 
GPL-3.0-or-later + +package docker_engine + +import ( + "net/http" + "net/http/httptest" + "os" + "testing" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + "github.com/netdata/netdata/go/go.d.plugin/pkg/tlscfg" + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + dataConfigJSON, _ = os.ReadFile("testdata/config.json") + dataConfigYAML, _ = os.ReadFile("testdata/config.yaml") + + dataNonDockerEngineMetrics, _ = os.ReadFile("testdata/non-docker-engine.txt") + dataVer17050Metrics, _ = os.ReadFile("testdata/v17.05.0-ce.txt") + dataVer18093Metrics, _ = os.ReadFile("testdata/v18.09.3-ce.txt") + dataVer18093SwarmMetrics, _ = os.ReadFile("testdata/v18.09.3-ce-swarm.txt") +) + +func Test_testDataIsValid(t *testing.T) { + for name, data := range map[string][]byte{ + "dataConfigJSON": dataConfigJSON, + "dataConfigYAML": dataConfigYAML, + "dataNonDockerEngineMetrics": dataNonDockerEngineMetrics, + "dataVer17050Metrics": dataVer17050Metrics, + "dataVer18093Metrics": dataVer18093Metrics, + "dataVer18093SwarmMetrics": dataVer18093SwarmMetrics, + } { + require.NotNil(t, data, name) + } +} + +func TestDockerEngine_ConfigurationSerialize(t *testing.T) { + module.TestConfigurationSerialize(t, &DockerEngine{}, dataConfigJSON, dataConfigYAML) +} + +func TestDockerEngine_Cleanup(t *testing.T) { + assert.NotPanics(t, New().Cleanup) +} + +func TestDockerEngine_Init(t *testing.T) { + tests := map[string]struct { + config Config + wantFail bool + }{ + "default": { + config: New().Config, + }, + "empty URL": { + config: Config{HTTP: web.HTTP{Request: web.Request{URL: ""}}}, + wantFail: true, + }, + "nonexistent TLS CA": { + config: Config{HTTP: web.HTTP{ + Request: web.Request{URL: "http://127.0.0.1:9323/metrics"}, + Client: web.Client{TLSConfig: tlscfg.TLSConfig{TLSCA: "testdata/tls"}}}}, + wantFail: true, + }, + } + + for name, test := range tests { + t.Run(name, func(t 
*testing.T) { + dockerEngine := New() + dockerEngine.Config = test.config + + if test.wantFail { + assert.Error(t, dockerEngine.Init()) + } else { + assert.NoError(t, dockerEngine.Init()) + } + }) + } +} + +func TestDockerEngine_Check(t *testing.T) { + tests := map[string]struct { + prepare func(*testing.T) (*DockerEngine, *httptest.Server) + wantFail bool + }{ + "v17.05.0-ce": {prepare: prepareClientServerV17050CE}, + "v18.09.3-ce": {prepare: prepareClientServerV18093CE}, + "v18.09.3-ce-swarm": {prepare: prepareClientServerV18093CESwarm}, + "non docker engine": {prepare: prepareClientServerNonDockerEngine, wantFail: true}, + "invalid data": {prepare: prepareClientServerInvalidData, wantFail: true}, + "404": {prepare: prepareClientServer404, wantFail: true}, + "connection refused": {prepare: prepareClientServerConnectionRefused, wantFail: true}, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + dockerEngine, srv := test.prepare(t) + defer srv.Close() + + if test.wantFail { + assert.Error(t, dockerEngine.Check()) + } else { + assert.NoError(t, dockerEngine.Check()) + } + }) + } +} + +func TestDockerEngine_Charts(t *testing.T) { + tests := map[string]struct { + prepare func(*testing.T) (*DockerEngine, *httptest.Server) + wantNumCharts int + }{ + "v17.05.0-ce": {prepare: prepareClientServerV17050CE, wantNumCharts: len(charts) - 1}, // no container states chart + "v18.09.3-ce": {prepare: prepareClientServerV18093CE, wantNumCharts: len(charts)}, + "v18.09.3-ce-swarm": {prepare: prepareClientServerV18093CESwarm, wantNumCharts: len(charts) + len(swarmManagerCharts)}, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + dockerEngine, srv := test.prepare(t) + defer srv.Close() + + require.NoError(t, dockerEngine.Check()) + assert.Len(t, *dockerEngine.Charts(), test.wantNumCharts) + }) + } +} + +func TestDockerEngine_Collect_ReturnsNilOnErrors(t *testing.T) { + tests := map[string]struct { + prepare func(*testing.T) 
(*DockerEngine, *httptest.Server) + }{ + "non docker engine": {prepare: prepareClientServerNonDockerEngine}, + "invalid data": {prepare: prepareClientServerInvalidData}, + "404": {prepare: prepareClientServer404}, + "connection refused": {prepare: prepareClientServerConnectionRefused}, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + dockerEngine, srv := test.prepare(t) + defer srv.Close() + + assert.Nil(t, dockerEngine.Collect()) + }) + } +} + +func TestDockerEngine_Collect(t *testing.T) { + tests := map[string]struct { + prepare func(*testing.T) (*DockerEngine, *httptest.Server) + expected map[string]int64 + }{ + "v17.05.0-ce": { + prepare: prepareClientServerV17050CE, + expected: map[string]int64{ + "builder_fails_build_canceled": 1, + "builder_fails_build_target_not_reachable_error": 2, + "builder_fails_command_not_supported_error": 3, + "builder_fails_dockerfile_empty_error": 4, + "builder_fails_dockerfile_syntax_error": 5, + "builder_fails_error_processing_commands_error": 6, + "builder_fails_missing_onbuild_arguments_error": 7, + "builder_fails_unknown_instruction_error": 8, + "container_actions_changes": 1, + "container_actions_commit": 1, + "container_actions_create": 1, + "container_actions_delete": 1, + "container_actions_start": 1, + "health_checks_failed": 33, + }, + }, + "v18.09.3-ce": { + prepare: prepareClientServerV18093CE, + expected: map[string]int64{ + "builder_fails_build_canceled": 1, + "builder_fails_build_target_not_reachable_error": 2, + "builder_fails_command_not_supported_error": 3, + "builder_fails_dockerfile_empty_error": 4, + "builder_fails_dockerfile_syntax_error": 5, + "builder_fails_error_processing_commands_error": 6, + "builder_fails_missing_onbuild_arguments_error": 7, + "builder_fails_unknown_instruction_error": 8, + "container_actions_changes": 1, + "container_actions_commit": 1, + "container_actions_create": 1, + "container_actions_delete": 1, + "container_actions_start": 1, + 
"container_states_paused": 11, + "container_states_running": 12, + "container_states_stopped": 13, + "health_checks_failed": 33, + }, + }, + "v18.09.3-ce-swarm": { + prepare: prepareClientServerV18093CESwarm, + expected: map[string]int64{ + "builder_fails_build_canceled": 1, + "builder_fails_build_target_not_reachable_error": 2, + "builder_fails_command_not_supported_error": 3, + "builder_fails_dockerfile_empty_error": 4, + "builder_fails_dockerfile_syntax_error": 5, + "builder_fails_error_processing_commands_error": 6, + "builder_fails_missing_onbuild_arguments_error": 7, + "builder_fails_unknown_instruction_error": 8, + "container_actions_changes": 1, + "container_actions_commit": 1, + "container_actions_create": 1, + "container_actions_delete": 1, + "container_actions_start": 1, + "container_states_paused": 11, + "container_states_running": 12, + "container_states_stopped": 13, + "health_checks_failed": 33, + "swarm_manager_configs_total": 1, + "swarm_manager_leader": 1, + "swarm_manager_networks_total": 3, + "swarm_manager_nodes_state_disconnected": 1, + "swarm_manager_nodes_state_down": 2, + "swarm_manager_nodes_state_ready": 3, + "swarm_manager_nodes_state_unknown": 4, + "swarm_manager_nodes_total": 10, + "swarm_manager_secrets_total": 1, + "swarm_manager_services_total": 1, + "swarm_manager_tasks_state_accepted": 1, + "swarm_manager_tasks_state_assigned": 2, + "swarm_manager_tasks_state_complete": 3, + "swarm_manager_tasks_state_failed": 4, + "swarm_manager_tasks_state_new": 5, + "swarm_manager_tasks_state_orphaned": 6, + "swarm_manager_tasks_state_pending": 7, + "swarm_manager_tasks_state_preparing": 8, + "swarm_manager_tasks_state_ready": 9, + "swarm_manager_tasks_state_rejected": 10, + "swarm_manager_tasks_state_remove": 11, + "swarm_manager_tasks_state_running": 12, + "swarm_manager_tasks_state_shutdown": 13, + "swarm_manager_tasks_state_starting": 14, + "swarm_manager_tasks_total": 105, + }, + }, + } + + for name, test := range tests { + t.Run(name, 
func(t *testing.T) { + pulsar, srv := test.prepare(t) + defer srv.Close() + + for i := 0; i < 10; i++ { + _ = pulsar.Collect() + } + collected := pulsar.Collect() + + require.NotNil(t, collected) + require.Equal(t, test.expected, collected) + ensureCollectedHasAllChartsDimsVarsIDs(t, pulsar, collected) + }) + } +} + +func ensureCollectedHasAllChartsDimsVarsIDs(t *testing.T, dockerEngine *DockerEngine, collected map[string]int64) { + t.Helper() + for _, chart := range *dockerEngine.Charts() { + for _, dim := range chart.Dims { + _, ok := collected[dim.ID] + assert.Truef(t, ok, "collected metrics has no data for dim '%s' chart '%s'", dim.ID, chart.ID) + } + for _, v := range chart.Vars { + _, ok := collected[v.ID] + assert.Truef(t, ok, "collected metrics has no data for var '%s' chart '%s'", v.ID, chart.ID) + } + } +} + +func prepareClientServerV17050CE(t *testing.T) (*DockerEngine, *httptest.Server) { + t.Helper() + srv := httptest.NewServer(http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write(dataVer17050Metrics) + })) + + dockerEngine := New() + dockerEngine.URL = srv.URL + require.NoError(t, dockerEngine.Init()) + + return dockerEngine, srv +} + +func prepareClientServerV18093CE(t *testing.T) (*DockerEngine, *httptest.Server) { + t.Helper() + srv := httptest.NewServer(http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write(dataVer18093Metrics) + })) + + dockerEngine := New() + dockerEngine.URL = srv.URL + require.NoError(t, dockerEngine.Init()) + + return dockerEngine, srv +} + +func prepareClientServerV18093CESwarm(t *testing.T) (*DockerEngine, *httptest.Server) { + t.Helper() + srv := httptest.NewServer(http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write(dataVer18093SwarmMetrics) + })) + + dockerEngine := New() + dockerEngine.URL = srv.URL + require.NoError(t, dockerEngine.Init()) + + return dockerEngine, srv +} + +func prepareClientServerNonDockerEngine(t *testing.T) 
(*DockerEngine, *httptest.Server) { + t.Helper() + srv := httptest.NewServer(http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write(dataNonDockerEngineMetrics) + })) + + dockerEngine := New() + dockerEngine.URL = srv.URL + require.NoError(t, dockerEngine.Init()) + + return dockerEngine, srv +} + +func prepareClientServerInvalidData(t *testing.T) (*DockerEngine, *httptest.Server) { + t.Helper() + srv := httptest.NewServer(http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte("hello and\n goodbye")) + })) + + dockerEngine := New() + dockerEngine.URL = srv.URL + require.NoError(t, dockerEngine.Init()) + + return dockerEngine, srv +} + +func prepareClientServer404(t *testing.T) (*DockerEngine, *httptest.Server) { + t.Helper() + srv := httptest.NewServer(http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + })) + + dockerEngine := New() + dockerEngine.URL = srv.URL + require.NoError(t, dockerEngine.Init()) + + return dockerEngine, srv +} + +func prepareClientServerConnectionRefused(t *testing.T) (*DockerEngine, *httptest.Server) { + t.Helper() + srv := httptest.NewServer(nil) + + dockerEngine := New() + dockerEngine.URL = "http://127.0.0.1:38001/metrics" + require.NoError(t, dockerEngine.Init()) + + return dockerEngine, srv +} diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/init.go b/src/go/collectors/go.d.plugin/modules/docker_engine/init.go new file mode 100644 index 000000000..5e06f545e --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/init.go @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker_engine + +import ( + "errors" + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" + + "github.com/netdata/netdata/go/go.d.plugin/pkg/prometheus" +) + +func (de *DockerEngine) validateConfig() error { + if de.URL == "" { + return errors.New("url not set") + } + return nil +} + +func (de 
*DockerEngine) initPrometheusClient() (prometheus.Prometheus, error) { + client, err := web.NewHTTPClient(de.Client) + if err != nil { + return nil, err + } + return prometheus.New(client, de.Request), nil +} diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/integrations/docker_engine.md b/src/go/collectors/go.d.plugin/modules/docker_engine/integrations/docker_engine.md new file mode 100644 index 000000000..19bf4b11e --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/integrations/docker_engine.md @@ -0,0 +1,229 @@ +<!--startmeta +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/docker_engine/README.md" +meta_yaml: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/docker_engine/metadata.yaml" +sidebar_label: "Docker Engine" +learn_status: "Published" +learn_rel_path: "Collecting Metrics/Containers and VMs" +most_popular: False +message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" +endmeta--> + +# Docker Engine + + +<img src="https://netdata.cloud/img/docker.svg" width="150"/> + + +Plugin: go.d.plugin +Module: docker_engine + +<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> + +## Overview + +This collector monitors the activity and health of Docker Engine and Docker Swarm. + + +The [built-in](https://docs.docker.com/config/daemon/prometheus/) Prometheus exporter is used to get the metrics. + + +This collector is supported on all platforms. + +This collector supports collecting metrics from multiple instances of this integration, including remote instances. + + +### Default Behavior + +#### Auto-Detection + +It discovers instances running on localhost by attempting to connect to a known Docker TCP socket: `http://127.0.0.1:9323/metrics`. + + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. 
+ +#### Performance Impact + +The default configuration for this integration is not expected to impose a significant performance impact on the system. + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per Docker Engine instance + +These metrics refer to the entire monitored application. + +This scope has no labels. + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| docker_engine.engine_daemon_container_actions | changes, commit, create, delete, start | actions/s | +| docker_engine.engine_daemon_container_states_containers | running, paused, stopped | containers | +| docker_engine.builder_builds_failed_total | build_canceled, build_target_not_reachable_error, command_not_supported_error, dockerfile_empty_error, dockerfile_syntax_error, error_processing_commands_error, missing_onbuild_arguments_error, unknown_instruction_error | fails/s | +| docker_engine.engine_daemon_health_checks_failed_total | fails | events/s | +| docker_engine.swarm_manager_leader | is_leader | bool | +| docker_engine.swarm_manager_object_store | nodes, services, tasks, networks, secrets, configs | objects | +| docker_engine.swarm_manager_nodes_per_state | ready, down, unknown, disconnected | nodes | +| docker_engine.swarm_manager_tasks_per_state | running, failed, ready, rejected, starting, shutdown, new, orphaned, preparing, pending, complete, remove, accepted, assigned | tasks | + + + +## Alerts + +There are no alerts configured by default for this integration. + + +## Setup + +### Prerequisites + +#### Enable built-in Prometheus exporter + +To enable built-in Prometheus exporter, follow the [official documentation](https://docs.docker.com/config/daemon/prometheus/#configure-docker). + + + +### Configuration + +#### File + +The configuration file name for this integration is `go.d/docker_engine.conf`. 
+ + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory). + +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/docker_engine.conf +``` +#### Options + +The following options can be defined globally: update_every, autodetection_retry. + + +<details><summary>Config options</summary> + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 1 | no | +| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no | +| url | Server URL. | http://127.0.0.1:9323/metrics | yes | +| username | Username for basic HTTP authentication. | | no | +| password | Password for basic HTTP authentication. | | no | +| proxy_url | Proxy URL. | | no | +| proxy_username | Username for proxy basic HTTP authentication. | | no | +| proxy_password | Password for proxy basic HTTP authentication. | | no | +| timeout | HTTP request timeout. | 1 | no | +| method | HTTP request method. | GET | no | +| body | HTTP request body. | | no | +| headers | HTTP request headers. | | no | +| not_follow_redirects | Redirect handling policy. Controls whether the client follows redirects. | no | no | +| tls_skip_verify | Server certificate chain and hostname validation policy. Controls whether the client performs this check. | no | no | +| tls_ca | Certification authority that the client uses when verifying the server's certificates. | | no | +| tls_cert | Client TLS certificate. | | no | +| tls_key | Client TLS key. | | no | + +</details> + +#### Examples + +##### Basic + +A basic example configuration. + +```yaml +jobs: + - name: local + url: http://127.0.0.1:9323/metrics + +``` +##### HTTP authentication + +Basic HTTP authentication. 
+ +<details><summary>Config</summary> + +```yaml +jobs: + - name: local + url: http://127.0.0.1:9323/metrics + username: username + password: password + +``` +</details> + +##### HTTPS with self-signed certificate + +Configuration with enabled HTTPS and self-signed certificate. + +<details><summary>Config</summary> + +```yaml +jobs: + - name: local + url: http://127.0.0.1:9323/metrics + tls_skip_verify: yes + +``` +</details> + +##### Multi-instance + +> **Note**: When you define multiple jobs, their names must be unique. + +Collecting metrics from local and remote instances. + + +<details><summary>Config</summary> + +```yaml +jobs: + - name: local + url: http://127.0.0.1:9323/metrics + + - name: remote + url: http://192.0.2.1:9323/metrics + +``` +</details> + + + +## Troubleshooting + +### Debug Mode + +To troubleshoot issues with the `docker_engine` collector, run the `go.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. 
+ + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m docker_engine + ``` + + diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/metadata.yaml b/src/go/collectors/go.d.plugin/modules/docker_engine/metadata.yaml new file mode 100644 index 000000000..8f81d4e35 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/metadata.yaml @@ -0,0 +1,263 @@ +plugin_name: go.d.plugin +modules: + - meta: + id: collector-go.d.plugin-docker_engine + plugin_name: go.d.plugin + module_name: docker_engine + alternative_monitored_instances: [] + monitored_instance: + name: Docker Engine + link: https://docs.docker.com/engine/ + categories: + - data-collection.containers-and-vms + icon_filename: docker.svg + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - docker + - container + most_popular: false + overview: + data_collection: + metrics_description: | + This collector monitors the activity and health of Docker Engine and Docker Swarm. + method_description: | + The [built-in](https://docs.docker.com/config/daemon/prometheus/) Prometheus exporter is used to get the metrics. + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: | + It discovers instances running on localhost by attempting to connect to a known Docker TCP socket: `http://127.0.0.1:9323/metrics`. + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: + - title: Enable built-in Prometheus exporter + description: | + To enable built-in Prometheus exporter, follow the [official documentation](https://docs.docker.com/config/daemon/prometheus/#configure-docker). 
+ configuration: + file: + name: go.d/docker_engine.conf + options: + description: | + The following options can be defined globally: update_every, autodetection_retry. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Data collection frequency. + default_value: 1 + required: false + - name: autodetection_retry + description: Recheck interval in seconds. Zero means no recheck will be scheduled. + default_value: 0 + required: false + - name: url + description: Server URL. + default_value: http://127.0.0.1:9323/metrics + required: true + - name: username + description: Username for basic HTTP authentication. + default_value: "" + required: false + - name: password + description: Password for basic HTTP authentication. + default_value: "" + required: false + - name: proxy_url + description: Proxy URL. + default_value: "" + required: false + - name: proxy_username + description: Username for proxy basic HTTP authentication. + default_value: "" + required: false + - name: proxy_password + description: Password for proxy basic HTTP authentication. + default_value: "" + required: false + - name: timeout + description: HTTP request timeout. + default_value: 1 + required: false + - name: method + description: HTTP request method. + default_value: GET + required: false + - name: body + description: HTTP request body. + default_value: "" + required: false + - name: headers + description: HTTP request headers. + default_value: "" + required: false + - name: not_follow_redirects + description: Redirect handling policy. Controls whether the client follows redirects. + default_value: "no" + required: false + - name: tls_skip_verify + description: Server certificate chain and hostname validation policy. Controls whether the client performs this check. + default_value: "no" + required: false + - name: tls_ca + description: Certification authority that the client uses when verifying the server's certificates. 
+ default_value: "" + required: false + - name: tls_cert + description: Client TLS certificate. + default_value: "" + required: false + - name: tls_key + description: Client TLS key. + default_value: "" + required: false + examples: + folding: + title: Config + enabled: true + list: + - name: Basic + description: A basic example configuration. + folding: + enabled: false + config: | + jobs: + - name: local + url: http://127.0.0.1:9323/metrics + - name: HTTP authentication + description: Basic HTTP authentication. + config: | + jobs: + - name: local + url: http://127.0.0.1:9323/metrics + username: username + password: password + - name: HTTPS with self-signed certificate + description: Configuration with HTTPS enabled and a self-signed certificate. + config: | + jobs: + - name: local + url: https://127.0.0.1:9323/metrics + tls_skip_verify: yes + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + jobs: + - name: local + url: http://127.0.0.1:9323/metrics + + - name: remote + url: http://192.0.2.1:9323/metrics + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: These metrics refer to the entire monitored application. 
+ labels: [] + metrics: + - name: docker_engine.engine_daemon_container_actions + description: Container Actions + unit: actions/s + chart_type: stacked + dimensions: + - name: changes + - name: commit + - name: create + - name: delete + - name: start + - name: docker_engine.engine_daemon_container_states_containers + description: Containers In Various States + unit: containers + chart_type: stacked + dimensions: + - name: running + - name: paused + - name: stopped + - name: docker_engine.builder_builds_failed_total + description: Builder Builds Fails By Reason + unit: fails/s + chart_type: stacked + dimensions: + - name: build_canceled + - name: build_target_not_reachable_error + - name: command_not_supported_error + - name: dockerfile_empty_error + - name: dockerfile_syntax_error + - name: error_processing_commands_error + - name: missing_onbuild_arguments_error + - name: unknown_instruction_error + - name: docker_engine.engine_daemon_health_checks_failed_total + description: Health Checks + unit: events/s + chart_type: line + dimensions: + - name: fails + - name: docker_engine.swarm_manager_leader + description: Swarm Manager Leader + unit: bool + chart_type: line + dimensions: + - name: is_leader + - name: docker_engine.swarm_manager_object_store + description: Swarm Manager Object Store + unit: objects + chart_type: stacked + dimensions: + - name: nodes + - name: services + - name: tasks + - name: networks + - name: secrets + - name: configs + - name: docker_engine.swarm_manager_nodes_per_state + description: Swarm Manager Nodes Per State + unit: nodes + chart_type: stacked + dimensions: + - name: ready + - name: down + - name: unknown + - name: disconnected + - name: docker_engine.swarm_manager_tasks_per_state + description: Swarm Manager Tasks Per State + unit: tasks + chart_type: stacked + dimensions: + - name: running + - name: failed + - name: ready + - name: rejected + - name: starting + - name: shutdown + - name: new + - name: orphaned + - name: 
preparing + - name: pending + - name: complete + - name: remove + - name: accepted + - name: assigned diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/metrics.go b/src/go/collectors/go.d.plugin/modules/docker_engine/metrics.go new file mode 100644 index 000000000..4c84e8398 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/metrics.go @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker_engine + +type metrics struct { // metrics groups the collector's values into container, builder, health-check and swarm-manager sections; every leaf is a float64 carrying an stm tag. + Container struct { + Actions struct { + Changes float64 `stm:"changes"` + Commit float64 `stm:"commit"` + Create float64 `stm:"create"` + Delete float64 `stm:"delete"` + Start float64 `stm:"start"` + } `stm:"actions"` + States *containerStates `stm:"states"` // pointer field; presumably nil when the engine reports no container states - TODO confirm against the collect code. + } `stm:"container"` + Builder struct { + FailsByReason struct { + BuildCanceled float64 `stm:"build_canceled"` + BuildTargetNotReachableError float64 `stm:"build_target_not_reachable_error"` + CommandNotSupportedError float64 `stm:"command_not_supported_error"` + DockerfileEmptyError float64 `stm:"dockerfile_empty_error"` + DockerfileSyntaxError float64 `stm:"dockerfile_syntax_error"` + ErrorProcessingCommandsError float64 `stm:"error_processing_commands_error"` + MissingOnbuildArgumentsError float64 `stm:"missing_onbuild_arguments_error"` + UnknownInstructionError float64 `stm:"unknown_instruction_error"` + } `stm:"fails"` + } `stm:"builder"` + HealthChecks struct { + Failed float64 `stm:"failed"` + } `stm:"health_checks"` + SwarmManager *swarmManager `stm:"swarm_manager"` // pointer field; presumably nil when no swarm manager values are available - TODO confirm against the collect code. +} + +type containerStates struct { // containerStates holds one float64 value per container state: paused, running, stopped. + Paused float64 `stm:"paused"` + Running float64 `stm:"running"` + Stopped float64 `stm:"stopped"` +} + +type swarmManager struct { // swarmManager holds the leader flag, the configs/networks/secrets/services totals, and node and task values broken down per state. + IsLeader float64 `stm:"leader"` + Configs float64 `stm:"configs_total"` + Networks float64 `stm:"networks_total"` + Secrets float64 `stm:"secrets_total"` + Services float64 `stm:"services_total"` + Nodes struct { + Total float64 `stm:"total"` + PerState struct { + Disconnected float64 
`stm:"disconnected"` + Down float64 `stm:"down"` + Ready float64 `stm:"ready"` + Unknown float64 `stm:"unknown"` + } `stm:"state"` + } `stm:"nodes"` + Tasks struct { + Total float64 `stm:"total"` + PerState struct { + Accepted float64 `stm:"accepted"` + Assigned float64 `stm:"assigned"` + Complete float64 `stm:"complete"` + Failed float64 `stm:"failed"` + New float64 `stm:"new"` + Orphaned float64 `stm:"orphaned"` + Pending float64 `stm:"pending"` + Preparing float64 `stm:"preparing"` + Ready float64 `stm:"ready"` + Rejected float64 `stm:"rejected"` + Remove float64 `stm:"remove"` + Running float64 `stm:"running"` + Shutdown float64 `stm:"shutdown"` + Starting float64 `stm:"starting"` + } `stm:"state"` + } `stm:"tasks"` +} diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/config.json b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/config.json new file mode 100644 index 000000000..984c3ed6e --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/config.json @@ -0,0 +1,20 @@ +{ + "update_every": 123, + "url": "ok", + "body": "ok", + "method": "ok", + "headers": { + "ok": "ok" + }, + "username": "ok", + "password": "ok", + "proxy_url": "ok", + "proxy_username": "ok", + "proxy_password": "ok", + "timeout": 123.123, + "not_follow_redirects": true, + "tls_ca": "ok", + "tls_cert": "ok", + "tls_key": "ok", + "tls_skip_verify": true +} diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/config.yaml b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/config.yaml new file mode 100644 index 000000000..8558b61cc --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/config.yaml @@ -0,0 +1,17 @@ +update_every: 123 +url: "ok" +body: "ok" +method: "ok" +headers: + ok: "ok" +username: "ok" +password: "ok" +proxy_url: "ok" +proxy_username: "ok" +proxy_password: "ok" +timeout: 123.123 +not_follow_redirects: yes +tls_ca: "ok" +tls_cert: "ok" +tls_key: "ok" 
+tls_skip_verify: yes diff --git a/libnetdata/buffered_reader/README.md b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/non-docker-engine.txt index e69de29bb..e69de29bb 100644 --- a/libnetdata/buffered_reader/README.md +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/non-docker-engine.txt diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/v17.05.0-ce.txt b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/v17.05.0-ce.txt new file mode 100644 index 000000000..8d175a8e9 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/v17.05.0-ce.txt @@ -0,0 +1,460 @@ +# HELP builder_builds_failed_total Number of failed image builds +# TYPE builder_builds_failed_total counter +builder_builds_failed_total{reason="build_canceled"} 1 +builder_builds_failed_total{reason="build_target_not_reachable_error"} 2 +builder_builds_failed_total{reason="command_not_supported_error"} 3 +builder_builds_failed_total{reason="dockerfile_empty_error"} 4 +builder_builds_failed_total{reason="dockerfile_syntax_error"} 5 +builder_builds_failed_total{reason="error_processing_commands_error"} 6 +builder_builds_failed_total{reason="missing_onbuild_arguments_error"} 7 +builder_builds_failed_total{reason="unknown_instruction_error"} 8 +# HELP builder_builds_triggered_total Number of triggered image builds +# TYPE builder_builds_triggered_total counter +builder_builds_triggered_total 0 +# HELP engine_daemon_container_actions_seconds The number of seconds it takes to process each container action +# TYPE engine_daemon_container_actions_seconds histogram +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.05"} 1 
+engine_daemon_container_actions_seconds_bucket{action="changes",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="changes"} 0 +engine_daemon_container_actions_seconds_count{action="changes"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="commit"} 0 +engine_daemon_container_actions_seconds_count{action="commit"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.01"} 1 
+engine_daemon_container_actions_seconds_bucket{action="create",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="create"} 0 +engine_daemon_container_actions_seconds_count{action="create"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="delete"} 0 +engine_daemon_container_actions_seconds_count{action="delete"} 1 
+engine_daemon_container_actions_seconds_bucket{action="start",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="start"} 0 +engine_daemon_container_actions_seconds_count{action="start"} 1 +# HELP engine_daemon_engine_cpus_cpus The number of cpus that the host system of the engine has +# TYPE engine_daemon_engine_cpus_cpus gauge +engine_daemon_engine_cpus_cpus 4 +# HELP engine_daemon_engine_info The information related to the engine and the OS it is running on +# TYPE engine_daemon_engine_info gauge +engine_daemon_engine_info{architecture="x86_64",commit="774a1f4eee",daemon_id="NFZK:ZHHR:73WY:RV7D:MMU2:SE24:WWRJ:A3WN:WMMA:SPCL:PVO3:VGY7",graphdriver="overlay2",kernel="4.14.105-1-MANJARO",os="Manjaro Linux",os_type="linux",version="18.09.3-ce"} 1 +# HELP engine_daemon_engine_memory_bytes The number of bytes of memory that the host system of the engine has +# TYPE engine_daemon_engine_memory_bytes gauge +engine_daemon_engine_memory_bytes 2.5215361024e+10 +# HELP engine_daemon_events_subscribers_total The number of current subscribers to events +# TYPE engine_daemon_events_subscribers_total gauge +engine_daemon_events_subscribers_total 0 +# 
HELP engine_daemon_events_total The number of events logged +# TYPE engine_daemon_events_total counter +engine_daemon_events_total 0 +# HELP engine_daemon_health_checks_failed_total The total number of failed health checks +# TYPE engine_daemon_health_checks_failed_total counter +engine_daemon_health_checks_failed_total 33 +# HELP engine_daemon_health_checks_total The total number of health checks +# TYPE engine_daemon_health_checks_total counter +engine_daemon_health_checks_total 0 +# HELP etcd_debugging_snap_save_marshalling_duration_seconds The marshalling cost distributions of save called by snapshot. +# TYPE etcd_debugging_snap_save_marshalling_duration_seconds histogram +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.001"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.002"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.004"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.008"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.016"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.032"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.064"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.128"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.256"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.512"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="1.024"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="2.048"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="4.096"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="8.192"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="+Inf"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_sum 0 +etcd_debugging_snap_save_marshalling_duration_seconds_count 0 +# HELP 
etcd_debugging_snap_save_total_duration_seconds The total latency distributions of save called by snapshot. +# TYPE etcd_debugging_snap_save_total_duration_seconds histogram +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.001"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.002"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.004"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.008"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.016"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.032"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.064"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.128"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.256"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.512"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="1.024"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="2.048"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="4.096"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="8.192"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="+Inf"} 0 +etcd_debugging_snap_save_total_duration_seconds_sum 0 +etcd_debugging_snap_save_total_duration_seconds_count 0 +# HELP etcd_disk_wal_fsync_duration_seconds The latency distributions of fsync called by wal. 
+# TYPE etcd_disk_wal_fsync_duration_seconds histogram +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.001"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.002"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.004"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.008"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.016"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.032"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.064"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.128"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.256"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.512"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="1.024"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="2.048"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="4.096"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="8.192"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="+Inf"} 0 +etcd_disk_wal_fsync_duration_seconds_sum 0 +etcd_disk_wal_fsync_duration_seconds_count 0 +# HELP go_gc_duration_seconds A summary of the GC invocation durations. +# TYPE go_gc_duration_seconds summary +go_gc_duration_seconds{quantile="0"} 1.0085e-05 +go_gc_duration_seconds{quantile="0.25"} 3.1991e-05 +go_gc_duration_seconds{quantile="0.5"} 4.8062e-05 +go_gc_duration_seconds{quantile="0.75"} 9.067e-05 +go_gc_duration_seconds{quantile="1"} 0.000175239 +go_gc_duration_seconds_sum 0.000724173 +go_gc_duration_seconds_count 12 +# HELP go_goroutines Number of goroutines that currently exist. +# TYPE go_goroutines gauge +go_goroutines 50 +# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use. +# TYPE go_memstats_alloc_bytes gauge +go_memstats_alloc_bytes 8.13368e+06 +# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed. 
+# TYPE go_memstats_alloc_bytes_total counter +go_memstats_alloc_bytes_total 2.7343352e+07 +# HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table. +# TYPE go_memstats_buck_hash_sys_bytes gauge +go_memstats_buck_hash_sys_bytes 1.454057e+06 +# HELP go_memstats_frees_total Total number of frees. +# TYPE go_memstats_frees_total counter +go_memstats_frees_total 319815 +# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata. +# TYPE go_memstats_gc_sys_bytes gauge +go_memstats_gc_sys_bytes 2.398208e+06 +# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use. +# TYPE go_memstats_heap_alloc_bytes gauge +go_memstats_heap_alloc_bytes 8.13368e+06 +# HELP go_memstats_heap_idle_bytes Number of heap bytes waiting to be used. +# TYPE go_memstats_heap_idle_bytes gauge +go_memstats_heap_idle_bytes 5.5648256e+07 +# HELP go_memstats_heap_inuse_bytes Number of heap bytes that are in use. +# TYPE go_memstats_heap_inuse_bytes gauge +go_memstats_heap_inuse_bytes 1.0477568e+07 +# HELP go_memstats_heap_objects Number of allocated objects. +# TYPE go_memstats_heap_objects gauge +go_memstats_heap_objects 114878 +# HELP go_memstats_heap_released_bytes_total Total number of heap bytes released to OS. +# TYPE go_memstats_heap_released_bytes_total counter +go_memstats_heap_released_bytes_total 5.4738944e+07 +# HELP go_memstats_heap_sys_bytes Number of heap bytes obtained from system. +# TYPE go_memstats_heap_sys_bytes gauge +go_memstats_heap_sys_bytes 6.6125824e+07 +# HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection. +# TYPE go_memstats_last_gc_time_seconds gauge +go_memstats_last_gc_time_seconds 1.5528438390886765e+09 +# HELP go_memstats_lookups_total Total number of pointer lookups. +# TYPE go_memstats_lookups_total counter +go_memstats_lookups_total 0 +# HELP go_memstats_mallocs_total Total number of mallocs. 
+# TYPE go_memstats_mallocs_total counter +go_memstats_mallocs_total 434693 +# HELP go_memstats_mcache_inuse_bytes Number of bytes in use by mcache structures. +# TYPE go_memstats_mcache_inuse_bytes gauge +go_memstats_mcache_inuse_bytes 6944 +# HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system. +# TYPE go_memstats_mcache_sys_bytes gauge +go_memstats_mcache_sys_bytes 16384 +# HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures. +# TYPE go_memstats_mspan_inuse_bytes gauge +go_memstats_mspan_inuse_bytes 159696 +# HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system. +# TYPE go_memstats_mspan_sys_bytes gauge +go_memstats_mspan_sys_bytes 196608 +# HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place. +# TYPE go_memstats_next_gc_bytes gauge +go_memstats_next_gc_bytes 1.5134512e+07 +# HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations. +# TYPE go_memstats_other_sys_bytes gauge +go_memstats_other_sys_bytes 1.112335e+06 +# HELP go_memstats_stack_inuse_bytes Number of bytes in use by the stack allocator. +# TYPE go_memstats_stack_inuse_bytes gauge +go_memstats_stack_inuse_bytes 983040 +# HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator. +# TYPE go_memstats_stack_sys_bytes gauge +go_memstats_stack_sys_bytes 983040 +# HELP go_memstats_sys_bytes Number of bytes obtained by system. Sum of all system allocations. +# TYPE go_memstats_sys_bytes gauge +go_memstats_sys_bytes 7.2286456e+07 +# HELP http_request_duration_microseconds The HTTP request latencies in microseconds. 
+# TYPE http_request_duration_microseconds summary +http_request_duration_microseconds{handler="prometheus",quantile="0.5"} NaN +http_request_duration_microseconds{handler="prometheus",quantile="0.9"} NaN +http_request_duration_microseconds{handler="prometheus",quantile="0.99"} NaN +http_request_duration_microseconds_sum{handler="prometheus"} 0 +http_request_duration_microseconds_count{handler="prometheus"} 0 +# HELP http_request_size_bytes The HTTP request sizes in bytes. +# TYPE http_request_size_bytes summary +http_request_size_bytes{handler="prometheus",quantile="0.5"} NaN +http_request_size_bytes{handler="prometheus",quantile="0.9"} NaN +http_request_size_bytes{handler="prometheus",quantile="0.99"} NaN +http_request_size_bytes_sum{handler="prometheus"} 0 +http_request_size_bytes_count{handler="prometheus"} 0 +# HELP http_response_size_bytes The HTTP response sizes in bytes. +# TYPE http_response_size_bytes summary +http_response_size_bytes{handler="prometheus",quantile="0.5"} NaN +http_response_size_bytes{handler="prometheus",quantile="0.9"} NaN +http_response_size_bytes{handler="prometheus",quantile="0.99"} NaN +http_response_size_bytes_sum{handler="prometheus"} 0 +http_response_size_bytes_count{handler="prometheus"} 0 +# HELP logger_log_entries_size_greater_than_buffer_total Number of log entries which are larger than the log buffer +# TYPE logger_log_entries_size_greater_than_buffer_total counter +logger_log_entries_size_greater_than_buffer_total 0 +# HELP logger_log_read_operations_failed_total Number of log reads from container stdio that failed +# TYPE logger_log_read_operations_failed_total counter +logger_log_read_operations_failed_total 0 +# HELP logger_log_write_operations_failed_total Number of log write operations that failed +# TYPE logger_log_write_operations_failed_total counter +logger_log_write_operations_failed_total 0 +# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. 
+# TYPE process_cpu_seconds_total counter +process_cpu_seconds_total 2.12 +# HELP process_max_fds Maximum number of open file descriptors. +# TYPE process_max_fds gauge +process_max_fds 1.048576e+06 +# HELP process_open_fds Number of open file descriptors. +# TYPE process_open_fds gauge +process_open_fds 24 +# HELP process_resident_memory_bytes Resident memory size in bytes. +# TYPE process_resident_memory_bytes gauge +process_resident_memory_bytes 8.5929984e+07 +# HELP process_start_time_seconds Start time of the process since unix epoch in seconds. +# TYPE process_start_time_seconds gauge +process_start_time_seconds 1.55284287673e+09 +# HELP process_virtual_memory_bytes Virtual memory size in bytes. +# TYPE process_virtual_memory_bytes gauge +process_virtual_memory_bytes 1.257283584e+09 +# HELP swarm_dispatcher_scheduling_delay_seconds Scheduling delay is the time a task takes to go from NEW to RUNNING state. +# TYPE swarm_dispatcher_scheduling_delay_seconds histogram +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.005"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.01"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.025"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.05"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.1"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.25"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.5"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="1"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="2.5"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="5"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="10"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="+Inf"} 0 +swarm_dispatcher_scheduling_delay_seconds_sum 0 +swarm_dispatcher_scheduling_delay_seconds_count 0 +# HELP swarm_manager_configs_total The number of configs in the cluster object store +# TYPE swarm_manager_configs_total gauge +swarm_manager_configs_total 0 +# HELP 
swarm_manager_leader Indicates if this manager node is a leader +# TYPE swarm_manager_leader gauge +swarm_manager_leader 0 +# HELP swarm_manager_networks_total The number of networks in the cluster object store +# TYPE swarm_manager_networks_total gauge +swarm_manager_networks_total 0 +# HELP swarm_manager_nodes The number of nodes +# TYPE swarm_manager_nodes gauge +swarm_manager_nodes{state="disconnected"} 0 +swarm_manager_nodes{state="down"} 0 +swarm_manager_nodes{state="ready"} 0 +swarm_manager_nodes{state="unknown"} 0 +# HELP swarm_manager_secrets_total The number of secrets in the cluster object store +# TYPE swarm_manager_secrets_total gauge +swarm_manager_secrets_total 0 +# HELP swarm_manager_services_total The number of services in the cluster object store +# TYPE swarm_manager_services_total gauge +swarm_manager_services_total 0 +# HELP swarm_manager_tasks_total The number of tasks in the cluster object store +# TYPE swarm_manager_tasks_total gauge +swarm_manager_tasks_total{state="accepted"} 0 +swarm_manager_tasks_total{state="assigned"} 0 +swarm_manager_tasks_total{state="complete"} 0 +swarm_manager_tasks_total{state="failed"} 0 +swarm_manager_tasks_total{state="new"} 0 +swarm_manager_tasks_total{state="orphaned"} 0 +swarm_manager_tasks_total{state="pending"} 0 +swarm_manager_tasks_total{state="preparing"} 0 +swarm_manager_tasks_total{state="ready"} 0 +swarm_manager_tasks_total{state="rejected"} 0 +swarm_manager_tasks_total{state="remove"} 0 +swarm_manager_tasks_total{state="running"} 0 +swarm_manager_tasks_total{state="shutdown"} 0 +swarm_manager_tasks_total{state="starting"} 0 +# HELP swarm_node_manager Whether this node is a manager or not +# TYPE swarm_node_manager gauge +swarm_node_manager 0 +# HELP swarm_raft_snapshot_latency_seconds Raft snapshot create latency. 
+# TYPE swarm_raft_snapshot_latency_seconds histogram +swarm_raft_snapshot_latency_seconds_bucket{le="0.005"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.01"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.025"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.05"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.1"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.25"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.5"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="1"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="2.5"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="5"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="10"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="+Inf"} 0 +swarm_raft_snapshot_latency_seconds_sum 0 +swarm_raft_snapshot_latency_seconds_count 0 +# HELP swarm_raft_transaction_latency_seconds Raft transaction latency. +# TYPE swarm_raft_transaction_latency_seconds histogram +swarm_raft_transaction_latency_seconds_bucket{le="0.005"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.01"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.025"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.05"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.1"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.25"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.5"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="1"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="2.5"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="5"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="10"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="+Inf"} 0 +swarm_raft_transaction_latency_seconds_sum 0 +swarm_raft_transaction_latency_seconds_count 0 +# HELP swarm_store_batch_latency_seconds Raft store batch latency. 
+# TYPE swarm_store_batch_latency_seconds histogram +swarm_store_batch_latency_seconds_bucket{le="0.005"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.01"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.025"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.05"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.1"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.25"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.5"} 0 +swarm_store_batch_latency_seconds_bucket{le="1"} 0 +swarm_store_batch_latency_seconds_bucket{le="2.5"} 0 +swarm_store_batch_latency_seconds_bucket{le="5"} 0 +swarm_store_batch_latency_seconds_bucket{le="10"} 0 +swarm_store_batch_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_batch_latency_seconds_sum 0 +swarm_store_batch_latency_seconds_count 0 +# HELP swarm_store_lookup_latency_seconds Raft store read latency. +# TYPE swarm_store_lookup_latency_seconds histogram +swarm_store_lookup_latency_seconds_bucket{le="0.005"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.01"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.025"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.05"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.1"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.25"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.5"} 0 +swarm_store_lookup_latency_seconds_bucket{le="1"} 0 +swarm_store_lookup_latency_seconds_bucket{le="2.5"} 0 +swarm_store_lookup_latency_seconds_bucket{le="5"} 0 +swarm_store_lookup_latency_seconds_bucket{le="10"} 0 +swarm_store_lookup_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_lookup_latency_seconds_sum 0 +swarm_store_lookup_latency_seconds_count 0 +# HELP swarm_store_memory_store_lock_duration_seconds Duration for which the raft memory store lock was held. 
+# TYPE swarm_store_memory_store_lock_duration_seconds histogram +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.005"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.01"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.025"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.05"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.1"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.25"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.5"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="1"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="2.5"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="5"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="10"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="+Inf"} 0 +swarm_store_memory_store_lock_duration_seconds_sum 0 +swarm_store_memory_store_lock_duration_seconds_count 0 +# HELP swarm_store_read_tx_latency_seconds Raft store read tx latency. +# TYPE swarm_store_read_tx_latency_seconds histogram +swarm_store_read_tx_latency_seconds_bucket{le="0.005"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.01"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.025"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.05"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.1"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.25"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.5"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="1"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="2.5"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="5"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="10"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_read_tx_latency_seconds_sum 0 +swarm_store_read_tx_latency_seconds_count 0 +# HELP swarm_store_write_tx_latency_seconds Raft store write tx latency. 
+# TYPE swarm_store_write_tx_latency_seconds histogram +swarm_store_write_tx_latency_seconds_bucket{le="0.005"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.01"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.025"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.05"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.1"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.25"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.5"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="1"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="2.5"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="5"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="10"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_write_tx_latency_seconds_sum 0 +swarm_store_write_tx_latency_seconds_count 0
\ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/v18.09.3-ce-swarm.txt b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/v18.09.3-ce-swarm.txt new file mode 100644 index 000000000..edd69abee --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/v18.09.3-ce-swarm.txt @@ -0,0 +1,468 @@ +# HELP builder_builds_failed_total Number of failed image builds +# TYPE builder_builds_failed_total counter +builder_builds_failed_total{reason="build_canceled"} 1 +builder_builds_failed_total{reason="build_target_not_reachable_error"} 2 +builder_builds_failed_total{reason="command_not_supported_error"} 3 +builder_builds_failed_total{reason="dockerfile_empty_error"} 4 +builder_builds_failed_total{reason="dockerfile_syntax_error"} 5 +builder_builds_failed_total{reason="error_processing_commands_error"} 6 +builder_builds_failed_total{reason="missing_onbuild_arguments_error"} 7 +builder_builds_failed_total{reason="unknown_instruction_error"} 8 +# HELP builder_builds_triggered_total Number of triggered image builds +# TYPE builder_builds_triggered_total counter +builder_builds_triggered_total 0 +# HELP engine_daemon_container_actions_seconds The number of seconds it takes to process each container action +# TYPE engine_daemon_container_actions_seconds histogram +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="1"} 1 
+engine_daemon_container_actions_seconds_bucket{action="changes",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="changes"} 0 +engine_daemon_container_actions_seconds_count{action="changes"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="commit"} 0 +engine_daemon_container_actions_seconds_count{action="commit"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.25"} 1 
+engine_daemon_container_actions_seconds_bucket{action="create",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="create"} 0 +engine_daemon_container_actions_seconds_count{action="create"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="delete"} 0 +engine_daemon_container_actions_seconds_count{action="delete"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.05"} 1 
+engine_daemon_container_actions_seconds_bucket{action="start",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="start"} 0 +engine_daemon_container_actions_seconds_count{action="start"} 1 +# HELP engine_daemon_container_states_containers The count of containers in various states +# TYPE engine_daemon_container_states_containers gauge +engine_daemon_container_states_containers{state="paused"} 11 +engine_daemon_container_states_containers{state="running"} 12 +engine_daemon_container_states_containers{state="stopped"} 13 +# HELP engine_daemon_engine_cpus_cpus The number of cpus that the host system of the engine has +# TYPE engine_daemon_engine_cpus_cpus gauge +engine_daemon_engine_cpus_cpus 4 +# HELP engine_daemon_engine_info The information related to the engine and the OS it is running on +# TYPE engine_daemon_engine_info gauge +engine_daemon_engine_info{architecture="x86_64",commit="774a1f4eee",daemon_id="NFZK:ZHHR:73WY:RV7D:MMU2:SE24:WWRJ:A3WN:WMMA:SPCL:PVO3:VGY7",graphdriver="overlay2",kernel="4.14.105-1-MANJARO",os="Manjaro Linux",os_type="linux",version="18.09.3-ce"} 1 +# HELP engine_daemon_engine_memory_bytes The number of bytes of memory that the host system of the engine has +# TYPE engine_daemon_engine_memory_bytes gauge +engine_daemon_engine_memory_bytes 2.5215361024e+10 +# HELP engine_daemon_events_subscribers_total The number of current subscribers to events +# TYPE engine_daemon_events_subscribers_total gauge 
+engine_daemon_events_subscribers_total 0 +# HELP engine_daemon_events_total The number of events logged +# TYPE engine_daemon_events_total counter +engine_daemon_events_total 0 +# HELP engine_daemon_health_checks_failed_total The total number of failed health checks +# TYPE engine_daemon_health_checks_failed_total counter +engine_daemon_health_checks_failed_total 33 +# HELP engine_daemon_health_checks_total The total number of health checks +# TYPE engine_daemon_health_checks_total counter +engine_daemon_health_checks_total 0 +# HELP etcd_debugging_snap_save_marshalling_duration_seconds The marshalling cost distributions of save called by snapshot. +# TYPE etcd_debugging_snap_save_marshalling_duration_seconds histogram +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.001"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.002"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.004"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.008"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.016"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.032"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.064"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.128"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.256"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.512"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="1.024"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="2.048"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="4.096"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="8.192"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="+Inf"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_sum 0 +etcd_debugging_snap_save_marshalling_duration_seconds_count 0 +# 
HELP etcd_debugging_snap_save_total_duration_seconds The total latency distributions of save called by snapshot. +# TYPE etcd_debugging_snap_save_total_duration_seconds histogram +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.001"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.002"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.004"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.008"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.016"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.032"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.064"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.128"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.256"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.512"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="1.024"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="2.048"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="4.096"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="8.192"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="+Inf"} 0 +etcd_debugging_snap_save_total_duration_seconds_sum 0 +etcd_debugging_snap_save_total_duration_seconds_count 0 +# HELP etcd_disk_wal_fsync_duration_seconds The latency distributions of fsync called by wal. 
+# TYPE etcd_disk_wal_fsync_duration_seconds histogram +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.001"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.002"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.004"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.008"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.016"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.032"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.064"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.128"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.256"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.512"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="1.024"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="2.048"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="4.096"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="8.192"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="+Inf"} 0 +etcd_disk_wal_fsync_duration_seconds_sum 0 +etcd_disk_wal_fsync_duration_seconds_count 0 +# HELP go_gc_duration_seconds A summary of the GC invocation durations. +# TYPE go_gc_duration_seconds summary +go_gc_duration_seconds{quantile="0"} 1.0085e-05 +go_gc_duration_seconds{quantile="0.25"} 3.1991e-05 +go_gc_duration_seconds{quantile="0.5"} 4.8062e-05 +go_gc_duration_seconds{quantile="0.75"} 9.067e-05 +go_gc_duration_seconds{quantile="1"} 0.000175239 +go_gc_duration_seconds_sum 0.000724173 +go_gc_duration_seconds_count 12 +# HELP go_goroutines Number of goroutines that currently exist. +# TYPE go_goroutines gauge +go_goroutines 50 +# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use. +# TYPE go_memstats_alloc_bytes gauge +go_memstats_alloc_bytes 8.13368e+06 +# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed. 
+# TYPE go_memstats_alloc_bytes_total counter +go_memstats_alloc_bytes_total 2.7343352e+07 +# HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table. +# TYPE go_memstats_buck_hash_sys_bytes gauge +go_memstats_buck_hash_sys_bytes 1.454057e+06 +# HELP go_memstats_frees_total Total number of frees. +# TYPE go_memstats_frees_total counter +go_memstats_frees_total 319815 +# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata. +# TYPE go_memstats_gc_sys_bytes gauge +go_memstats_gc_sys_bytes 2.398208e+06 +# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use. +# TYPE go_memstats_heap_alloc_bytes gauge +go_memstats_heap_alloc_bytes 8.13368e+06 +# HELP go_memstats_heap_idle_bytes Number of heap bytes waiting to be used. +# TYPE go_memstats_heap_idle_bytes gauge +go_memstats_heap_idle_bytes 5.5648256e+07 +# HELP go_memstats_heap_inuse_bytes Number of heap bytes that are in use. +# TYPE go_memstats_heap_inuse_bytes gauge +go_memstats_heap_inuse_bytes 1.0477568e+07 +# HELP go_memstats_heap_objects Number of allocated objects. +# TYPE go_memstats_heap_objects gauge +go_memstats_heap_objects 114878 +# HELP go_memstats_heap_released_bytes_total Total number of heap bytes released to OS. +# TYPE go_memstats_heap_released_bytes_total counter +go_memstats_heap_released_bytes_total 5.4738944e+07 +# HELP go_memstats_heap_sys_bytes Number of heap bytes obtained from system. +# TYPE go_memstats_heap_sys_bytes gauge +go_memstats_heap_sys_bytes 6.6125824e+07 +# HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection. +# TYPE go_memstats_last_gc_time_seconds gauge +go_memstats_last_gc_time_seconds 1.5528438390886765e+09 +# HELP go_memstats_lookups_total Total number of pointer lookups. +# TYPE go_memstats_lookups_total counter +go_memstats_lookups_total 0 +# HELP go_memstats_mallocs_total Total number of mallocs. 
+# TYPE go_memstats_mallocs_total counter +go_memstats_mallocs_total 434693 +# HELP go_memstats_mcache_inuse_bytes Number of bytes in use by mcache structures. +# TYPE go_memstats_mcache_inuse_bytes gauge +go_memstats_mcache_inuse_bytes 6944 +# HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system. +# TYPE go_memstats_mcache_sys_bytes gauge +go_memstats_mcache_sys_bytes 16384 +# HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures. +# TYPE go_memstats_mspan_inuse_bytes gauge +go_memstats_mspan_inuse_bytes 159696 +# HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system. +# TYPE go_memstats_mspan_sys_bytes gauge +go_memstats_mspan_sys_bytes 196608 +# HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place. +# TYPE go_memstats_next_gc_bytes gauge +go_memstats_next_gc_bytes 1.5134512e+07 +# HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations. +# TYPE go_memstats_other_sys_bytes gauge +go_memstats_other_sys_bytes 1.112335e+06 +# HELP go_memstats_stack_inuse_bytes Number of bytes in use by the stack allocator. +# TYPE go_memstats_stack_inuse_bytes gauge +go_memstats_stack_inuse_bytes 983040 +# HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator. +# TYPE go_memstats_stack_sys_bytes gauge +go_memstats_stack_sys_bytes 983040 +# HELP go_memstats_sys_bytes Number of bytes obtained by system. Sum of all system allocations. +# TYPE go_memstats_sys_bytes gauge +go_memstats_sys_bytes 7.2286456e+07 +# HELP http_request_duration_microseconds The HTTP request latencies in microseconds. 
+# TYPE http_request_duration_microseconds summary +http_request_duration_microseconds{handler="prometheus",quantile="0.5"} NaN +http_request_duration_microseconds{handler="prometheus",quantile="0.9"} NaN +http_request_duration_microseconds{handler="prometheus",quantile="0.99"} NaN +http_request_duration_microseconds_sum{handler="prometheus"} 0 +http_request_duration_microseconds_count{handler="prometheus"} 0 +# HELP http_request_size_bytes The HTTP request sizes in bytes. +# TYPE http_request_size_bytes summary +http_request_size_bytes{handler="prometheus",quantile="0.5"} NaN +http_request_size_bytes{handler="prometheus",quantile="0.9"} NaN +http_request_size_bytes{handler="prometheus",quantile="0.99"} NaN +http_request_size_bytes_sum{handler="prometheus"} 0 +http_request_size_bytes_count{handler="prometheus"} 0 +# HELP http_response_size_bytes The HTTP response sizes in bytes. +# TYPE http_response_size_bytes summary +http_response_size_bytes{handler="prometheus",quantile="0.5"} NaN +http_response_size_bytes{handler="prometheus",quantile="0.9"} NaN +http_response_size_bytes{handler="prometheus",quantile="0.99"} NaN +http_response_size_bytes_sum{handler="prometheus"} 0 +http_response_size_bytes_count{handler="prometheus"} 0 +# HELP logger_log_entries_size_greater_than_buffer_total Number of log entries which are larger than the log buffer +# TYPE logger_log_entries_size_greater_than_buffer_total counter +logger_log_entries_size_greater_than_buffer_total 0 +# HELP logger_log_read_operations_failed_total Number of log reads from container stdio that failed +# TYPE logger_log_read_operations_failed_total counter +logger_log_read_operations_failed_total 0 +# HELP logger_log_write_operations_failed_total Number of log write operations that failed +# TYPE logger_log_write_operations_failed_total counter +logger_log_write_operations_failed_total 0 +# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. 
+# TYPE process_cpu_seconds_total counter +process_cpu_seconds_total 2.12 +# HELP process_max_fds Maximum number of open file descriptors. +# TYPE process_max_fds gauge +process_max_fds 1.048576e+06 +# HELP process_open_fds Number of open file descriptors. +# TYPE process_open_fds gauge +process_open_fds 24 +# HELP process_resident_memory_bytes Resident memory size in bytes. +# TYPE process_resident_memory_bytes gauge +process_resident_memory_bytes 8.5929984e+07 +# HELP process_start_time_seconds Start time of the process since unix epoch in seconds. +# TYPE process_start_time_seconds gauge +process_start_time_seconds 1.55284287673e+09 +# HELP process_virtual_memory_bytes Virtual memory size in bytes. +# TYPE process_virtual_memory_bytes gauge +process_virtual_memory_bytes 1.257283584e+09 +# HELP swarm_dispatcher_scheduling_delay_seconds Scheduling delay is the time a task takes to go from NEW to RUNNING state. +# TYPE swarm_dispatcher_scheduling_delay_seconds histogram +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.005"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.01"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.025"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.05"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.1"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.25"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.5"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="1"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="2.5"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="5"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="10"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="+Inf"} 0 +swarm_dispatcher_scheduling_delay_seconds_sum 0 +swarm_dispatcher_scheduling_delay_seconds_count 0 +# HELP swarm_manager_configs_total The number of configs in the cluster object store +# TYPE swarm_manager_configs_total gauge +swarm_manager_configs_total 1 +# HELP 
swarm_manager_leader Indicates if this manager node is a leader +# TYPE swarm_manager_leader gauge +swarm_manager_leader 1 +# HELP swarm_manager_networks_total The number of networks in the cluster object store +# TYPE swarm_manager_networks_total gauge +swarm_manager_networks_total 3 +# HELP swarm_manager_nodes The number of nodes +# TYPE swarm_manager_nodes gauge +swarm_manager_nodes{state="disconnected"} 1 +swarm_manager_nodes{state="down"} 2 +swarm_manager_nodes{state="ready"} 3 +swarm_manager_nodes{state="unknown"} 4 +# HELP swarm_manager_secrets_total The number of secrets in the cluster object store +# TYPE swarm_manager_secrets_total gauge +swarm_manager_secrets_total 1 +# HELP swarm_manager_services_total The number of services in the cluster object store +# TYPE swarm_manager_services_total gauge +swarm_manager_services_total 1 +# HELP swarm_manager_tasks_total The number of tasks in the cluster object store +# TYPE swarm_manager_tasks_total gauge +swarm_manager_tasks_total{state="accepted"} 1 +swarm_manager_tasks_total{state="assigned"} 2 +swarm_manager_tasks_total{state="complete"} 3 +swarm_manager_tasks_total{state="failed"} 4 +swarm_manager_tasks_total{state="new"} 5 +swarm_manager_tasks_total{state="orphaned"} 6 +swarm_manager_tasks_total{state="pending"} 7 +swarm_manager_tasks_total{state="preparing"} 8 +swarm_manager_tasks_total{state="ready"} 9 +swarm_manager_tasks_total{state="rejected"} 10 +swarm_manager_tasks_total{state="remove"} 11 +swarm_manager_tasks_total{state="running"} 12 +swarm_manager_tasks_total{state="shutdown"} 13 +swarm_manager_tasks_total{state="starting"} 14 +# HELP swarm_node_info Information related to the swarm +# TYPE swarm_node_info gauge +swarm_node_info{node_id="193816ofdqsg9kkm0hkfladvo",swarm_id="k1a6iu49n97a1vej3u5pjgsbr"} 1 +# HELP swarm_node_manager Whether this node is a manager or not +# TYPE swarm_node_manager gauge +swarm_node_manager 1 +# HELP swarm_raft_snapshot_latency_seconds Raft snapshot create latency. 
+# TYPE swarm_raft_snapshot_latency_seconds histogram +swarm_raft_snapshot_latency_seconds_bucket{le="0.005"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.01"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.025"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.05"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.1"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.25"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.5"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="1"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="2.5"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="5"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="10"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="+Inf"} 0 +swarm_raft_snapshot_latency_seconds_sum 0 +swarm_raft_snapshot_latency_seconds_count 0 +# HELP swarm_raft_transaction_latency_seconds Raft transaction latency. +# TYPE swarm_raft_transaction_latency_seconds histogram +swarm_raft_transaction_latency_seconds_bucket{le="0.005"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.01"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.025"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.05"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.1"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.25"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.5"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="1"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="2.5"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="5"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="10"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="+Inf"} 0 +swarm_raft_transaction_latency_seconds_sum 0 +swarm_raft_transaction_latency_seconds_count 0 +# HELP swarm_store_batch_latency_seconds Raft store batch latency. 
+# TYPE swarm_store_batch_latency_seconds histogram +swarm_store_batch_latency_seconds_bucket{le="0.005"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.01"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.025"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.05"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.1"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.25"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.5"} 0 +swarm_store_batch_latency_seconds_bucket{le="1"} 0 +swarm_store_batch_latency_seconds_bucket{le="2.5"} 0 +swarm_store_batch_latency_seconds_bucket{le="5"} 0 +swarm_store_batch_latency_seconds_bucket{le="10"} 0 +swarm_store_batch_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_batch_latency_seconds_sum 0 +swarm_store_batch_latency_seconds_count 0 +# HELP swarm_store_lookup_latency_seconds Raft store read latency. +# TYPE swarm_store_lookup_latency_seconds histogram +swarm_store_lookup_latency_seconds_bucket{le="0.005"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.01"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.025"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.05"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.1"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.25"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.5"} 0 +swarm_store_lookup_latency_seconds_bucket{le="1"} 0 +swarm_store_lookup_latency_seconds_bucket{le="2.5"} 0 +swarm_store_lookup_latency_seconds_bucket{le="5"} 0 +swarm_store_lookup_latency_seconds_bucket{le="10"} 0 +swarm_store_lookup_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_lookup_latency_seconds_sum 0 +swarm_store_lookup_latency_seconds_count 0 +# HELP swarm_store_memory_store_lock_duration_seconds Duration for which the raft memory store lock was held. 
+# TYPE swarm_store_memory_store_lock_duration_seconds histogram +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.005"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.01"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.025"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.05"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.1"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.25"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.5"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="1"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="2.5"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="5"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="10"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="+Inf"} 0 +swarm_store_memory_store_lock_duration_seconds_sum 0 +swarm_store_memory_store_lock_duration_seconds_count 0 +# HELP swarm_store_read_tx_latency_seconds Raft store read tx latency. +# TYPE swarm_store_read_tx_latency_seconds histogram +swarm_store_read_tx_latency_seconds_bucket{le="0.005"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.01"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.025"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.05"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.1"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.25"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.5"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="1"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="2.5"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="5"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="10"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_read_tx_latency_seconds_sum 0 +swarm_store_read_tx_latency_seconds_count 0 +# HELP swarm_store_write_tx_latency_seconds Raft store write tx latency. 
+# TYPE swarm_store_write_tx_latency_seconds histogram +swarm_store_write_tx_latency_seconds_bucket{le="0.005"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.01"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.025"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.05"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.1"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.25"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.5"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="1"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="2.5"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="5"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="10"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_write_tx_latency_seconds_sum 0 +swarm_store_write_tx_latency_seconds_count 0
\ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/v18.09.3-ce.txt b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/v18.09.3-ce.txt new file mode 100644 index 000000000..b54589210 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker_engine/testdata/v18.09.3-ce.txt @@ -0,0 +1,465 @@ +# HELP builder_builds_failed_total Number of failed image builds +# TYPE builder_builds_failed_total counter +builder_builds_failed_total{reason="build_canceled"} 1 +builder_builds_failed_total{reason="build_target_not_reachable_error"} 2 +builder_builds_failed_total{reason="command_not_supported_error"} 3 +builder_builds_failed_total{reason="dockerfile_empty_error"} 4 +builder_builds_failed_total{reason="dockerfile_syntax_error"} 5 +builder_builds_failed_total{reason="error_processing_commands_error"} 6 +builder_builds_failed_total{reason="missing_onbuild_arguments_error"} 7 +builder_builds_failed_total{reason="unknown_instruction_error"} 8 +# HELP builder_builds_triggered_total Number of triggered image builds +# TYPE builder_builds_triggered_total counter +builder_builds_triggered_total 0 +# HELP engine_daemon_container_actions_seconds The number of seconds it takes to process each container action +# TYPE engine_daemon_container_actions_seconds histogram +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="1"} 1 
+engine_daemon_container_actions_seconds_bucket{action="changes",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="changes",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="changes"} 0 +engine_daemon_container_actions_seconds_count{action="changes"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="commit",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="commit"} 0 +engine_daemon_container_actions_seconds_count{action="commit"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="0.25"} 1 
+engine_daemon_container_actions_seconds_bucket{action="create",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="create",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="create"} 0 +engine_daemon_container_actions_seconds_count{action="create"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.05"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="delete",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="delete"} 0 +engine_daemon_container_actions_seconds_count{action="delete"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.005"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.01"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.025"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.05"} 1 
+engine_daemon_container_actions_seconds_bucket{action="start",le="0.1"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.25"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="0.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="1"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="2.5"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="5"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="10"} 1 +engine_daemon_container_actions_seconds_bucket{action="start",le="+Inf"} 1 +engine_daemon_container_actions_seconds_sum{action="start"} 0 +engine_daemon_container_actions_seconds_count{action="start"} 1 +# HELP engine_daemon_container_states_containers The count of containers in various states +# TYPE engine_daemon_container_states_containers gauge +engine_daemon_container_states_containers{state="paused"} 11 +engine_daemon_container_states_containers{state="running"} 12 +engine_daemon_container_states_containers{state="stopped"} 13 +# HELP engine_daemon_engine_cpus_cpus The number of cpus that the host system of the engine has +# TYPE engine_daemon_engine_cpus_cpus gauge +engine_daemon_engine_cpus_cpus 4 +# HELP engine_daemon_engine_info The information related to the engine and the OS it is running on +# TYPE engine_daemon_engine_info gauge +engine_daemon_engine_info{architecture="x86_64",commit="774a1f4eee",daemon_id="NFZK:ZHHR:73WY:RV7D:MMU2:SE24:WWRJ:A3WN:WMMA:SPCL:PVO3:VGY7",graphdriver="overlay2",kernel="4.14.105-1-MANJARO",os="Manjaro Linux",os_type="linux",version="18.09.3-ce"} 1 +# HELP engine_daemon_engine_memory_bytes The number of bytes of memory that the host system of the engine has +# TYPE engine_daemon_engine_memory_bytes gauge +engine_daemon_engine_memory_bytes 2.5215361024e+10 +# HELP engine_daemon_events_subscribers_total The number of current subscribers to events +# TYPE engine_daemon_events_subscribers_total gauge 
+engine_daemon_events_subscribers_total 0 +# HELP engine_daemon_events_total The number of events logged +# TYPE engine_daemon_events_total counter +engine_daemon_events_total 0 +# HELP engine_daemon_health_checks_failed_total The total number of failed health checks +# TYPE engine_daemon_health_checks_failed_total counter +engine_daemon_health_checks_failed_total 33 +# HELP engine_daemon_health_checks_total The total number of health checks +# TYPE engine_daemon_health_checks_total counter +engine_daemon_health_checks_total 0 +# HELP etcd_debugging_snap_save_marshalling_duration_seconds The marshalling cost distributions of save called by snapshot. +# TYPE etcd_debugging_snap_save_marshalling_duration_seconds histogram +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.001"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.002"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.004"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.008"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.016"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.032"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.064"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.128"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.256"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="0.512"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="1.024"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="2.048"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="4.096"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="8.192"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_bucket{le="+Inf"} 0 +etcd_debugging_snap_save_marshalling_duration_seconds_sum 0 +etcd_debugging_snap_save_marshalling_duration_seconds_count 0 +# 
HELP etcd_debugging_snap_save_total_duration_seconds The total latency distributions of save called by snapshot. +# TYPE etcd_debugging_snap_save_total_duration_seconds histogram +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.001"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.002"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.004"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.008"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.016"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.032"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.064"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.128"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.256"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="0.512"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="1.024"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="2.048"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="4.096"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="8.192"} 0 +etcd_debugging_snap_save_total_duration_seconds_bucket{le="+Inf"} 0 +etcd_debugging_snap_save_total_duration_seconds_sum 0 +etcd_debugging_snap_save_total_duration_seconds_count 0 +# HELP etcd_disk_wal_fsync_duration_seconds The latency distributions of fsync called by wal. 
+# TYPE etcd_disk_wal_fsync_duration_seconds histogram +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.001"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.002"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.004"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.008"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.016"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.032"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.064"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.128"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.256"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="0.512"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="1.024"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="2.048"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="4.096"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="8.192"} 0 +etcd_disk_wal_fsync_duration_seconds_bucket{le="+Inf"} 0 +etcd_disk_wal_fsync_duration_seconds_sum 0 +etcd_disk_wal_fsync_duration_seconds_count 0 +# HELP go_gc_duration_seconds A summary of the GC invocation durations. +# TYPE go_gc_duration_seconds summary +go_gc_duration_seconds{quantile="0"} 1.0085e-05 +go_gc_duration_seconds{quantile="0.25"} 3.1991e-05 +go_gc_duration_seconds{quantile="0.5"} 4.8062e-05 +go_gc_duration_seconds{quantile="0.75"} 9.067e-05 +go_gc_duration_seconds{quantile="1"} 0.000175239 +go_gc_duration_seconds_sum 0.000724173 +go_gc_duration_seconds_count 12 +# HELP go_goroutines Number of goroutines that currently exist. +# TYPE go_goroutines gauge +go_goroutines 50 +# HELP go_memstats_alloc_bytes Number of bytes allocated and still in use. +# TYPE go_memstats_alloc_bytes gauge +go_memstats_alloc_bytes 8.13368e+06 +# HELP go_memstats_alloc_bytes_total Total number of bytes allocated, even if freed. 
+# TYPE go_memstats_alloc_bytes_total counter +go_memstats_alloc_bytes_total 2.7343352e+07 +# HELP go_memstats_buck_hash_sys_bytes Number of bytes used by the profiling bucket hash table. +# TYPE go_memstats_buck_hash_sys_bytes gauge +go_memstats_buck_hash_sys_bytes 1.454057e+06 +# HELP go_memstats_frees_total Total number of frees. +# TYPE go_memstats_frees_total counter +go_memstats_frees_total 319815 +# HELP go_memstats_gc_sys_bytes Number of bytes used for garbage collection system metadata. +# TYPE go_memstats_gc_sys_bytes gauge +go_memstats_gc_sys_bytes 2.398208e+06 +# HELP go_memstats_heap_alloc_bytes Number of heap bytes allocated and still in use. +# TYPE go_memstats_heap_alloc_bytes gauge +go_memstats_heap_alloc_bytes 8.13368e+06 +# HELP go_memstats_heap_idle_bytes Number of heap bytes waiting to be used. +# TYPE go_memstats_heap_idle_bytes gauge +go_memstats_heap_idle_bytes 5.5648256e+07 +# HELP go_memstats_heap_inuse_bytes Number of heap bytes that are in use. +# TYPE go_memstats_heap_inuse_bytes gauge +go_memstats_heap_inuse_bytes 1.0477568e+07 +# HELP go_memstats_heap_objects Number of allocated objects. +# TYPE go_memstats_heap_objects gauge +go_memstats_heap_objects 114878 +# HELP go_memstats_heap_released_bytes_total Total number of heap bytes released to OS. +# TYPE go_memstats_heap_released_bytes_total counter +go_memstats_heap_released_bytes_total 5.4738944e+07 +# HELP go_memstats_heap_sys_bytes Number of heap bytes obtained from system. +# TYPE go_memstats_heap_sys_bytes gauge +go_memstats_heap_sys_bytes 6.6125824e+07 +# HELP go_memstats_last_gc_time_seconds Number of seconds since 1970 of last garbage collection. +# TYPE go_memstats_last_gc_time_seconds gauge +go_memstats_last_gc_time_seconds 1.5528438390886765e+09 +# HELP go_memstats_lookups_total Total number of pointer lookups. +# TYPE go_memstats_lookups_total counter +go_memstats_lookups_total 0 +# HELP go_memstats_mallocs_total Total number of mallocs. 
+# TYPE go_memstats_mallocs_total counter +go_memstats_mallocs_total 434693 +# HELP go_memstats_mcache_inuse_bytes Number of bytes in use by mcache structures. +# TYPE go_memstats_mcache_inuse_bytes gauge +go_memstats_mcache_inuse_bytes 6944 +# HELP go_memstats_mcache_sys_bytes Number of bytes used for mcache structures obtained from system. +# TYPE go_memstats_mcache_sys_bytes gauge +go_memstats_mcache_sys_bytes 16384 +# HELP go_memstats_mspan_inuse_bytes Number of bytes in use by mspan structures. +# TYPE go_memstats_mspan_inuse_bytes gauge +go_memstats_mspan_inuse_bytes 159696 +# HELP go_memstats_mspan_sys_bytes Number of bytes used for mspan structures obtained from system. +# TYPE go_memstats_mspan_sys_bytes gauge +go_memstats_mspan_sys_bytes 196608 +# HELP go_memstats_next_gc_bytes Number of heap bytes when next garbage collection will take place. +# TYPE go_memstats_next_gc_bytes gauge +go_memstats_next_gc_bytes 1.5134512e+07 +# HELP go_memstats_other_sys_bytes Number of bytes used for other system allocations. +# TYPE go_memstats_other_sys_bytes gauge +go_memstats_other_sys_bytes 1.112335e+06 +# HELP go_memstats_stack_inuse_bytes Number of bytes in use by the stack allocator. +# TYPE go_memstats_stack_inuse_bytes gauge +go_memstats_stack_inuse_bytes 983040 +# HELP go_memstats_stack_sys_bytes Number of bytes obtained from system for stack allocator. +# TYPE go_memstats_stack_sys_bytes gauge +go_memstats_stack_sys_bytes 983040 +# HELP go_memstats_sys_bytes Number of bytes obtained by system. Sum of all system allocations. +# TYPE go_memstats_sys_bytes gauge +go_memstats_sys_bytes 7.2286456e+07 +# HELP http_request_duration_microseconds The HTTP request latencies in microseconds. 
+# TYPE http_request_duration_microseconds summary +http_request_duration_microseconds{handler="prometheus",quantile="0.5"} NaN +http_request_duration_microseconds{handler="prometheus",quantile="0.9"} NaN +http_request_duration_microseconds{handler="prometheus",quantile="0.99"} NaN +http_request_duration_microseconds_sum{handler="prometheus"} 0 +http_request_duration_microseconds_count{handler="prometheus"} 0 +# HELP http_request_size_bytes The HTTP request sizes in bytes. +# TYPE http_request_size_bytes summary +http_request_size_bytes{handler="prometheus",quantile="0.5"} NaN +http_request_size_bytes{handler="prometheus",quantile="0.9"} NaN +http_request_size_bytes{handler="prometheus",quantile="0.99"} NaN +http_request_size_bytes_sum{handler="prometheus"} 0 +http_request_size_bytes_count{handler="prometheus"} 0 +# HELP http_response_size_bytes The HTTP response sizes in bytes. +# TYPE http_response_size_bytes summary +http_response_size_bytes{handler="prometheus",quantile="0.5"} NaN +http_response_size_bytes{handler="prometheus",quantile="0.9"} NaN +http_response_size_bytes{handler="prometheus",quantile="0.99"} NaN +http_response_size_bytes_sum{handler="prometheus"} 0 +http_response_size_bytes_count{handler="prometheus"} 0 +# HELP logger_log_entries_size_greater_than_buffer_total Number of log entries which are larger than the log buffer +# TYPE logger_log_entries_size_greater_than_buffer_total counter +logger_log_entries_size_greater_than_buffer_total 0 +# HELP logger_log_read_operations_failed_total Number of log reads from container stdio that failed +# TYPE logger_log_read_operations_failed_total counter +logger_log_read_operations_failed_total 0 +# HELP logger_log_write_operations_failed_total Number of log write operations that failed +# TYPE logger_log_write_operations_failed_total counter +logger_log_write_operations_failed_total 0 +# HELP process_cpu_seconds_total Total user and system CPU time spent in seconds. 
+# TYPE process_cpu_seconds_total counter +process_cpu_seconds_total 2.12 +# HELP process_max_fds Maximum number of open file descriptors. +# TYPE process_max_fds gauge +process_max_fds 1.048576e+06 +# HELP process_open_fds Number of open file descriptors. +# TYPE process_open_fds gauge +process_open_fds 24 +# HELP process_resident_memory_bytes Resident memory size in bytes. +# TYPE process_resident_memory_bytes gauge +process_resident_memory_bytes 8.5929984e+07 +# HELP process_start_time_seconds Start time of the process since unix epoch in seconds. +# TYPE process_start_time_seconds gauge +process_start_time_seconds 1.55284287673e+09 +# HELP process_virtual_memory_bytes Virtual memory size in bytes. +# TYPE process_virtual_memory_bytes gauge +process_virtual_memory_bytes 1.257283584e+09 +# HELP swarm_dispatcher_scheduling_delay_seconds Scheduling delay is the time a task takes to go from NEW to RUNNING state. +# TYPE swarm_dispatcher_scheduling_delay_seconds histogram +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.005"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.01"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.025"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.05"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.1"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.25"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="0.5"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="1"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="2.5"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="5"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="10"} 0 +swarm_dispatcher_scheduling_delay_seconds_bucket{le="+Inf"} 0 +swarm_dispatcher_scheduling_delay_seconds_sum 0 +swarm_dispatcher_scheduling_delay_seconds_count 0 +# HELP swarm_manager_configs_total The number of configs in the cluster object store +# TYPE swarm_manager_configs_total gauge +swarm_manager_configs_total 0 +# HELP 
swarm_manager_leader Indicates if this manager node is a leader +# TYPE swarm_manager_leader gauge +swarm_manager_leader 0 +# HELP swarm_manager_networks_total The number of networks in the cluster object store +# TYPE swarm_manager_networks_total gauge +swarm_manager_networks_total 0 +# HELP swarm_manager_nodes The number of nodes +# TYPE swarm_manager_nodes gauge +swarm_manager_nodes{state="disconnected"} 0 +swarm_manager_nodes{state="down"} 0 +swarm_manager_nodes{state="ready"} 0 +swarm_manager_nodes{state="unknown"} 0 +# HELP swarm_manager_secrets_total The number of secrets in the cluster object store +# TYPE swarm_manager_secrets_total gauge +swarm_manager_secrets_total 0 +# HELP swarm_manager_services_total The number of services in the cluster object store +# TYPE swarm_manager_services_total gauge +swarm_manager_services_total 0 +# HELP swarm_manager_tasks_total The number of tasks in the cluster object store +# TYPE swarm_manager_tasks_total gauge +swarm_manager_tasks_total{state="accepted"} 0 +swarm_manager_tasks_total{state="assigned"} 0 +swarm_manager_tasks_total{state="complete"} 0 +swarm_manager_tasks_total{state="failed"} 0 +swarm_manager_tasks_total{state="new"} 0 +swarm_manager_tasks_total{state="orphaned"} 0 +swarm_manager_tasks_total{state="pending"} 0 +swarm_manager_tasks_total{state="preparing"} 0 +swarm_manager_tasks_total{state="ready"} 0 +swarm_manager_tasks_total{state="rejected"} 0 +swarm_manager_tasks_total{state="remove"} 0 +swarm_manager_tasks_total{state="running"} 0 +swarm_manager_tasks_total{state="shutdown"} 0 +swarm_manager_tasks_total{state="starting"} 0 +# HELP swarm_node_manager Whether this node is a manager or not +# TYPE swarm_node_manager gauge +swarm_node_manager 0 +# HELP swarm_raft_snapshot_latency_seconds Raft snapshot create latency. 
+# TYPE swarm_raft_snapshot_latency_seconds histogram +swarm_raft_snapshot_latency_seconds_bucket{le="0.005"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.01"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.025"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.05"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.1"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.25"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="0.5"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="1"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="2.5"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="5"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="10"} 0 +swarm_raft_snapshot_latency_seconds_bucket{le="+Inf"} 0 +swarm_raft_snapshot_latency_seconds_sum 0 +swarm_raft_snapshot_latency_seconds_count 0 +# HELP swarm_raft_transaction_latency_seconds Raft transaction latency. +# TYPE swarm_raft_transaction_latency_seconds histogram +swarm_raft_transaction_latency_seconds_bucket{le="0.005"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.01"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.025"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.05"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.1"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.25"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="0.5"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="1"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="2.5"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="5"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="10"} 0 +swarm_raft_transaction_latency_seconds_bucket{le="+Inf"} 0 +swarm_raft_transaction_latency_seconds_sum 0 +swarm_raft_transaction_latency_seconds_count 0 +# HELP swarm_store_batch_latency_seconds Raft store batch latency. 
+# TYPE swarm_store_batch_latency_seconds histogram +swarm_store_batch_latency_seconds_bucket{le="0.005"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.01"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.025"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.05"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.1"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.25"} 0 +swarm_store_batch_latency_seconds_bucket{le="0.5"} 0 +swarm_store_batch_latency_seconds_bucket{le="1"} 0 +swarm_store_batch_latency_seconds_bucket{le="2.5"} 0 +swarm_store_batch_latency_seconds_bucket{le="5"} 0 +swarm_store_batch_latency_seconds_bucket{le="10"} 0 +swarm_store_batch_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_batch_latency_seconds_sum 0 +swarm_store_batch_latency_seconds_count 0 +# HELP swarm_store_lookup_latency_seconds Raft store read latency. +# TYPE swarm_store_lookup_latency_seconds histogram +swarm_store_lookup_latency_seconds_bucket{le="0.005"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.01"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.025"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.05"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.1"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.25"} 0 +swarm_store_lookup_latency_seconds_bucket{le="0.5"} 0 +swarm_store_lookup_latency_seconds_bucket{le="1"} 0 +swarm_store_lookup_latency_seconds_bucket{le="2.5"} 0 +swarm_store_lookup_latency_seconds_bucket{le="5"} 0 +swarm_store_lookup_latency_seconds_bucket{le="10"} 0 +swarm_store_lookup_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_lookup_latency_seconds_sum 0 +swarm_store_lookup_latency_seconds_count 0 +# HELP swarm_store_memory_store_lock_duration_seconds Duration for which the raft memory store lock was held. 
+# TYPE swarm_store_memory_store_lock_duration_seconds histogram +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.005"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.01"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.025"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.05"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.1"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.25"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="0.5"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="1"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="2.5"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="5"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="10"} 0 +swarm_store_memory_store_lock_duration_seconds_bucket{le="+Inf"} 0 +swarm_store_memory_store_lock_duration_seconds_sum 0 +swarm_store_memory_store_lock_duration_seconds_count 0 +# HELP swarm_store_read_tx_latency_seconds Raft store read tx latency. +# TYPE swarm_store_read_tx_latency_seconds histogram +swarm_store_read_tx_latency_seconds_bucket{le="0.005"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.01"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.025"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.05"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.1"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.25"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="0.5"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="1"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="2.5"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="5"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="10"} 0 +swarm_store_read_tx_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_read_tx_latency_seconds_sum 0 +swarm_store_read_tx_latency_seconds_count 0 +# HELP swarm_store_write_tx_latency_seconds Raft store write tx latency. 
+# TYPE swarm_store_write_tx_latency_seconds histogram +swarm_store_write_tx_latency_seconds_bucket{le="0.005"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.01"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.025"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.05"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.1"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.25"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="0.5"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="1"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="2.5"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="5"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="10"} 0 +swarm_store_write_tx_latency_seconds_bucket{le="+Inf"} 0 +swarm_store_write_tx_latency_seconds_sum 0 +swarm_store_write_tx_latency_seconds_count 0
\ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/README.md b/src/go/collectors/go.d.plugin/modules/dockerhub/README.md new file mode 120000 index 000000000..703add4ed --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/README.md @@ -0,0 +1 @@ +integrations/docker_hub_repository.md
\ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/apiclient.go b/src/go/collectors/go.d.plugin/modules/dockerhub/apiclient.go new file mode 100644 index 000000000..fa6e1c805 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/apiclient.go @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package dockerhub + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "path" + + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" +) + +type repository struct { + User string + Name string + Status int + StarCount int `json:"star_count"` + PullCount int `json:"pull_count"` + LastUpdated string `json:"last_updated"` +} + +func newAPIClient(client *http.Client, request web.Request) *apiClient { + return &apiClient{httpClient: client, request: request} +} + +type apiClient struct { + httpClient *http.Client + request web.Request +} + +func (a apiClient) getRepository(repoName string) (*repository, error) { + req, err := a.createRequest(repoName) + if err != nil { + return nil, fmt.Errorf("error on creating http request : %v", err) + } + + resp, err := a.doRequestOK(req) + defer closeBody(resp) + if err != nil { + return nil, err + } + + var repo repository + if err := json.NewDecoder(resp.Body).Decode(&repo); err != nil { + return nil, fmt.Errorf("error on parsing response from %s : %v", req.URL, err) + } + + return &repo, nil +} + +func (a apiClient) doRequestOK(req *http.Request) (*http.Response, error) { + resp, err := a.httpClient.Do(req) + if err != nil { + return nil, fmt.Errorf("error on request: %v", err) + } + + if resp.StatusCode != http.StatusOK { + return resp, fmt.Errorf("%s returned HTTP status %d", req.URL, resp.StatusCode) + } + return resp, nil +} + +func (a apiClient) createRequest(urlPath string) (*http.Request, error) { + req := a.request.Copy() + u, err := url.Parse(req.URL) + if err != nil { + return nil, err + } + + u.Path = path.Join(u.Path, urlPath) + req.URL = 
u.String() + return web.NewHTTPRequest(req) +} + +func closeBody(resp *http.Response) { + if resp != nil && resp.Body != nil { + _, _ = io.Copy(io.Discard, resp.Body) + _ = resp.Body.Close() + } +} diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/charts.go b/src/go/collectors/go.d.plugin/modules/dockerhub/charts.go new file mode 100644 index 000000000..07ba8e18b --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/charts.go @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package dockerhub + +import ( + "strings" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" +) + +type ( + // Charts is an alias for module.Charts + Charts = module.Charts + // Dims is an alias for module.Dims + Dims = module.Dims + // Dim is an alias for module.Dim + Dim = module.Dim +) + +var charts = Charts{ + { + ID: "pulls_sum", + Title: "Pulls Summary", + Units: "pulls", + Fam: "pulls", + Dims: Dims{ + {ID: "pull_sum", Name: "sum"}, + }, + }, + { + ID: "pulls", + Title: "Pulls", + Units: "pulls", + Fam: "pulls", + Type: module.Stacked, + }, + { + ID: "pulls_rate", + Title: "Pulls Rate", + Units: "pulls/s", + Fam: "pulls", + Type: module.Stacked, + }, + { + ID: "stars", + Title: "Stars", + Units: "stars", + Fam: "stars", + Type: module.Stacked, + }, + { + ID: "status", + Title: "Current Status", + Units: "status", + Fam: "status", + }, + { + ID: "last_updated", + Title: "Time Since Last Updated", + Units: "seconds", + Fam: "last updated", + }, +} + +func addReposToCharts(repositories []string, cs *Charts) { + for _, name := range repositories { + dimName := strings.Replace(name, "/", "_", -1) + _ = cs.Get("pulls").AddDim(&Dim{ + ID: "pull_count_" + name, + Name: dimName, + }) + _ = cs.Get("pulls_rate").AddDim(&Dim{ + ID: "pull_count_" + name, + Name: dimName, + Algo: module.Incremental, + }) + _ = cs.Get("stars").AddDim(&Dim{ + ID: "star_count_" + name, + Name: dimName, + }) + _ = cs.Get("status").AddDim(&Dim{ + ID: "status_" + name, + 
Name: dimName, + }) + _ = cs.Get("last_updated").AddDim(&Dim{ + ID: "last_updated_" + name, + Name: dimName, + }) + } +} diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/collect.go b/src/go/collectors/go.d.plugin/modules/dockerhub/collect.go new file mode 100644 index 000000000..211c1ea7c --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/collect.go @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package dockerhub + +import ( + "fmt" + "time" +) + +func (dh *DockerHub) collect() (map[string]int64, error) { + var ( + reposNum = len(dh.Repositories) + ch = make(chan *repository, reposNum) + mx = make(map[string]int64) + ) + + for _, name := range dh.Repositories { + go dh.collectRepo(name, ch) + } + + var ( + parsed int + pullSum int + ) + + for i := 0; i < reposNum; i++ { + repo := <-ch + if repo == nil { + continue + } + if err := parseRepoTo(repo, mx); err != nil { + dh.Errorf("error on parsing %s/%s : %v", repo.User, repo.Name, err) + continue + } + pullSum += repo.PullCount + parsed++ + } + close(ch) + + if parsed == reposNum { + mx["pull_sum"] = int64(pullSum) + } + + return mx, nil +} + +func (dh *DockerHub) collectRepo(repoName string, ch chan *repository) { + repo, err := dh.client.getRepository(repoName) + if err != nil { + dh.Error(err) + } + ch <- repo +} + +func parseRepoTo(repo *repository, mx map[string]int64) error { + t, err := time.Parse(time.RFC3339Nano, repo.LastUpdated) + if err != nil { + return err + } + mx[fmt.Sprintf("last_updated_%s/%s", repo.User, repo.Name)] = int64(time.Since(t).Seconds()) + mx[fmt.Sprintf("star_count_%s/%s", repo.User, repo.Name)] = int64(repo.StarCount) + mx[fmt.Sprintf("pull_count_%s/%s", repo.User, repo.Name)] = int64(repo.PullCount) + mx[fmt.Sprintf("status_%s/%s", repo.User, repo.Name)] = int64(repo.Status) + return nil +} diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/config_schema.json b/src/go/collectors/go.d.plugin/modules/dockerhub/config_schema.json 
new file mode 100644 index 000000000..a8d16888b --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/config_schema.json @@ -0,0 +1,183 @@ +{ + "jsonSchema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "DockerHub collector configuration.", + "type": "object", + "properties": { + "update_every": { + "title": "Update every", + "description": "Data collection interval, measured in seconds.", + "type": "integer", + "minimum": 1, + "default": 5 + }, + "url": { + "title": "URL", + "description": "The URL of the DockerHub repositories endpoint.", + "type": "string", + "default": "https://hub.docker.com/v2/repositories", + "format": "uri" + }, + "timeout": { + "title": "Timeout", + "description": "The timeout in seconds for the HTTP request.", + "type": "number", + "minimum": 0.5, + "default": 2 + }, + "repositories": { + "title": "Repositories", + "description": "List of repositories to monitor.", + "type": [ + "array", + "null" + ], + "items": { + "title": "Name", + "description": "The name of the repository.", + "type": "string" + }, + "uniqueItems": true, + "minItems": 1 + }, + "not_follow_redirects": { + "title": "Not follow redirects", + "description": "If set, the client will not follow HTTP redirects automatically.", + "type": "boolean" + }, + "username": { + "title": "Username", + "description": "The username for basic authentication.", + "type": "string", + "sensitive": true + }, + "password": { + "title": "Password", + "description": "The password for basic authentication.", + "type": "string", + "sensitive": true + }, + "proxy_url": { + "title": "Proxy URL", + "description": "The URL of the proxy server.", + "type": "string" + }, + "proxy_username": { + "title": "Proxy username", + "description": "The username for proxy authentication.", + "type": "string", + "sensitive": true + }, + "proxy_password": { + "title": "Proxy password", + "description": "The password for proxy authentication.", + "type": "string", + 
"sensitive": true + }, + "headers": { + "title": "Headers", + "description": "Additional HTTP headers to include in the request.", + "type": [ + "object", + "null" + ], + "additionalProperties": { + "type": "string" + } + }, + "tls_skip_verify": { + "title": "Skip TLS verification", + "description": "If set, TLS certificate verification will be skipped.", + "type": "boolean" + }, + "tls_ca": { + "title": "TLS CA", + "description": "The path to the CA certificate file for TLS verification.", + "type": "string", + "pattern": "^$|^/" + }, + "tls_cert": { + "title": "TLS certificate", + "description": "The path to the client certificate file for TLS authentication.", + "type": "string", + "pattern": "^$|^/" + }, + "tls_key": { + "title": "TLS key", + "description": "The path to the client key file for TLS authentication.", + "type": "string", + "pattern": "^$|^/" + } + }, + "required": [ + "url", + "repositories" + ], + "additionalProperties": false, + "patternProperties": { + "^name$": {} + } + }, + "uiSchema": { + "ui:flavour": "tabs", + "ui:options": { + "tabs": [ + { + "title": "Base", + "fields": [ + "update_every", + "url", + "timeout", + "repositories", + "not_follow_redirects" + ] + }, + { + "title": "Auth", + "fields": [ + "username", + "password" + ] + }, + { + "title": "TLS", + "fields": [ + "tls_skip_verify", + "tls_ca", + "tls_cert", + "tls_key" + ] + }, + { + "title": "Proxy", + "fields": [ + "proxy_url", + "proxy_username", + "proxy_password" + ] + }, + { + "title": "Headers", + "fields": [ + "headers" + ] + } + ] + }, + "uiOptions": { + "fullPage": true + }, + "timeout": { + "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)." 
+ }, + "repositories": { + "ui:listFlavour": "list" + }, + "password": { + "ui:widget": "password" + }, + "proxy_password": { + "ui:widget": "password" + } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/dockerhub.go b/src/go/collectors/go.d.plugin/modules/dockerhub/dockerhub.go new file mode 100644 index 000000000..d717ff174 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/dockerhub.go @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package dockerhub + +import ( + _ "embed" + "errors" + "time" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" +) + +//go:embed "config_schema.json" +var configSchema string + +func init() { + module.Register("dockerhub", module.Creator{ + JobConfigSchema: configSchema, + Defaults: module.Defaults{ + UpdateEvery: 5, + }, + Create: func() module.Module { return New() }, + }) +} + +func New() *DockerHub { + return &DockerHub{ + Config: Config{ + HTTP: web.HTTP{ + Request: web.Request{ + URL: "https://hub.docker.com/v2/repositories", + }, + Client: web.Client{ + Timeout: web.Duration(time.Second * 2), + }, + }, + }, + } +} + +type Config struct { + web.HTTP `yaml:",inline" json:""` + UpdateEvery int `yaml:"update_every" json:"update_every"` + Repositories []string `yaml:"repositories" json:"repositories"` +} + +type DockerHub struct { + module.Base + Config `yaml:",inline" json:""` + + client *apiClient +} + +func (dh *DockerHub) Configuration() any { + return dh.Config +} + +func (dh *DockerHub) Init() error { + if err := dh.validateConfig(); err != nil { + dh.Errorf("config validation: %v", err) + return err + } + + client, err := dh.initApiClient() + if err != nil { + dh.Error(err) + return err + } + dh.client = client + + return nil +} + +func (dh *DockerHub) Check() error { + mx, err := dh.collect() + if err != nil { + dh.Error(err) + return err + } + if len(mx) == 0 { + return errors.New("no metrics collected") 
+ + } + return nil +} + +func (dh *DockerHub) Charts() *Charts { + cs := charts.Copy() + addReposToCharts(dh.Repositories, cs) + return cs +} + +func (dh *DockerHub) Collect() map[string]int64 { + mx, err := dh.collect() + + if err != nil { + dh.Error(err) + return nil + } + + return mx +} + +func (dh *DockerHub) Cleanup() { + if dh.client != nil && dh.client.httpClient != nil { + dh.client.httpClient.CloseIdleConnections() + } +} diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/dockerhub_test.go b/src/go/collectors/go.d.plugin/modules/dockerhub/dockerhub_test.go new file mode 100644 index 000000000..7036ff7a7 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/dockerhub_test.go @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package dockerhub + +import ( + "net/http" + "net/http/httptest" + "os" + "strings" + "testing" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + dataConfigJSON, _ = os.ReadFile("testdata/config.json") + dataConfigYAML, _ = os.ReadFile("testdata/config.yaml") + + dataRepo1, _ = os.ReadFile("testdata/repo1.txt") + dataRepo2, _ = os.ReadFile("testdata/repo2.txt") + dataRepo3, _ = os.ReadFile("testdata/repo3.txt") +) + +func Test_testDataIsValid(t *testing.T) { + for name, data := range map[string][]byte{ + "dataConfigJSON": dataConfigJSON, + "dataConfigYAML": dataConfigYAML, + "dataRepo1": dataRepo1, + "dataRepo2": dataRepo2, + "dataRepo3": dataRepo3, + } { + require.NotNil(t, data, name) + } +} + +func TestDockerHub_ConfigurationSerialize(t *testing.T) { + module.TestConfigurationSerialize(t, &DockerHub{}, dataConfigJSON, dataConfigYAML) +} + +func TestDockerHub_Charts(t *testing.T) { assert.NotNil(t, New().Charts()) } + +func TestDockerHub_Cleanup(t *testing.T) { New().Cleanup() } + +func TestDockerHub_Init(t *testing.T) { + job := New() + job.Repositories = []string{"name/repo"} + 
assert.NoError(t, job.Init()) + assert.NotNil(t, job.client) +} + +func TestDockerHub_InitNG(t *testing.T) { + assert.Error(t, New().Init()) +} + +func TestDockerHub_Check(t *testing.T) { + ts := httptest.NewServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + switch { + case strings.HasSuffix(r.URL.Path, "name1/repo1"): + _, _ = w.Write(dataRepo1) + case strings.HasSuffix(r.URL.Path, "name2/repo2"): + _, _ = w.Write(dataRepo2) + case strings.HasSuffix(r.URL.Path, "name3/repo3"): + _, _ = w.Write(dataRepo3) + } + })) + defer ts.Close() + + job := New() + job.URL = ts.URL + job.Repositories = []string{"name1/repo1", "name2/repo2", "name3/repo3"} + require.NoError(t, job.Init()) + assert.NoError(t, job.Check()) +} + +func TestDockerHub_CheckNG(t *testing.T) { + job := New() + job.URL = "http://127.0.0.1:38001/metrics" + job.Repositories = []string{"name1/repo1", "name2/repo2", "name3/repo3"} + require.NoError(t, job.Init()) + assert.Error(t, job.Check()) +} + +func TestDockerHub_Collect(t *testing.T) { + ts := httptest.NewServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + switch { + case strings.HasSuffix(r.URL.Path, "name1/repo1"): + _, _ = w.Write(dataRepo1) + case strings.HasSuffix(r.URL.Path, "name2/repo2"): + _, _ = w.Write(dataRepo2) + case strings.HasSuffix(r.URL.Path, "name3/repo3"): + _, _ = w.Write(dataRepo3) + } + })) + defer ts.Close() + + job := New() + job.URL = ts.URL + job.Repositories = []string{"name1/repo1", "name2/repo2", "name3/repo3"} + require.NoError(t, job.Init()) + require.NoError(t, job.Check()) + + expected := map[string]int64{ + "star_count_user1/name1": 45, + "pull_count_user1/name1": 18540191, + "status_user1/name1": 1, + "star_count_user2/name2": 45, + "pull_count_user2/name2": 18540192, + "status_user2/name2": 1, + "star_count_user3/name3": 45, + "pull_count_user3/name3": 18540193, + "status_user3/name3": 1, + "pull_sum": 55620576, + } + + collected := job.Collect() + + for k := 
range collected { + if strings.HasPrefix(k, "last") { + delete(collected, k) + } + } + assert.Equal(t, expected, collected) +} + +func TestDockerHub_InvalidData(t *testing.T) { + ts := httptest.NewServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte("hello and goodbye")) + })) + defer ts.Close() + + job := New() + job.URL = ts.URL + job.Repositories = []string{"name1/repo1", "name2/repo2", "name3/repo3"} + require.NoError(t, job.Init()) + assert.Error(t, job.Check()) +} + +func TestDockerHub_404(t *testing.T) { + ts := httptest.NewServer( + http.HandlerFunc( + func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + })) + defer ts.Close() + + job := New() + job.Repositories = []string{"name1/repo1", "name2/repo2", "name3/repo3"} + require.NoError(t, job.Init()) + assert.Error(t, job.Check()) +} diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/init.go b/src/go/collectors/go.d.plugin/modules/dockerhub/init.go new file mode 100644 index 000000000..245bee1cb --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/init.go @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package dockerhub + +import ( + "errors" + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" +) + +func (dh *DockerHub) validateConfig() error { + if dh.URL == "" { + return errors.New("url not set") + } + if len(dh.Repositories) == 0 { + return errors.New("repositories not set") + } + return nil +} + +func (dh *DockerHub) initApiClient() (*apiClient, error) { + client, err := web.NewHTTPClient(dh.Client) + if err != nil { + return nil, err + } + return newAPIClient(client, dh.Request), nil +} diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/integrations/docker_hub_repository.md b/src/go/collectors/go.d.plugin/modules/dockerhub/integrations/docker_hub_repository.md new file mode 100644 index 000000000..9e16ce4b2 --- /dev/null +++ 
b/src/go/collectors/go.d.plugin/modules/dockerhub/integrations/docker_hub_repository.md @@ -0,0 +1,174 @@ +<!--startmeta +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/dockerhub/README.md" +meta_yaml: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/dockerhub/metadata.yaml" +sidebar_label: "Docker Hub repository" +learn_status: "Published" +learn_rel_path: "Collecting Metrics/Containers and VMs" +most_popular: False +message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" +endmeta--> + +# Docker Hub repository + + +<img src="https://netdata.cloud/img/docker.svg" width="150"/> + + +Plugin: go.d.plugin +Module: dockerhub + +<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> + +## Overview + +This collector keeps track of DockerHub repositories statistics such as the number of stars, pulls, current status, and more. + + + + +This collector is supported on all platforms. + +This collector supports collecting metrics from multiple instances of this integration, including remote instances. + + +### Default Behavior + +#### Auto-Detection + +This integration doesn't support auto-detection. + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. + +#### Performance Impact + +The default configuration for this integration is not expected to impose a significant performance impact on the system. + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per Docker Hub repository instance + +These metrics refer to the entire monitored application. + +This scope has no labels. 
+ +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| dockerhub.pulls_sum | sum | pulls | +| dockerhub.pulls | a dimension per repository | pulls | +| dockerhub.pulls_rate | a dimension per repository | pulls/s | +| dockerhub.stars | a dimension per repository | stars | +| dockerhub.status | a dimension per repository | status | +| dockerhub.last_updated | a dimension per repository | seconds | + + + +## Alerts + +There are no alerts configured by default for this integration. + + +## Setup + +### Prerequisites + +No action required. + +### Configuration + +#### File + +The configuration file name for this integration is `go.d/dockerhub.conf`. + + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory). + +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/dockerhub.conf +``` +#### Options + +The following options can be defined globally: update_every, autodetection_retry. + + +<details><summary>Config options</summary> + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 1 | no | +| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no | +| url | DockerHub URL. | https://hub.docker.com/v2/repositories | yes | +| repositories | List of repositories to monitor. | | yes | +| timeout | HTTP request timeout. | 1 | no | +| username | Username for basic HTTP authentication. | | no | +| password | Password for basic HTTP authentication. | | no | +| proxy_url | Proxy URL. | | no | +| proxy_username | Username for proxy basic HTTP authentication. | | no | +| proxy_password | Password for proxy basic HTTP authentication. | | no | +| method | HTTP request method. | GET | no | +| body | HTTP request body. 
| | no | +| headers | HTTP request headers. | | no | +| not_follow_redirects | Redirect handling policy. Controls whether the client follows redirects. | no | no | +| tls_skip_verify | Server certificate chain and hostname validation policy. Controls whether the client performs this check. | no | no | +| tls_ca | Certification authority that the client uses when verifying the server's certificates. | | no | +| tls_cert | Client TLS certificate. | | no | +| tls_key | Client TLS key. | | no | + +</details> + +#### Examples + +##### Basic + +A basic example configuration. + +```yaml +jobs: + - name: dockerhub + repositories: + - 'user1/name1' + - 'user2/name2' + - 'user3/name3' + +``` + + +## Troubleshooting + +### Debug Mode + +To troubleshoot issues with the `dockerhub` collector, run the `go.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. 
+ + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m dockerhub + ``` + + diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/metadata.yaml b/src/go/collectors/go.d.plugin/modules/dockerhub/metadata.yaml new file mode 100644 index 000000000..605d6c1cb --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/metadata.yaml @@ -0,0 +1,190 @@ +plugin_name: go.d.plugin +modules: + - meta: + id: collector-go.d.plugin-dockerhub + plugin_name: go.d.plugin + module_name: dockerhub + monitored_instance: + name: Docker Hub repository + link: https://hub.docker.com/ + icon_filename: docker.svg + categories: + - data-collection.containers-and-vms # FIXME + keywords: + - dockerhub + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + most_popular: false + overview: + data_collection: + metrics_description: | + This collector keeps track of DockerHub repositories statistics such as the number of stars, pulls, current status, and more. + method_description: "" + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: "" + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: go.d/dockerhub.conf + options: + description: | + The following options can be defined globally: update_every, autodetection_retry. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Data collection frequency. + default_value: 1 + required: false + - name: autodetection_retry + description: Recheck interval in seconds. Zero means no recheck will be scheduled. + default_value: 0 + required: false + - name: url + description: DockerHub URL. 
+ default_value: https://hub.docker.com/v2/repositories + required: true + - name: repositories + description: List of repositories to monitor. + default_value: "" + required: true + - name: timeout + description: HTTP request timeout. + default_value: 1 + required: false + - name: username + description: Username for basic HTTP authentication. + default_value: "" + required: false + - name: password + description: Password for basic HTTP authentication. + default_value: "" + required: false + - name: proxy_url + description: Proxy URL. + default_value: "" + required: false + - name: proxy_username + description: Username for proxy basic HTTP authentication. + default_value: "" + required: false + - name: proxy_password + description: Password for proxy basic HTTP authentication. + default_value: "" + required: false + - name: method + description: HTTP request method. + default_value: "GET" + required: false + - name: body + description: HTTP request body. + default_value: "" + required: false + - name: headers + description: HTTP request headers. + default_value: "" + required: false + - name: not_follow_redirects + description: Redirect handling policy. Controls whether the client follows redirects. + default_value: no + required: false + - name: tls_skip_verify + description: Server certificate chain and hostname validation policy. Controls whether the client performs this check. + default_value: no + required: false + - name: tls_ca + description: Certification authority that the client uses when verifying the server's certificates. + default_value: "" + required: false + - name: tls_cert + description: Client TLS certificate. + default_value: "" + required: false + - name: tls_key + description: Client TLS key. + default_value: "" + required: false + examples: + folding: + title: Config + enabled: true + list: + - name: Basic + folding: + enabled: false + description: A basic example configuration. 
+ config: | + jobs: + - name: dockerhub + repositories: + - 'user1/name1' + - 'user2/name2' + - 'user3/name3' + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: These metrics refer to the entire monitored application. + labels: [] + metrics: + - name: dockerhub.pulls_sum + description: Pulls Summary + unit: pulls + chart_type: line + dimensions: + - name: sum + - name: dockerhub.pulls + description: Pulls + unit: pulls + chart_type: stacked + dimensions: + - name: a dimension per repository + - name: dockerhub.pulls_rate + description: Pulls Rate + unit: pulls/s + chart_type: stacked + dimensions: + - name: a dimension per repository + - name: dockerhub.stars + description: Stars + unit: stars + chart_type: stacked + dimensions: + - name: a dimension per repository + - name: dockerhub.status + description: Current Status + unit: status + chart_type: line + dimensions: + - name: a dimension per repository + - name: dockerhub.last_updated + description: Time Since Last Updated + unit: seconds + chart_type: line + dimensions: + - name: a dimension per repository diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/config.json b/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/config.json new file mode 100644 index 000000000..3496e747c --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/config.json @@ -0,0 +1,23 @@ +{ + "update_every": 123, + "url": "ok", + "body": "ok", + "method": "ok", + "headers": { + "ok": "ok" + }, + "username": "ok", + "password": "ok", + "proxy_url": "ok", + "proxy_username": "ok", + "proxy_password": "ok", + "timeout": 123.123, + "not_follow_redirects": true, + "tls_ca": "ok", + "tls_cert": "ok", + "tls_key": "ok", + "tls_skip_verify": true, + "repositories": [ + "ok" + ] +} diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/config.yaml 
b/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/config.yaml new file mode 100644 index 000000000..20c4ba61b --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/config.yaml @@ -0,0 +1,19 @@ +update_every: 123 +url: "ok" +body: "ok" +method: "ok" +headers: + ok: "ok" +username: "ok" +password: "ok" +proxy_url: "ok" +proxy_username: "ok" +proxy_password: "ok" +timeout: 123.123 +not_follow_redirects: yes +tls_ca: "ok" +tls_cert: "ok" +tls_key: "ok" +tls_skip_verify: yes +repositories: + - "ok" diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/repo1.txt b/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/repo1.txt new file mode 100644 index 000000000..b67e2f382 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/repo1.txt @@ -0,0 +1,22 @@ +{ + "user": "user1", + "name": "name1", + "namespace": "namespace", + "repository_type": "image", + "status": 1, + "description": "Description.", + "is_private": false, + "is_automated": false, + "can_edit": false, + "star_count": 45, + "pull_count": 18540191, + "last_updated": "2019-03-28T21:26:05.527650Z", + "is_migrated": false, + "has_starred": false, + "affiliation": null, + "permissions": { + "read": true, + "write": false, + "admin": false + } +}
\ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/repo2.txt b/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/repo2.txt new file mode 100644 index 000000000..e84ba989b --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/repo2.txt @@ -0,0 +1,22 @@ +{ + "user": "user2", + "name": "name2", + "namespace": "namespace", + "repository_type": "image", + "status": 1, + "description": "Description.", + "is_private": false, + "is_automated": false, + "can_edit": false, + "star_count": 45, + "pull_count": 18540192, + "last_updated": "2019-03-28T21:26:05.527650Z", + "is_migrated": false, + "has_starred": false, + "affiliation": null, + "permissions": { + "read": true, + "write": false, + "admin": false + } +}
\ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/repo3.txt b/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/repo3.txt new file mode 100644 index 000000000..1fc64a9c3 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/dockerhub/testdata/repo3.txt @@ -0,0 +1,22 @@ +{ + "user": "user3", + "name": "name3", + "namespace": "namespace", + "repository_type": "image", + "status": 1, + "description": "Description.", + "is_private": false, + "is_automated": false, + "can_edit": false, + "star_count": 45, + "pull_count": 18540193, + "last_updated": "2019-03-28T21:26:05.527650Z", + "is_migrated": false, + "has_starred": false, + "affiliation": null, + "permissions": { + "read": true, + "write": false, + "admin": false + } +}
\ No newline at end of file |