diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:18 +0000 |
commit | 5da14042f70711ea5cf66e034699730335462f66 (patch) | |
tree | 0f6354ccac934ed87a2d555f45be4c831cf92f4a /src/go/collectors/go.d.plugin/modules/docker | |
parent | Releasing debian version 1.44.3-2. (diff) | |
download | netdata-5da14042f70711ea5cf66e034699730335462f66.tar.xz netdata-5da14042f70711ea5cf66e034699730335462f66.zip |
Merging upstream version 1.45.3+dfsg.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/go/collectors/go.d.plugin/modules/docker')
10 files changed, 1807 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/docker/README.md b/src/go/collectors/go.d.plugin/modules/docker/README.md new file mode 120000 index 000000000..b4804ee06 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/README.md @@ -0,0 +1 @@ +integrations/docker.md
\ No newline at end of file diff --git a/src/go/collectors/go.d.plugin/modules/docker/charts.go b/src/go/collectors/go.d.plugin/modules/docker/charts.go new file mode 100644 index 000000000..2dd26c0e3 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/charts.go @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker + +import ( + "fmt" + "strings" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" +) + +const ( + prioContainersState = module.Priority + iota + prioContainersHealthy + + prioContainerState + prioContainerHealthStatus + prioContainerWritableLayerSize + + prioImagesCount + prioImagesSize +) + +var summaryCharts = module.Charts{ + containersStateChart.Copy(), + containersHealthyChart.Copy(), + + imagesCountChart.Copy(), + imagesSizeChart.Copy(), +} + +var ( + containersStateChart = module.Chart{ + ID: "containers_state", + Title: "Total number of Docker containers in various states", + Units: "containers", + Fam: "containers", + Ctx: "docker.containers_state", + Priority: prioContainersState, + Type: module.Stacked, + Dims: module.Dims{ + {ID: "containers_state_running", Name: "running"}, + {ID: "containers_state_paused", Name: "paused"}, + {ID: "containers_state_exited", Name: "exited"}, + }, + } + containersHealthyChart = module.Chart{ + ID: "healthy_containers", + Title: "Total number of Docker containers in various health states", + Units: "containers", + Fam: "containers", + Ctx: "docker.containers_health_status", + Priority: prioContainersHealthy, + Dims: module.Dims{ + {ID: "containers_health_status_healthy", Name: "healthy"}, + {ID: "containers_health_status_unhealthy", Name: "unhealthy"}, + {ID: "containers_health_status_not_running_unhealthy", Name: "not_running_unhealthy"}, + {ID: "containers_health_status_starting", Name: "starting"}, + {ID: "containers_health_status_none", Name: "no_healthcheck"}, + }, + } +) + +var ( + imagesCountChart = module.Chart{ + ID: "images_count", + Title: "Total 
number of Docker images in various states", + Units: "images", + Fam: "images", + Ctx: "docker.images", + Priority: prioImagesCount, + Type: module.Stacked, + Dims: module.Dims{ + {ID: "images_active", Name: "active"}, + {ID: "images_dangling", Name: "dangling"}, + }, + } + imagesSizeChart = module.Chart{ + ID: "images_size", + Title: "Total size of all Docker images", + Units: "bytes", + Fam: "images", + Ctx: "docker.images_size", + Priority: prioImagesSize, + Dims: module.Dims{ + {ID: "images_size", Name: "size"}, + }, + } +) + +var ( + containerChartsTmpl = module.Charts{ + containerStateChartTmpl.Copy(), + containerHealthStatusChartTmpl.Copy(), + containerWritableLayerSizeChartTmpl.Copy(), + } + + containerStateChartTmpl = module.Chart{ + ID: "container_%s_state", + Title: "Docker container state", + Units: "state", + Fam: "containers", + Ctx: "docker.container_state", + Priority: prioContainerState, + Dims: module.Dims{ + {ID: "container_%s_state_running", Name: "running"}, + {ID: "container_%s_state_paused", Name: "paused"}, + {ID: "container_%s_state_exited", Name: "exited"}, + {ID: "container_%s_state_created", Name: "created"}, + {ID: "container_%s_state_restarting", Name: "restarting"}, + {ID: "container_%s_state_removing", Name: "removing"}, + {ID: "container_%s_state_dead", Name: "dead"}, + }, + } + containerHealthStatusChartTmpl = module.Chart{ + ID: "container_%s_health_status", + Title: "Docker container health status", + Units: "status", + Fam: "containers", + Ctx: "docker.container_health_status", + Priority: prioContainerHealthStatus, + Dims: module.Dims{ + {ID: "container_%s_health_status_healthy", Name: "healthy"}, + {ID: "container_%s_health_status_unhealthy", Name: "unhealthy"}, + {ID: "container_%s_health_status_not_running_unhealthy", Name: "not_running_unhealthy"}, + {ID: "container_%s_health_status_starting", Name: "starting"}, + {ID: "container_%s_health_status_none", Name: "no_healthcheck"}, + }, + } + containerWritableLayerSizeChartTmpl 
= module.Chart{ + ID: "container_%s_writable_layer_size", + Title: "Docker container writable layer size", + Units: "bytes", + Fam: "containers", + Ctx: "docker.container_writeable_layer_size", + Priority: prioContainerWritableLayerSize, + Dims: module.Dims{ + {ID: "container_%s_size_rw", Name: "writable_layer"}, + }, + } +) + +func (d *Docker) addContainerCharts(name, image string) { + charts := containerChartsTmpl.Copy() + if !d.CollectContainerSize { + _ = charts.Remove(containerWritableLayerSizeChartTmpl.ID) + } + + for _, chart := range *charts { + chart.ID = fmt.Sprintf(chart.ID, name) + chart.Labels = []module.Label{ + {Key: "container_name", Value: name}, + {Key: "image", Value: image}, + } + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, name) + } + } + + if err := d.Charts().Add(*charts...); err != nil { + d.Warning(err) + } +} + +func (d *Docker) removeContainerCharts(name string) { + px := fmt.Sprintf("container_%s", name) + + for _, chart := range *d.Charts() { + if strings.HasPrefix(chart.ID, px) { + chart.MarkRemove() + chart.MarkNotCreated() + } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/collect.go b/src/go/collectors/go.d.plugin/modules/docker/collect.go new file mode 100644 index 000000000..7b5af7cab --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/collect.go @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker + +import ( + "context" + "fmt" + "strings" + + "github.com/docker/docker/api/types" + typesContainer "github.com/docker/docker/api/types/container" + "github.com/docker/docker/api/types/filters" +) + +func (d *Docker) collect() (map[string]int64, error) { + if d.client == nil { + client, err := d.newClient(d.Config) + if err != nil { + return nil, err + } + d.client = client + } + + if !d.verNegotiated { + d.verNegotiated = true + d.negotiateAPIVersion() + } + + defer func() { _ = d.client.Close() }() + + mx := make(map[string]int64) + + if err := 
d.collectInfo(mx); err != nil { + return nil, err + } + if err := d.collectImages(mx); err != nil { + return nil, err + } + if err := d.collectContainers(mx); err != nil { + return nil, err + } + + return mx, nil +} + +func (d *Docker) collectInfo(mx map[string]int64) error { + ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration()) + defer cancel() + + info, err := d.client.Info(ctx) + if err != nil { + return err + } + + mx["containers_state_running"] = int64(info.ContainersRunning) + mx["containers_state_paused"] = int64(info.ContainersPaused) + mx["containers_state_exited"] = int64(info.ContainersStopped) + + return nil +} + +func (d *Docker) collectImages(mx map[string]int64) error { + ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration()) + defer cancel() + + images, err := d.client.ImageList(ctx, types.ImageListOptions{}) + if err != nil { + return err + } + + mx["images_size"] = 0 + mx["images_dangling"] = 0 + mx["images_active"] = 0 + + for _, v := range images { + mx["images_size"] += v.Size + if v.Containers == 0 { + mx["images_dangling"]++ + } else { + mx["images_active"]++ + } + } + + return nil +} + +var ( + containerHealthStatuses = []string{ + types.Healthy, + types.Unhealthy, + types.Starting, + types.NoHealthcheck, + } + containerStates = []string{ + "created", + "running", + "paused", + "restarting", + "removing", + "exited", + "dead", + } +) + +func (d *Docker) collectContainers(mx map[string]int64) error { + containerSet := make(map[string][]types.Container) + + for _, status := range containerHealthStatuses { + if err := func() error { + ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration()) + defer cancel() + + v, err := d.client.ContainerList(ctx, typesContainer.ListOptions{ + All: true, + Filters: filters.NewArgs(filters.KeyValuePair{Key: "health", Value: status}), + Size: d.CollectContainerSize, + }) + if err != nil { + return err + } + containerSet[status] = v + return 
nil + + }(); err != nil { + return err + } + } + + seen := make(map[string]bool) + + for _, s := range containerHealthStatuses { + mx["containers_health_status_"+s] = 0 + } + mx["containers_health_status_not_running_unhealthy"] = 0 + + for status, containers := range containerSet { + if status != types.Unhealthy { + mx["containers_health_status_"+status] = int64(len(containers)) + } + + for _, cntr := range containers { + if status == types.Unhealthy { + if cntr.State == "running" { + mx["containers_health_status_"+status] += 1 + } else { + mx["containers_health_status_not_running_unhealthy"] += 1 + } + } + + if len(cntr.Names) == 0 { + continue + } + + name := strings.TrimPrefix(cntr.Names[0], "/") + + seen[name] = true + + if !d.containers[name] { + d.containers[name] = true + d.addContainerCharts(name, cntr.Image) + } + + px := fmt.Sprintf("container_%s_", name) + + for _, s := range containerHealthStatuses { + mx[px+"health_status_"+s] = 0 + } + mx[px+"health_status_not_running_unhealthy"] = 0 + for _, s := range containerStates { + mx[px+"state_"+s] = 0 + } + + if status == types.Unhealthy && cntr.State != "running" { + mx[px+"health_status_not_running_unhealthy"] += 1 + } else { + mx[px+"health_status_"+status] = 1 + } + mx[px+"state_"+cntr.State] = 1 + mx[px+"size_rw"] = cntr.SizeRw + mx[px+"size_root_fs"] = cntr.SizeRootFs + } + } + + for name := range d.containers { + if !seen[name] { + delete(d.containers, name) + d.removeContainerCharts(name) + } + } + + return nil +} + +func (d *Docker) negotiateAPIVersion() { + ctx, cancel := context.WithTimeout(context.Background(), d.Timeout.Duration()) + defer cancel() + + d.client.NegotiateAPIVersion(ctx) +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/config_schema.json b/src/go/collectors/go.d.plugin/modules/docker/config_schema.json new file mode 100644 index 000000000..bd48c9126 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/config_schema.json @@ -0,0 +1,52 @@ +{ + "jsonSchema": { 
+ "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Docker collector configuration.", + "type": "object", + "properties": { + "update_every": { + "title": "Update every", + "description": "Data collection interval, measured in seconds.", + "type": "integer", + "minimum": 1, + "default": 1 + }, + "address": { + "title": "Address", + "description": "Docker daemon's Unix or TCP (listening address) socket.", + "type": "string", + "default": "unix:///var/run/docker.sock" + }, + "timeout": { + "title": "Timeout", + "description": "Timeout for establishing a connection and communication (reading and writing) in seconds.", + "type": "number", + "default": 2 + }, + "collect_container_size": { + "title": "Collect container size", + "description": "Collect container writable layer size.", + "type": "boolean", + "default": false + } + }, + "required": [ + "address" + ], + "additionalProperties": false, + "patternProperties": { + "^name$": {} + } + }, + "uiSchema": { + "uiOptions": { + "fullPage": true + }, + "address": { + "ui:help": "Use `unix://{path_to_socket}` for Unix socket or `tcp://{ip}:{port}` for TCP socket." + }, + "timeout": { + "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 seconds)." 
+ } + } +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/docker.go b/src/go/collectors/go.d.plugin/modules/docker/docker.go new file mode 100644 index 000000000..7328a7ca6 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/docker.go @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker + +import ( + "context" + _ "embed" + "errors" + "time" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + "github.com/netdata/netdata/go/go.d.plugin/pkg/web" + + "github.com/docker/docker/api/types" + typesContainer "github.com/docker/docker/api/types/container" + typesImage "github.com/docker/docker/api/types/image" + typesSystem "github.com/docker/docker/api/types/system" + docker "github.com/docker/docker/client" +) + +//go:embed "config_schema.json" +var configSchema string + +func init() { + module.Register("docker", module.Creator{ + JobConfigSchema: configSchema, + Create: func() module.Module { return New() }, + }) +} + +func New() *Docker { + return &Docker{ + Config: Config{ + Address: docker.DefaultDockerHost, + Timeout: web.Duration(time.Second * 2), + CollectContainerSize: false, + }, + + charts: summaryCharts.Copy(), + newClient: func(cfg Config) (dockerClient, error) { + return docker.NewClientWithOpts(docker.WithHost(cfg.Address)) + }, + containers: make(map[string]bool), + } +} + +type Config struct { + UpdateEvery int `yaml:"update_every" json:"update_every"` + Address string `yaml:"address" json:"address"` + Timeout web.Duration `yaml:"timeout" json:"timeout"` + CollectContainerSize bool `yaml:"collect_container_size" json:"collect_container_size"` +} + +type ( + Docker struct { + module.Base + Config `yaml:",inline" json:""` + + charts *module.Charts + + client dockerClient + newClient func(Config) (dockerClient, error) + + verNegotiated bool + containers map[string]bool + } + dockerClient interface { + NegotiateAPIVersion(context.Context) + Info(context.Context) (typesSystem.Info, error) + 
ImageList(context.Context, types.ImageListOptions) ([]typesImage.Summary, error) + ContainerList(context.Context, typesContainer.ListOptions) ([]types.Container, error) + Close() error + } +) + +func (d *Docker) Configuration() any { + return d.Config +} + +func (d *Docker) Init() error { + return nil +} + +func (d *Docker) Check() error { + mx, err := d.collect() + if err != nil { + d.Error(err) + return err + } + if len(mx) == 0 { + return errors.New("no metrics collected") + + } + return nil +} + +func (d *Docker) Charts() *module.Charts { + return d.charts +} + +func (d *Docker) Collect() map[string]int64 { + mx, err := d.collect() + if err != nil { + d.Error(err) + } + + if len(mx) == 0 { + return nil + } + return mx +} + +func (d *Docker) Cleanup() { + if d.client == nil { + return + } + if err := d.client.Close(); err != nil { + d.Warningf("error on closing docker client: %v", err) + } + d.client = nil +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/docker_test.go b/src/go/collectors/go.d.plugin/modules/docker/docker_test.go new file mode 100644 index 000000000..934178b9a --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/docker_test.go @@ -0,0 +1,852 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package docker + +import ( + "context" + "errors" + "os" + "testing" + + "github.com/netdata/netdata/go/go.d.plugin/agent/module" + + "github.com/docker/docker/api/types" + typesContainer "github.com/docker/docker/api/types/container" + typesImage "github.com/docker/docker/api/types/image" + typesSystem "github.com/docker/docker/api/types/system" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + dataConfigJSON, _ = os.ReadFile("testdata/config.json") + dataConfigYAML, _ = os.ReadFile("testdata/config.yaml") +) + +func Test_testDataIsValid(t *testing.T) { + for name, data := range map[string][]byte{ + "dataConfigJSON": dataConfigJSON, + "dataConfigYAML": dataConfigYAML, + } { + 
require.NotNil(t, data, name) + } +} + +func TestDocker_ConfigurationSerialize(t *testing.T) { + module.TestConfigurationSerialize(t, &Docker{}, dataConfigJSON, dataConfigYAML) +} + +func TestDocker_Init(t *testing.T) { + tests := map[string]struct { + config Config + wantFail bool + }{ + "default config": { + wantFail: false, + config: New().Config, + }, + "unset 'address'": { + wantFail: false, + config: Config{ + Address: "", + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + d := New() + d.Config = test.config + + if test.wantFail { + assert.Error(t, d.Init()) + } else { + assert.NoError(t, d.Init()) + } + }) + } +} + +func TestDocker_Charts(t *testing.T) { + assert.Equal(t, len(summaryCharts), len(*New().Charts())) +} + +func TestDocker_Cleanup(t *testing.T) { + tests := map[string]struct { + prepare func(d *Docker) + wantClose bool + }{ + "after New": { + wantClose: false, + prepare: func(d *Docker) {}, + }, + "after Init": { + wantClose: false, + prepare: func(d *Docker) { _ = d.Init() }, + }, + "after Check": { + wantClose: true, + prepare: func(d *Docker) { _ = d.Init(); _ = d.Check() }, + }, + "after Collect": { + wantClose: true, + prepare: func(d *Docker) { _ = d.Init(); d.Collect() }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + m := &mockClient{} + d := New() + d.newClient = prepareNewClientFunc(m) + + test.prepare(d) + + require.NotPanics(t, d.Cleanup) + + if test.wantClose { + assert.True(t, m.closeCalled) + } else { + assert.False(t, m.closeCalled) + } + }) + } +} + +func TestDocker_Check(t *testing.T) { + tests := map[string]struct { + prepare func() *Docker + wantFail bool + }{ + "case success": { + wantFail: false, + prepare: func() *Docker { + return prepareCaseSuccess() + }, + }, + "case success without container size": { + wantFail: false, + prepare: func() *Docker { + return prepareCaseSuccessWithoutContainerSize() + }, + }, + "fail on case err on Info()": { + wantFail: 
true, + prepare: func() *Docker { + return prepareCaseErrOnInfo() + }, + }, + "fail on case err on ImageList()": { + wantFail: true, + prepare: func() *Docker { + return prepareCaseErrOnImageList() + }, + }, + "fail on case err on ContainerList()": { + wantFail: true, + prepare: func() *Docker { + return prepareCaseErrOnContainerList() + }, + }, + "fail on case err on creating Docker client": { + wantFail: true, + prepare: func() *Docker { + return prepareCaseErrCreatingClient() + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + d := test.prepare() + + require.NoError(t, d.Init()) + + if test.wantFail { + assert.Error(t, d.Check()) + } else { + assert.NoError(t, d.Check()) + } + }) + } +} + +func TestDocker_Collect(t *testing.T) { + tests := map[string]struct { + prepare func() *Docker + expected map[string]int64 + }{ + "case success": { + prepare: func() *Docker { + return prepareCaseSuccess() + }, + expected: map[string]int64{ + "container_container10_health_status_healthy": 0, + "container_container10_health_status_none": 0, + "container_container10_health_status_not_running_unhealthy": 1, + "container_container10_health_status_starting": 0, + "container_container10_health_status_unhealthy": 0, + "container_container10_size_root_fs": 0, + "container_container10_size_rw": 0, + "container_container10_state_created": 0, + "container_container10_state_dead": 1, + "container_container10_state_exited": 0, + "container_container10_state_paused": 0, + "container_container10_state_removing": 0, + "container_container10_state_restarting": 0, + "container_container10_state_running": 0, + "container_container11_health_status_healthy": 0, + "container_container11_health_status_none": 0, + "container_container11_health_status_not_running_unhealthy": 0, + "container_container11_health_status_starting": 1, + "container_container11_health_status_unhealthy": 0, + "container_container11_size_root_fs": 0, + "container_container11_size_rw": 0, + 
"container_container11_state_created": 0, + "container_container11_state_dead": 0, + "container_container11_state_exited": 0, + "container_container11_state_paused": 0, + "container_container11_state_removing": 1, + "container_container11_state_restarting": 0, + "container_container11_state_running": 0, + "container_container12_health_status_healthy": 0, + "container_container12_health_status_none": 0, + "container_container12_health_status_not_running_unhealthy": 0, + "container_container12_health_status_starting": 1, + "container_container12_health_status_unhealthy": 0, + "container_container12_size_root_fs": 0, + "container_container12_size_rw": 0, + "container_container12_state_created": 0, + "container_container12_state_dead": 0, + "container_container12_state_exited": 1, + "container_container12_state_paused": 0, + "container_container12_state_removing": 0, + "container_container12_state_restarting": 0, + "container_container12_state_running": 0, + "container_container13_health_status_healthy": 0, + "container_container13_health_status_none": 0, + "container_container13_health_status_not_running_unhealthy": 0, + "container_container13_health_status_starting": 1, + "container_container13_health_status_unhealthy": 0, + "container_container13_size_root_fs": 0, + "container_container13_size_rw": 0, + "container_container13_state_created": 0, + "container_container13_state_dead": 0, + "container_container13_state_exited": 1, + "container_container13_state_paused": 0, + "container_container13_state_removing": 0, + "container_container13_state_restarting": 0, + "container_container13_state_running": 0, + "container_container14_health_status_healthy": 0, + "container_container14_health_status_none": 1, + "container_container14_health_status_not_running_unhealthy": 0, + "container_container14_health_status_starting": 0, + "container_container14_health_status_unhealthy": 0, + "container_container14_size_root_fs": 0, + "container_container14_size_rw": 0, + 
"container_container14_state_created": 0, + "container_container14_state_dead": 1, + "container_container14_state_exited": 0, + "container_container14_state_paused": 0, + "container_container14_state_removing": 0, + "container_container14_state_restarting": 0, + "container_container14_state_running": 0, + "container_container15_health_status_healthy": 0, + "container_container15_health_status_none": 1, + "container_container15_health_status_not_running_unhealthy": 0, + "container_container15_health_status_starting": 0, + "container_container15_health_status_unhealthy": 0, + "container_container15_size_root_fs": 0, + "container_container15_size_rw": 0, + "container_container15_state_created": 0, + "container_container15_state_dead": 1, + "container_container15_state_exited": 0, + "container_container15_state_paused": 0, + "container_container15_state_removing": 0, + "container_container15_state_restarting": 0, + "container_container15_state_running": 0, + "container_container16_health_status_healthy": 0, + "container_container16_health_status_none": 1, + "container_container16_health_status_not_running_unhealthy": 0, + "container_container16_health_status_starting": 0, + "container_container16_health_status_unhealthy": 0, + "container_container16_size_root_fs": 0, + "container_container16_size_rw": 0, + "container_container16_state_created": 0, + "container_container16_state_dead": 1, + "container_container16_state_exited": 0, + "container_container16_state_paused": 0, + "container_container16_state_removing": 0, + "container_container16_state_restarting": 0, + "container_container16_state_running": 0, + "container_container1_health_status_healthy": 1, + "container_container1_health_status_none": 0, + "container_container1_health_status_not_running_unhealthy": 0, + "container_container1_health_status_starting": 0, + "container_container1_health_status_unhealthy": 0, + "container_container1_size_root_fs": 0, + "container_container1_size_rw": 0, + 
"container_container1_state_created": 1, + "container_container1_state_dead": 0, + "container_container1_state_exited": 0, + "container_container1_state_paused": 0, + "container_container1_state_removing": 0, + "container_container1_state_restarting": 0, + "container_container1_state_running": 0, + "container_container2_health_status_healthy": 1, + "container_container2_health_status_none": 0, + "container_container2_health_status_not_running_unhealthy": 0, + "container_container2_health_status_starting": 0, + "container_container2_health_status_unhealthy": 0, + "container_container2_size_root_fs": 0, + "container_container2_size_rw": 0, + "container_container2_state_created": 0, + "container_container2_state_dead": 0, + "container_container2_state_exited": 0, + "container_container2_state_paused": 0, + "container_container2_state_removing": 0, + "container_container2_state_restarting": 0, + "container_container2_state_running": 1, + "container_container3_health_status_healthy": 1, + "container_container3_health_status_none": 0, + "container_container3_health_status_not_running_unhealthy": 0, + "container_container3_health_status_starting": 0, + "container_container3_health_status_unhealthy": 0, + "container_container3_size_root_fs": 0, + "container_container3_size_rw": 0, + "container_container3_state_created": 0, + "container_container3_state_dead": 0, + "container_container3_state_exited": 0, + "container_container3_state_paused": 0, + "container_container3_state_removing": 0, + "container_container3_state_restarting": 0, + "container_container3_state_running": 1, + "container_container4_health_status_healthy": 0, + "container_container4_health_status_none": 0, + "container_container4_health_status_not_running_unhealthy": 1, + "container_container4_health_status_starting": 0, + "container_container4_health_status_unhealthy": 0, + "container_container4_size_root_fs": 0, + "container_container4_size_rw": 0, + "container_container4_state_created": 1, + 
"container_container4_state_dead": 0, + "container_container4_state_exited": 0, + "container_container4_state_paused": 0, + "container_container4_state_removing": 0, + "container_container4_state_restarting": 0, + "container_container4_state_running": 0, + "container_container5_health_status_healthy": 0, + "container_container5_health_status_none": 0, + "container_container5_health_status_not_running_unhealthy": 0, + "container_container5_health_status_starting": 0, + "container_container5_health_status_unhealthy": 1, + "container_container5_size_root_fs": 0, + "container_container5_size_rw": 0, + "container_container5_state_created": 0, + "container_container5_state_dead": 0, + "container_container5_state_exited": 0, + "container_container5_state_paused": 0, + "container_container5_state_removing": 0, + "container_container5_state_restarting": 0, + "container_container5_state_running": 1, + "container_container6_health_status_healthy": 0, + "container_container6_health_status_none": 0, + "container_container6_health_status_not_running_unhealthy": 1, + "container_container6_health_status_starting": 0, + "container_container6_health_status_unhealthy": 0, + "container_container6_size_root_fs": 0, + "container_container6_size_rw": 0, + "container_container6_state_created": 0, + "container_container6_state_dead": 0, + "container_container6_state_exited": 0, + "container_container6_state_paused": 1, + "container_container6_state_removing": 0, + "container_container6_state_restarting": 0, + "container_container6_state_running": 0, + "container_container7_health_status_healthy": 0, + "container_container7_health_status_none": 0, + "container_container7_health_status_not_running_unhealthy": 1, + "container_container7_health_status_starting": 0, + "container_container7_health_status_unhealthy": 0, + "container_container7_size_root_fs": 0, + "container_container7_size_rw": 0, + "container_container7_state_created": 0, + "container_container7_state_dead": 0, + 
"container_container7_state_exited": 0, + "container_container7_state_paused": 0, + "container_container7_state_removing": 0, + "container_container7_state_restarting": 1, + "container_container7_state_running": 0, + "container_container8_health_status_healthy": 0, + "container_container8_health_status_none": 0, + "container_container8_health_status_not_running_unhealthy": 1, + "container_container8_health_status_starting": 0, + "container_container8_health_status_unhealthy": 0, + "container_container8_size_root_fs": 0, + "container_container8_size_rw": 0, + "container_container8_state_created": 0, + "container_container8_state_dead": 0, + "container_container8_state_exited": 0, + "container_container8_state_paused": 0, + "container_container8_state_removing": 1, + "container_container8_state_restarting": 0, + "container_container8_state_running": 0, + "container_container9_health_status_healthy": 0, + "container_container9_health_status_none": 0, + "container_container9_health_status_not_running_unhealthy": 1, + "container_container9_health_status_starting": 0, + "container_container9_health_status_unhealthy": 0, + "container_container9_size_root_fs": 0, + "container_container9_size_rw": 0, + "container_container9_state_created": 0, + "container_container9_state_dead": 0, + "container_container9_state_exited": 1, + "container_container9_state_paused": 0, + "container_container9_state_removing": 0, + "container_container9_state_restarting": 0, + "container_container9_state_running": 0, + "containers_health_status_healthy": 3, + "containers_health_status_none": 3, + "containers_health_status_not_running_unhealthy": 6, + "containers_health_status_starting": 3, + "containers_health_status_unhealthy": 1, + "containers_state_exited": 6, + "containers_state_paused": 5, + "containers_state_running": 4, + "images_active": 1, + "images_dangling": 1, + "images_size": 300, + }, + }, + "case success without container size": { + prepare: func() *Docker { + return 
prepareCaseSuccessWithoutContainerSize() + }, + expected: map[string]int64{ + "container_container10_health_status_healthy": 0, + "container_container10_health_status_none": 0, + "container_container10_health_status_not_running_unhealthy": 1, + "container_container10_health_status_starting": 0, + "container_container10_health_status_unhealthy": 0, + "container_container10_size_root_fs": 0, + "container_container10_size_rw": 0, + "container_container10_state_created": 0, + "container_container10_state_dead": 1, + "container_container10_state_exited": 0, + "container_container10_state_paused": 0, + "container_container10_state_removing": 0, + "container_container10_state_restarting": 0, + "container_container10_state_running": 0, + "container_container11_health_status_healthy": 0, + "container_container11_health_status_none": 0, + "container_container11_health_status_not_running_unhealthy": 0, + "container_container11_health_status_starting": 1, + "container_container11_health_status_unhealthy": 0, + "container_container11_size_root_fs": 0, + "container_container11_size_rw": 0, + "container_container11_state_created": 0, + "container_container11_state_dead": 0, + "container_container11_state_exited": 0, + "container_container11_state_paused": 0, + "container_container11_state_removing": 1, + "container_container11_state_restarting": 0, + "container_container11_state_running": 0, + "container_container12_health_status_healthy": 0, + "container_container12_health_status_none": 0, + "container_container12_health_status_not_running_unhealthy": 0, + "container_container12_health_status_starting": 1, + "container_container12_health_status_unhealthy": 0, + "container_container12_size_root_fs": 0, + "container_container12_size_rw": 0, + "container_container12_state_created": 0, + "container_container12_state_dead": 0, + "container_container12_state_exited": 1, + "container_container12_state_paused": 0, + "container_container12_state_removing": 0, + 
"container_container12_state_restarting": 0, + "container_container12_state_running": 0, + "container_container13_health_status_healthy": 0, + "container_container13_health_status_none": 0, + "container_container13_health_status_not_running_unhealthy": 0, + "container_container13_health_status_starting": 1, + "container_container13_health_status_unhealthy": 0, + "container_container13_size_root_fs": 0, + "container_container13_size_rw": 0, + "container_container13_state_created": 0, + "container_container13_state_dead": 0, + "container_container13_state_exited": 1, + "container_container13_state_paused": 0, + "container_container13_state_removing": 0, + "container_container13_state_restarting": 0, + "container_container13_state_running": 0, + "container_container14_health_status_healthy": 0, + "container_container14_health_status_none": 1, + "container_container14_health_status_not_running_unhealthy": 0, + "container_container14_health_status_starting": 0, + "container_container14_health_status_unhealthy": 0, + "container_container14_size_root_fs": 0, + "container_container14_size_rw": 0, + "container_container14_state_created": 0, + "container_container14_state_dead": 1, + "container_container14_state_exited": 0, + "container_container14_state_paused": 0, + "container_container14_state_removing": 0, + "container_container14_state_restarting": 0, + "container_container14_state_running": 0, + "container_container15_health_status_healthy": 0, + "container_container15_health_status_none": 1, + "container_container15_health_status_not_running_unhealthy": 0, + "container_container15_health_status_starting": 0, + "container_container15_health_status_unhealthy": 0, + "container_container15_size_root_fs": 0, + "container_container15_size_rw": 0, + "container_container15_state_created": 0, + "container_container15_state_dead": 1, + "container_container15_state_exited": 0, + "container_container15_state_paused": 0, + "container_container15_state_removing": 0, + 
"container_container15_state_restarting": 0, + "container_container15_state_running": 0, + "container_container16_health_status_healthy": 0, + "container_container16_health_status_none": 1, + "container_container16_health_status_not_running_unhealthy": 0, + "container_container16_health_status_starting": 0, + "container_container16_health_status_unhealthy": 0, + "container_container16_size_root_fs": 0, + "container_container16_size_rw": 0, + "container_container16_state_created": 0, + "container_container16_state_dead": 1, + "container_container16_state_exited": 0, + "container_container16_state_paused": 0, + "container_container16_state_removing": 0, + "container_container16_state_restarting": 0, + "container_container16_state_running": 0, + "container_container1_health_status_healthy": 1, + "container_container1_health_status_none": 0, + "container_container1_health_status_not_running_unhealthy": 0, + "container_container1_health_status_starting": 0, + "container_container1_health_status_unhealthy": 0, + "container_container1_size_root_fs": 0, + "container_container1_size_rw": 0, + "container_container1_state_created": 1, + "container_container1_state_dead": 0, + "container_container1_state_exited": 0, + "container_container1_state_paused": 0, + "container_container1_state_removing": 0, + "container_container1_state_restarting": 0, + "container_container1_state_running": 0, + "container_container2_health_status_healthy": 1, + "container_container2_health_status_none": 0, + "container_container2_health_status_not_running_unhealthy": 0, + "container_container2_health_status_starting": 0, + "container_container2_health_status_unhealthy": 0, + "container_container2_size_root_fs": 0, + "container_container2_size_rw": 0, + "container_container2_state_created": 0, + "container_container2_state_dead": 0, + "container_container2_state_exited": 0, + "container_container2_state_paused": 0, + "container_container2_state_removing": 0, + 
"container_container2_state_restarting": 0, + "container_container2_state_running": 1, + "container_container3_health_status_healthy": 1, + "container_container3_health_status_none": 0, + "container_container3_health_status_not_running_unhealthy": 0, + "container_container3_health_status_starting": 0, + "container_container3_health_status_unhealthy": 0, + "container_container3_size_root_fs": 0, + "container_container3_size_rw": 0, + "container_container3_state_created": 0, + "container_container3_state_dead": 0, + "container_container3_state_exited": 0, + "container_container3_state_paused": 0, + "container_container3_state_removing": 0, + "container_container3_state_restarting": 0, + "container_container3_state_running": 1, + "container_container4_health_status_healthy": 0, + "container_container4_health_status_none": 0, + "container_container4_health_status_not_running_unhealthy": 1, + "container_container4_health_status_starting": 0, + "container_container4_health_status_unhealthy": 0, + "container_container4_size_root_fs": 0, + "container_container4_size_rw": 0, + "container_container4_state_created": 1, + "container_container4_state_dead": 0, + "container_container4_state_exited": 0, + "container_container4_state_paused": 0, + "container_container4_state_removing": 0, + "container_container4_state_restarting": 0, + "container_container4_state_running": 0, + "container_container5_health_status_healthy": 0, + "container_container5_health_status_none": 0, + "container_container5_health_status_not_running_unhealthy": 0, + "container_container5_health_status_starting": 0, + "container_container5_health_status_unhealthy": 1, + "container_container5_size_root_fs": 0, + "container_container5_size_rw": 0, + "container_container5_state_created": 0, + "container_container5_state_dead": 0, + "container_container5_state_exited": 0, + "container_container5_state_paused": 0, + "container_container5_state_removing": 0, + "container_container5_state_restarting": 0, + 
"container_container5_state_running": 1, + "container_container6_health_status_healthy": 0, + "container_container6_health_status_none": 0, + "container_container6_health_status_not_running_unhealthy": 1, + "container_container6_health_status_starting": 0, + "container_container6_health_status_unhealthy": 0, + "container_container6_size_root_fs": 0, + "container_container6_size_rw": 0, + "container_container6_state_created": 0, + "container_container6_state_dead": 0, + "container_container6_state_exited": 0, + "container_container6_state_paused": 1, + "container_container6_state_removing": 0, + "container_container6_state_restarting": 0, + "container_container6_state_running": 0, + "container_container7_health_status_healthy": 0, + "container_container7_health_status_none": 0, + "container_container7_health_status_not_running_unhealthy": 1, + "container_container7_health_status_starting": 0, + "container_container7_health_status_unhealthy": 0, + "container_container7_size_root_fs": 0, + "container_container7_size_rw": 0, + "container_container7_state_created": 0, + "container_container7_state_dead": 0, + "container_container7_state_exited": 0, + "container_container7_state_paused": 0, + "container_container7_state_removing": 0, + "container_container7_state_restarting": 1, + "container_container7_state_running": 0, + "container_container8_health_status_healthy": 0, + "container_container8_health_status_none": 0, + "container_container8_health_status_not_running_unhealthy": 1, + "container_container8_health_status_starting": 0, + "container_container8_health_status_unhealthy": 0, + "container_container8_size_root_fs": 0, + "container_container8_size_rw": 0, + "container_container8_state_created": 0, + "container_container8_state_dead": 0, + "container_container8_state_exited": 0, + "container_container8_state_paused": 0, + "container_container8_state_removing": 1, + "container_container8_state_restarting": 0, + "container_container8_state_running": 0, + 
"container_container9_health_status_healthy": 0, + "container_container9_health_status_none": 0, + "container_container9_health_status_not_running_unhealthy": 1, + "container_container9_health_status_starting": 0, + "container_container9_health_status_unhealthy": 0, + "container_container9_size_root_fs": 0, + "container_container9_size_rw": 0, + "container_container9_state_created": 0, + "container_container9_state_dead": 0, + "container_container9_state_exited": 1, + "container_container9_state_paused": 0, + "container_container9_state_removing": 0, + "container_container9_state_restarting": 0, + "container_container9_state_running": 0, + "containers_health_status_healthy": 3, + "containers_health_status_none": 3, + "containers_health_status_not_running_unhealthy": 6, + "containers_health_status_starting": 3, + "containers_health_status_unhealthy": 1, + "containers_state_exited": 6, + "containers_state_paused": 5, + "containers_state_running": 4, + "images_active": 1, + "images_dangling": 1, + "images_size": 300, + }, + }, + "fail on case err on Info()": { + prepare: func() *Docker { + return prepareCaseErrOnInfo() + }, + expected: nil, + }, + "fail on case err on ImageList()": { + prepare: func() *Docker { + return prepareCaseErrOnImageList() + }, + expected: nil, + }, + "fail on case err on ContainerList()": { + prepare: func() *Docker { + return prepareCaseErrOnContainerList() + }, + expected: nil, + }, + "fail on case err on creating Docker client": { + prepare: func() *Docker { + return prepareCaseErrCreatingClient() + }, + expected: nil, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + d := test.prepare() + + require.NoError(t, d.Init()) + + mx := d.Collect() + + require.Equal(t, test.expected, mx) + + if d.client != nil { + m, ok := d.client.(*mockClient) + require.True(t, ok) + require.True(t, m.negotiateAPIVersionCalled) + } + + }) + } +} + +func prepareCaseSuccess() *Docker { + d := New() + d.CollectContainerSize = true + 
d.newClient = prepareNewClientFunc(&mockClient{}) + return d +} + +func prepareCaseSuccessWithoutContainerSize() *Docker { + d := New() + d.CollectContainerSize = false + d.newClient = prepareNewClientFunc(&mockClient{}) + return d +} + +func prepareCaseErrOnInfo() *Docker { + d := New() + d.newClient = prepareNewClientFunc(&mockClient{errOnInfo: true}) + return d +} + +func prepareCaseErrOnImageList() *Docker { + d := New() + d.newClient = prepareNewClientFunc(&mockClient{errOnImageList: true}) + return d +} + +func prepareCaseErrOnContainerList() *Docker { + d := New() + d.newClient = prepareNewClientFunc(&mockClient{errOnContainerList: true}) + return d +} + +func prepareCaseErrCreatingClient() *Docker { + d := New() + d.newClient = prepareNewClientFunc(nil) + return d +} + +func prepareNewClientFunc(m *mockClient) func(_ Config) (dockerClient, error) { + if m == nil { + return func(_ Config) (dockerClient, error) { return nil, errors.New("mock.newClient() error") } + } + return func(_ Config) (dockerClient, error) { return m, nil } +} + +type mockClient struct { + errOnInfo bool + errOnImageList bool + errOnContainerList bool + negotiateAPIVersionCalled bool + closeCalled bool +} + +func (m *mockClient) Info(_ context.Context) (typesSystem.Info, error) { + if m.errOnInfo { + return typesSystem.Info{}, errors.New("mockClient.Info() error") + } + + return typesSystem.Info{ + ContainersRunning: 4, + ContainersPaused: 5, + ContainersStopped: 6, + }, nil +} + +func (m *mockClient) ContainerList(_ context.Context, opts typesContainer.ListOptions) ([]types.Container, error) { + if m.errOnContainerList { + return nil, errors.New("mockClient.ContainerList() error") + } + + v := opts.Filters.Get("health") + + if len(v) == 0 { + return nil, errors.New("mockClient.ContainerList() error (expect 'health' filter)") + } + + var containers []types.Container + + switch v[0] { + case types.Healthy: + containers = []types.Container{ + {Names: []string{"container1"}, State: 
"created", Image: "example/example:v1"}, + {Names: []string{"container2"}, State: "running", Image: "example/example:v1"}, + {Names: []string{"container3"}, State: "running", Image: "example/example:v1"}, + } + case types.Unhealthy: + containers = []types.Container{ + {Names: []string{"container4"}, State: "created", Image: "example/example:v2"}, + {Names: []string{"container5"}, State: "running", Image: "example/example:v2"}, + {Names: []string{"container6"}, State: "paused", Image: "example/example:v2"}, + {Names: []string{"container7"}, State: "restarting", Image: "example/example:v2"}, + {Names: []string{"container8"}, State: "removing", Image: "example/example:v2"}, + {Names: []string{"container9"}, State: "exited", Image: "example/example:v2"}, + {Names: []string{"container10"}, State: "dead", Image: "example/example:v2"}, + } + case types.Starting: + containers = []types.Container{ + {Names: []string{"container11"}, State: "removing", Image: "example/example:v3"}, + {Names: []string{"container12"}, State: "exited", Image: "example/example:v3"}, + {Names: []string{"container13"}, State: "exited", Image: "example/example:v3"}, + } + case types.NoHealthcheck: + containers = []types.Container{ + {Names: []string{"container14"}, State: "dead", Image: "example/example:v4"}, + {Names: []string{"container15"}, State: "dead", Image: "example/example:v4"}, + {Names: []string{"container16"}, State: "dead", Image: "example/example:v4"}, + } + } + + if opts.Size { + for _, c := range containers { + c.SizeRw = 123 + c.SizeRootFs = 321 + } + } + + return containers, nil +} + +func (m *mockClient) ImageList(_ context.Context, _ types.ImageListOptions) ([]typesImage.Summary, error) { + if m.errOnImageList { + return nil, errors.New("mockClient.ImageList() error") + } + + return []typesImage.Summary{ + { + Containers: 0, + Size: 100, + }, + { + Containers: 1, + Size: 200, + }, + }, nil +} + +func (m *mockClient) NegotiateAPIVersion(_ context.Context) { + 
m.negotiateAPIVersionCalled = true +} + +func (m *mockClient) Close() error { + m.closeCalled = true + return nil +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/integrations/docker.md b/src/go/collectors/go.d.plugin/modules/docker/integrations/docker.md new file mode 100644 index 000000000..abe7fe438 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/integrations/docker.md @@ -0,0 +1,208 @@ +<!--startmeta +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/docker/README.md" +meta_yaml: "https://github.com/netdata/netdata/edit/master/src/go/collectors/go.d.plugin/modules/docker/metadata.yaml" +sidebar_label: "Docker" +learn_status: "Published" +learn_rel_path: "Collecting Metrics/Containers and VMs" +most_popular: True +message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" +endmeta--> + +# Docker + + +<img src="https://netdata.cloud/img/docker.svg" width="150"/> + + +Plugin: go.d.plugin +Module: docker + +<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> + +## Overview + +This collector monitors Docker containers state, health status and more. + + +It connects to the Docker instance via a TCP or UNIX socket and executes the following commands: + +- [System info](https://docs.docker.com/engine/api/v1.43/#tag/System/operation/SystemInfo). +- [List images](https://docs.docker.com/engine/api/v1.43/#tag/Image/operation/ImageList). +- [List containers](https://docs.docker.com/engine/api/v1.43/#tag/Container/operation/ContainerList). + + +This collector is supported on all platforms. + +This collector supports collecting metrics from multiple instances of this integration, including remote instances. + +Requires netdata user to be in the docker group. + +### Default Behavior + +#### Auto-Detection + +It discovers instances running on localhost by attempting to connect to a known Docker UNIX socket: `/var/run/docker.sock`. 
+ + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. + +#### Performance Impact + +Enabling `collect_container_size` may result in high CPU usage depending on the version of Docker Engine. + + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per Docker instance + +These metrics refer to the entire monitored application. + +This scope has no labels. + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| docker.containers_state | running, paused, exited | containers | +| docker.containers_health_status | healthy, unhealthy, not_running_unhealthy, starting, no_healthcheck | containers | +| docker.images | active, dangling | images | +| docker.images_size | size | bytes | + +### Per container + +Metrics related to containers. Each container provides its own set of the following metrics. + +Labels: + +| Label | Description | +|:-----------|:----------------| +| container_name | The container's name | +| image | The image name the container uses | + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| docker.container_state | running, paused, exited, created, restarting, removing, dead | state | +| docker.container_health_status | healthy, unhealthy, not_running_unhealthy, starting, no_healthcheck | status | +| docker.container_writeable_layer_size | writeable_layer | size | + + + +## Alerts + + +The following alerts are available: + +| Alert name | On metric | Description | +|:------------|:----------|:------------| +| [ docker_container_unhealthy ](https://github.com/netdata/netdata/blob/master/src/health/health.d/docker.conf) | docker.container_health_status | ${label:container_name} docker container health status is unhealthy | + + +## Setup + +### Prerequisites + +No action required. 
+ +### Configuration + +#### File + +The configuration file name for this integration is `go.d/docker.conf`. + + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration.md#the-netdata-config-directory). + +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/docker.conf +``` +#### Options + +The following options can be defined globally: update_every, autodetection_retry. + + +<details><summary>Config options</summary> + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 1 | no | +| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no | +| address | Docker daemon's listening address. When using a TCP socket, the format is: tcp://[ip]:[port] | unix:///var/run/docker.sock | yes | +| timeout | Request timeout in seconds. | 2 | no | +| collect_container_size | Whether to collect container writable layer size. | no | no | + +</details> + +#### Examples + +##### Basic + +An example configuration. + +```yaml +jobs: + - name: local + address: 'unix:///var/run/docker.sock' + +``` +##### Multi-instance + +> **Note**: When you define multiple jobs, their names must be unique. + +Collecting metrics from local and remote instances. + + +<details><summary>Config</summary> + +```yaml +jobs: + - name: local + address: 'unix:///var/run/docker.sock' + + - name: remote + address: 'tcp://203.0.113.10:2375' + +``` +</details> + + + +## Troubleshooting + +### Debug Mode + +To troubleshoot issues with the `docker` collector, run the `go.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. 
If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. + + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m docker + ``` + + diff --git a/src/go/collectors/go.d.plugin/modules/docker/metadata.yaml b/src/go/collectors/go.d.plugin/modules/docker/metadata.yaml new file mode 100644 index 000000000..8fc6853a9 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/metadata.yaml @@ -0,0 +1,190 @@ +plugin_name: go.d.plugin +modules: + - meta: + id: collector-go.d.plugin-docker + plugin_name: go.d.plugin + module_name: docker + alternative_monitored_instances: [] + monitored_instance: + name: Docker + link: https://www.docker.com/ + categories: + - data-collection.containers-and-vms + icon_filename: docker.svg + related_resources: + integrations: + list: [] + info_provided_to_referring_integrations: + description: "" + keywords: + - container + most_popular: true + overview: + data_collection: + metrics_description: | + This collector monitors Docker containers state, health status and more. + method_description: | + It connects to the Docker instance via a TCP or UNIX socket and executes the following commands: + + - [System info](https://docs.docker.com/engine/api/v1.43/#tag/System/operation/SystemInfo). + - [List images](https://docs.docker.com/engine/api/v1.43/#tag/Image/operation/ImageList). + - [List containers](https://docs.docker.com/engine/api/v1.43/#tag/Container/operation/ContainerList). + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: Requires netdata user to be in the docker group. + default_behavior: + auto_detection: + description: | + It discovers instances running on localhost by attempting to connect to a known Docker UNIX socket: `/var/run/docker.sock`. 
+ limits: + description: "" + performance_impact: + description: | + Enabling `collect_container_size` may result in high CPU usage depending on the version of Docker Engine. + setup: + prerequisites: + list: [] + configuration: + file: + name: go.d/docker.conf + options: + description: | + The following options can be defined globally: update_every, autodetection_retry. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Data collection frequency. + default_value: 1 + required: false + - name: autodetection_retry + description: Recheck interval in seconds. Zero means no recheck will be scheduled. + default_value: 0 + required: false + - name: address + description: 'Docker daemon''s listening address. When using a TCP socket, the format is: tcp://[ip]:[port]' + default_value: unix:///var/run/docker.sock + required: true + - name: timeout + description: Request timeout in seconds. + default_value: 2 + required: false + - name: collect_container_size + description: Whether to collect container writable layer size. + default_value: "no" + required: false + examples: + folding: + enabled: true + title: Config + list: + - name: Basic + description: An example configuration. + folding: + enabled: false + config: | + jobs: + - name: local + address: 'unix:///var/run/docker.sock' + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. 
+ config: | + jobs: + - name: local + address: 'unix:///var/run/docker.sock' + + - name: remote + address: 'tcp://203.0.113.10:2375' + troubleshooting: + problems: + list: [] + alerts: + - name: docker_container_unhealthy + metric: docker.container_health_status + info: ${label:container_name} docker container health status is unhealthy + link: https://github.com/netdata/netdata/blob/master/src/health/health.d/docker.conf + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: These metrics refer to the entire monitored application. + labels: [] + metrics: + - name: docker.containers_state + description: Total number of Docker containers in various states + unit: containers + chart_type: stacked + dimensions: + - name: running + - name: paused + - name: exited + - name: docker.containers_health_status + description: Total number of Docker containers in various health states + unit: containers + chart_type: line + dimensions: + - name: healthy + - name: unhealthy + - name: not_running_unhealthy + - name: starting + - name: no_healthcheck + - name: docker.images + description: Total number of Docker images in various states + unit: images + chart_type: stacked + dimensions: + - name: active + - name: dangling + - name: docker.images_size + description: Total size of all Docker images + unit: bytes + chart_type: line + dimensions: + - name: size + - name: container + description: Metrics related to containers. Each container provides its own set of the following metrics. 
+ labels: + - name: container_name + description: The container's name + - name: image + description: The image name the container uses + metrics: + - name: docker.container_state + description: Docker container state + unit: state + chart_type: line + dimensions: + - name: running + - name: paused + - name: exited + - name: created + - name: restarting + - name: removing + - name: dead + - name: docker.container_health_status + description: Docker container health status + unit: status + chart_type: line + dimensions: + - name: healthy + - name: unhealthy + - name: not_running_unhealthy + - name: starting + - name: no_healthcheck + - name: docker.container_writeable_layer_size + description: Docker container writable layer size + unit: size + chart_type: line + dimensions: + - name: writeable_layer diff --git a/src/go/collectors/go.d.plugin/modules/docker/testdata/config.json b/src/go/collectors/go.d.plugin/modules/docker/testdata/config.json new file mode 100644 index 000000000..5e687448c --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/testdata/config.json @@ -0,0 +1,6 @@ +{ + "update_every": 123, + "address": "ok", + "timeout": 123.123, + "collect_container_size": true +} diff --git a/src/go/collectors/go.d.plugin/modules/docker/testdata/config.yaml b/src/go/collectors/go.d.plugin/modules/docker/testdata/config.yaml new file mode 100644 index 000000000..2b0f32225 --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/docker/testdata/config.yaml @@ -0,0 +1,4 @@ +update_every: 123 +address: "ok" +timeout: 123.123 +collect_container_size: yes |