diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-26 08:15:24 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-26 08:15:35 +0000 |
commit | f09848204fa5283d21ea43e262ee41aa578e1808 (patch) | |
tree | c62385d7adf209fa6a798635954d887f718fb3fb /src/go/plugin/go.d/modules/chrony | |
parent | Releasing debian version 1.46.3-2. (diff) | |
download | netdata-f09848204fa5283d21ea43e262ee41aa578e1808.tar.xz netdata-f09848204fa5283d21ea43e262ee41aa578e1808.zip |
Merging upstream version 1.47.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/go/plugin/go.d/modules/chrony')
l--------- | src/go/plugin/go.d/modules/chrony/README.md | 1 | ||||
-rw-r--r-- | src/go/plugin/go.d/modules/chrony/charts.go | 320 | ||||
-rw-r--r-- | src/go/plugin/go.d/modules/chrony/chrony.go | 112 | ||||
-rw-r--r-- | src/go/plugin/go.d/modules/chrony/chrony_test.go | 326 | ||||
-rw-r--r-- | src/go/plugin/go.d/modules/chrony/client.go | 171 | ||||
-rw-r--r-- | src/go/plugin/go.d/modules/chrony/collect.go | 156 | ||||
-rw-r--r-- | src/go/plugin/go.d/modules/chrony/config_schema.json | 43 | ||||
-rw-r--r-- | src/go/plugin/go.d/modules/chrony/init.go | 14 | ||||
-rw-r--r-- | src/go/plugin/go.d/modules/chrony/integrations/chrony.md | 222 | ||||
-rw-r--r-- | src/go/plugin/go.d/modules/chrony/metadata.yaml | 208 | ||||
-rw-r--r-- | src/go/plugin/go.d/modules/chrony/testdata/config.json | 5 | ||||
-rw-r--r-- | src/go/plugin/go.d/modules/chrony/testdata/config.yaml | 3 |
12 files changed, 1581 insertions, 0 deletions
diff --git a/src/go/plugin/go.d/modules/chrony/README.md b/src/go/plugin/go.d/modules/chrony/README.md new file mode 120000 index 000000000..4a58f3733 --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/README.md @@ -0,0 +1 @@ +integrations/chrony.md
\ No newline at end of file diff --git a/src/go/plugin/go.d/modules/chrony/charts.go b/src/go/plugin/go.d/modules/chrony/charts.go new file mode 100644 index 000000000..37a6fa3e6 --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/charts.go @@ -0,0 +1,320 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package chrony + +import ( + "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module" +) + +const ( + prioStratum = module.Priority + iota + prioCurrentCorrection + prioRootDelay + prioRootDispersion + prioLastOffset + prioRmsOffset + prioFrequency + prioResidualFrequency + prioSkew + prioUpdateInterval + prioRefMeasurementTime + prioLeapStatus + prioActivity + //prioNTPPackets + //prioCommandPackets + //prioNKEConnections + //prioClientLogRecords +) + +var charts = module.Charts{ + stratumChart.Copy(), + + currentCorrectionChart.Copy(), + + rootDelayChart.Copy(), + rootDispersionChart.Copy(), + + lastOffsetChart.Copy(), + rmsOffsetChart.Copy(), + + frequencyChart.Copy(), + residualFrequencyChart.Copy(), + + skewChart.Copy(), + + updateIntervalChart.Copy(), + refMeasurementTimeChart.Copy(), + + leapStatusChart.Copy(), + + activityChart.Copy(), +} + +// Tracking charts +var ( + stratumChart = module.Chart{ + ID: "stratum", + Title: "Distance to the reference clock", + Units: "level", + Fam: "stratum", + Ctx: "chrony.stratum", + Priority: prioStratum, + Dims: module.Dims{ + {ID: "stratum", Name: "stratum"}, + }, + } + + currentCorrectionChart = module.Chart{ + ID: "current_correction", + Title: "Current correction", + Units: "seconds", + Fam: "correction", + Ctx: "chrony.current_correction", + Priority: prioCurrentCorrection, + Dims: module.Dims{ + {ID: "current_correction", Div: scaleFactor}, + }, + } + + rootDelayChart = module.Chart{ + ID: "root_delay", + Title: "Network path delay to stratum-1", + Units: "seconds", + Fam: "root", + Ctx: "chrony.root_delay", + Priority: prioRootDelay, + Dims: module.Dims{ + {ID: "root_delay", Div: scaleFactor}, + }, + 
} + rootDispersionChart = module.Chart{ + ID: "root_dispersion", + Title: "Dispersion accumulated back to stratum-1", + Units: "seconds", + Fam: "root", + Ctx: "chrony.root_dispersion", + Priority: prioRootDispersion, + Dims: module.Dims{ + {ID: "root_dispersion", Div: scaleFactor}, + }, + } + + lastOffsetChart = module.Chart{ + ID: "last_offset", + Title: "Offset on the last clock update", + Units: "seconds", + Fam: "offset", + Ctx: "chrony.last_offset", + Priority: prioLastOffset, + Dims: module.Dims{ + {ID: "last_offset", Name: "offset", Div: scaleFactor}, + }, + } + rmsOffsetChart = module.Chart{ + ID: "rms_offset", + Title: "Long-term average of the offset value", + Units: "seconds", + Fam: "offset", + Ctx: "chrony.rms_offset", + Priority: prioRmsOffset, + Dims: module.Dims{ + {ID: "rms_offset", Name: "offset", Div: scaleFactor}, + }, + } + + frequencyChart = module.Chart{ + ID: "frequency", + Title: "Frequency", + Units: "ppm", + Fam: "frequency", + Ctx: "chrony.frequency", + Priority: prioFrequency, + Dims: module.Dims{ + {ID: "frequency", Div: scaleFactor}, + }, + } + residualFrequencyChart = module.Chart{ + ID: "residual_frequency", + Title: "Residual frequency", + Units: "ppm", + Fam: "frequency", + Ctx: "chrony.residual_frequency", + Priority: prioResidualFrequency, + Dims: module.Dims{ + {ID: "residual_frequency", Div: scaleFactor}, + }, + } + + skewChart = module.Chart{ + ID: "skew", + Title: "Skew", + Units: "ppm", + Fam: "frequency", + Ctx: "chrony.skew", + Priority: prioSkew, + Dims: module.Dims{ + {ID: "skew", Div: scaleFactor}, + }, + } + + updateIntervalChart = module.Chart{ + ID: "update_interval", + Title: "Interval between the last two clock updates", + Units: "seconds", + Fam: "updates", + Ctx: "chrony.update_interval", + Priority: prioUpdateInterval, + Dims: module.Dims{ + {ID: "update_interval", Div: scaleFactor}, + }, + } + refMeasurementTimeChart = module.Chart{ + ID: "ref_measurement_time", + Title: "Time since the last measurement", + 
Units: "seconds", + Fam: "updates", + Ctx: "chrony.ref_measurement_time", + Priority: prioRefMeasurementTime, + Dims: module.Dims{ + {ID: "ref_measurement_time"}, + }, + } + + leapStatusChart = module.Chart{ + ID: "leap_status", + Title: "Leap status", + Units: "status", + Fam: "leap status", + Ctx: "chrony.leap_status", + Priority: prioLeapStatus, + Dims: module.Dims{ + {ID: "leap_status_normal", Name: "normal"}, + {ID: "leap_status_insert_second", Name: "insert_second"}, + {ID: "leap_status_delete_second", Name: "delete_second"}, + {ID: "leap_status_unsynchronised", Name: "unsynchronised"}, + }, + } +) + +// Activity charts +var ( + activityChart = module.Chart{ + ID: "activity", + Title: "Peers activity", + Units: "sources", + Fam: "activity", + Ctx: "chrony.activity", + Type: module.Stacked, + Priority: prioActivity, + Dims: module.Dims{ + {ID: "online_sources", Name: "online"}, + {ID: "offline_sources", Name: "offline"}, + {ID: "burst_online_sources", Name: "burst_online"}, + {ID: "burst_offline_sources", Name: "burst_offline"}, + {ID: "unresolved_sources", Name: "unresolved"}, + }, + } +) + +//var serverStatsVer1Charts = module.Charts{ +// ntpPacketsChart.Copy(), +// commandPacketsChart.Copy(), +// clientLogRecordsChart.Copy(), +//} +// +//var serverStatsVer2Charts = module.Charts{ +// ntpPacketsChart.Copy(), +// commandPacketsChart.Copy(), +// clientLogRecordsChart.Copy(), +// nkeConnectionChart.Copy(), +//} +// +//var serverStatsVer3Charts = module.Charts{ +// ntpPacketsChart.Copy(), +// commandPacketsChart.Copy(), +// clientLogRecordsChart.Copy(), +// nkeConnectionChart.Copy(), +//} +// +//var serverStatsVer4Charts = module.Charts{ +// ntpPacketsChart.Copy(), +// commandPacketsChart.Copy(), +// clientLogRecordsChart.Copy(), +// nkeConnectionChart.Copy(), +//} + +// ServerStats charts +//var ( +// ntpPacketsChart = module.Chart{ +// ID: "ntp_packets", +// Title: "NTP packets", +// Units: "packets/s", +// Fam: "client requests", +// Ctx: 
"chrony.ntp_packets", +// Type: module.Stacked, +// Priority: prioNTPPackets, +// Dims: module.Dims{ +// {ID: "ntp_packets_received", Name: "received", Algo: module.Incremental}, +// {ID: "ntp_packets_dropped", Name: "dropped", Algo: module.Incremental}, +// }, +// } +// commandPacketsChart = module.Chart{ +// ID: "command_packets", +// Title: "Command packets", +// Units: "packets/s", +// Fam: "client requests", +// Ctx: "chrony.command_packets", +// Type: module.Stacked, +// Priority: prioCommandPackets, +// Dims: module.Dims{ +// {ID: "command_packets_received", Name: "received", Algo: module.Incremental}, +// {ID: "command_packets_dropped", Name: "dropped", Algo: module.Incremental}, +// }, +// } +// nkeConnectionChart = module.Chart{ +// ID: "nke_connections", +// Title: "NTS-KE connections", +// Units: "connections/s", +// Fam: "client requests", +// Ctx: "chrony.nke_connections", +// Type: module.Stacked, +// Priority: prioNKEConnections, +// Dims: module.Dims{ +// {ID: "nke_connections_accepted", Name: "accepted", Algo: module.Incremental}, +// {ID: "nke_connections_dropped", Name: "dropped", Algo: module.Incremental}, +// }, +// } +// clientLogRecordsChart = module.Chart{ +// ID: "client_log_records", +// Title: "Client log records", +// Units: "records/s", +// Fam: "client requests", +// Ctx: "chrony.client_log_records", +// Type: module.Stacked, +// Priority: prioClientLogRecords, +// Dims: module.Dims{ +// {ID: "client_log_records_dropped", Name: "dropped", Algo: module.Incremental}, +// }, +// } +//) + +//func (c *Chrony) addServerStatsCharts(stats *serverStats) { +// var err error +// +// switch { +// case stats.v1 != nil: +// err = c.Charts().Add(*serverStatsVer1Charts.Copy()...) +// case stats.v2 != nil: +// err = c.Charts().Add(*serverStatsVer2Charts.Copy()...) +// case stats.v3 != nil: +// err = c.Charts().Add(*serverStatsVer3Charts.Copy()...) +// case stats.v4 != nil: +// err = c.Charts().Add(*serverStatsVer4Charts.Copy()...) 
+// default: +// err = errors.New("unknown stats chart") +// } +// +// if err != nil { +// c.Warning(err) +// } +//} diff --git a/src/go/plugin/go.d/modules/chrony/chrony.go b/src/go/plugin/go.d/modules/chrony/chrony.go new file mode 100644 index 000000000..0bdd3183c --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/chrony.go @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package chrony + +import ( + _ "embed" + "errors" + "sync" + "time" + + "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module" + "github.com/netdata/netdata/go/plugins/plugin/go.d/pkg/web" + + "github.com/facebook/time/ntp/chrony" +) + +//go:embed "config_schema.json" +var configSchema string + +func init() { + module.Register("chrony", module.Creator{ + JobConfigSchema: configSchema, + Create: func() module.Module { return New() }, + Config: func() any { return &Config{} }, + }) +} + +func New() *Chrony { + return &Chrony{ + Config: Config{ + Address: "127.0.0.1:323", + Timeout: web.Duration(time.Second), + }, + charts: charts.Copy(), + addStatsChartsOnce: &sync.Once{}, + newClient: newChronyClient, + } +} + +type Config struct { + UpdateEvery int `yaml:"update_every,omitempty" json:"update_every"` + Address string `yaml:"address" json:"address"` + Timeout web.Duration `yaml:"timeout,omitempty" json:"timeout"` +} + +type ( + Chrony struct { + module.Base + Config `yaml:",inline" json:""` + + charts *module.Charts + addStatsChartsOnce *sync.Once + + client chronyClient + newClient func(c Config) (chronyClient, error) + } + chronyClient interface { + Tracking() (*chrony.ReplyTracking, error) + Activity() (*chrony.ReplyActivity, error) + ServerStats() (*serverStats, error) + Close() + } +) + +func (c *Chrony) Configuration() any { + return c.Config +} + +func (c *Chrony) Init() error { + if err := c.validateConfig(); err != nil { + c.Errorf("config validation: %v", err) + return err + } + + return nil +} + +func (c *Chrony) Check() error { + mx, err := c.collect() + 
if err != nil { + c.Error(err) + return err + } + if len(mx) == 0 { + return errors.New("no metrics collected") + + } + return nil +} + +func (c *Chrony) Charts() *module.Charts { + return c.charts +} + +func (c *Chrony) Collect() map[string]int64 { + mx, err := c.collect() + if err != nil { + c.Error(err) + } + + if len(mx) == 0 { + return nil + } + return mx +} + +func (c *Chrony) Cleanup() { + if c.client != nil { + c.client.Close() + c.client = nil + } +} diff --git a/src/go/plugin/go.d/modules/chrony/chrony_test.go b/src/go/plugin/go.d/modules/chrony/chrony_test.go new file mode 100644 index 000000000..407724e75 --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/chrony_test.go @@ -0,0 +1,326 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package chrony + +import ( + "errors" + "net" + "os" + "testing" + "time" + + "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module" + + "github.com/facebook/time/ntp/chrony" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + dataConfigJSON, _ = os.ReadFile("testdata/config.json") + dataConfigYAML, _ = os.ReadFile("testdata/config.yaml") +) + +func Test_testDataIsValid(t *testing.T) { + for name, data := range map[string][]byte{ + "dataConfigJSON": dataConfigJSON, + "dataConfigYAML": dataConfigYAML, + } { + assert.NotNil(t, data, name) + } +} + +func TestChrony_ConfigurationSerialize(t *testing.T) { + module.TestConfigurationSerialize(t, &Chrony{}, dataConfigJSON, dataConfigYAML) +} + +func TestChrony_Init(t *testing.T) { + tests := map[string]struct { + config Config + wantFail bool + }{ + "default config": { + config: New().Config, + }, + "unset 'address'": { + wantFail: true, + config: Config{ + Address: "", + }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + c := New() + c.Config = test.config + + if test.wantFail { + assert.Error(t, c.Init()) + } else { + assert.NoError(t, c.Init()) + } + }) + } +} + +func TestChrony_Check(t 
*testing.T) { + tests := map[string]struct { + prepare func() *Chrony + wantFail bool + }{ + "tracking: success, activity: success": { + wantFail: false, + prepare: func() *Chrony { return prepareChronyWithMock(&mockClient{}) }, + }, + "tracking: success, activity: fail": { + wantFail: true, + prepare: func() *Chrony { return prepareChronyWithMock(&mockClient{errOnActivity: true}) }, + }, + "tracking: fail, activity: success": { + wantFail: true, + prepare: func() *Chrony { return prepareChronyWithMock(&mockClient{errOnTracking: true}) }, + }, + "tracking: fail, activity: fail": { + wantFail: true, + prepare: func() *Chrony { return prepareChronyWithMock(&mockClient{errOnTracking: true}) }, + }, + "fail on creating client": { + wantFail: true, + prepare: func() *Chrony { return prepareChronyWithMock(nil) }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + c := test.prepare() + + require.NoError(t, c.Init()) + + if test.wantFail { + assert.Error(t, c.Check()) + } else { + assert.NoError(t, c.Check()) + } + }) + } +} + +func TestChrony_Charts(t *testing.T) { + assert.Equal(t, len(charts), len(*New().Charts())) +} + +func TestChrony_Cleanup(t *testing.T) { + tests := map[string]struct { + prepare func(c *Chrony) + wantClose bool + }{ + "after New": { + wantClose: false, + prepare: func(c *Chrony) {}, + }, + "after Init": { + wantClose: false, + prepare: func(c *Chrony) { _ = c.Init() }, + }, + "after Check": { + wantClose: true, + prepare: func(c *Chrony) { _ = c.Init(); _ = c.Check() }, + }, + "after Collect": { + wantClose: true, + prepare: func(c *Chrony) { _ = c.Init(); _ = c.Collect() }, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + m := &mockClient{} + c := prepareChronyWithMock(m) + test.prepare(c) + + require.NotPanics(t, c.Cleanup) + + if test.wantClose { + assert.True(t, m.closeCalled) + } else { + assert.False(t, m.closeCalled) + } + }) + } +} + +func TestChrony_Collect(t *testing.T) { + 
tests := map[string]struct { + prepare func() *Chrony + expected map[string]int64 + }{ + "tracking: success, activity: success": { + prepare: func() *Chrony { return prepareChronyWithMock(&mockClient{}) }, + expected: map[string]int64{ + "burst_offline_sources": 3, + "burst_online_sources": 4, + "current_correction": 154872, + "frequency": 51051185607, + "last_offset": 3095, + "leap_status_delete_second": 0, + "leap_status_insert_second": 1, + "leap_status_normal": 0, + "leap_status_unsynchronised": 0, + "offline_sources": 2, + "online_sources": 8, + "ref_measurement_time": 63793323616, + "residual_frequency": -571789, + "rms_offset": 130089, + "root_delay": 59576179, + "root_dispersion": 1089275, + "skew": 41821926, + "stratum": 4, + "unresolved_sources": 1, + "update_interval": 1044219238281, + }, + }, + "tracking: success, activity: fail": { + prepare: func() *Chrony { return prepareChronyWithMock(&mockClient{errOnActivity: true}) }, + expected: map[string]int64{ + "current_correction": 154872, + "frequency": 51051185607, + "last_offset": 3095, + "leap_status_delete_second": 0, + "leap_status_insert_second": 1, + "leap_status_normal": 0, + "leap_status_unsynchronised": 0, + "ref_measurement_time": 63793323586, + "residual_frequency": -571789, + "rms_offset": 130089, + "root_delay": 59576179, + "root_dispersion": 1089275, + "skew": 41821926, + "stratum": 4, + "update_interval": 1044219238281, + }, + }, + "tracking: fail, activity: success": { + prepare: func() *Chrony { return prepareChronyWithMock(&mockClient{errOnTracking: true}) }, + expected: nil, + }, + "tracking: fail, activity: fail": { + prepare: func() *Chrony { return prepareChronyWithMock(&mockClient{errOnTracking: true}) }, + expected: nil, + }, + "fail on creating client": { + prepare: func() *Chrony { return prepareChronyWithMock(nil) }, + expected: nil, + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + c := test.prepare() + + require.NoError(t, c.Init()) + _ = 
c.Check() + + collected := c.Collect() + copyRefMeasurementTime(collected, test.expected) + + assert.Equal(t, test.expected, collected) + }) + } +} + +func prepareChronyWithMock(m *mockClient) *Chrony { + c := New() + if m == nil { + c.newClient = func(_ Config) (chronyClient, error) { return nil, errors.New("mock.newClient error") } + } else { + c.newClient = func(_ Config) (chronyClient, error) { return m, nil } + } + return c +} + +type mockClient struct { + errOnTracking bool + errOnActivity bool + errOnServerStats bool + closeCalled bool +} + +func (m *mockClient) Tracking() (*chrony.ReplyTracking, error) { + if m.errOnTracking { + return nil, errors.New("mockClient.Tracking call error") + } + reply := chrony.ReplyTracking{ + Tracking: chrony.Tracking{ + RefID: 2728380539, + IPAddr: net.IP("192.0.2.0"), + Stratum: 4, + LeapStatus: 1, + RefTime: time.Time{}, + CurrentCorrection: 0.00015487267228309065, + LastOffset: 3.0953951863921247e-06, + RMSOffset: 0.00013008920359425247, + FreqPPM: -51.051185607910156, + ResidFreqPPM: -0.0005717896274290979, + SkewPPM: 0.0418219268321991, + RootDelay: 0.05957617983222008, + RootDispersion: 0.0010892755817621946, + LastUpdateInterval: 1044.21923828125, + }, + } + return &reply, nil +} + +func (m *mockClient) Activity() (*chrony.ReplyActivity, error) { + if m.errOnActivity { + return nil, errors.New("mockClient.Activity call error") + } + reply := chrony.ReplyActivity{ + Activity: chrony.Activity{ + Online: 8, + Offline: 2, + BurstOnline: 4, + BurstOffline: 3, + Unresolved: 1, + }, + } + return &reply, nil +} + +func (m *mockClient) ServerStats() (*serverStats, error) { + if m.errOnServerStats { + return nil, errors.New("mockClient.ServerStats call error") + } + + reply := serverStats{ + v3: &chrony.ServerStats3{ + NTPHits: 10, + NKEHits: 10, + CMDHits: 10, + NTPDrops: 1, + NKEDrops: 1, + CMDDrops: 1, + LogDrops: 1, + NTPAuthHits: 10, + NTPInterleavedHits: 10, + NTPTimestamps: 0, + NTPSpanSeconds: 0, + }, + } + + return 
&reply, nil +} + +func (m *mockClient) Close() { + m.closeCalled = true +} + +func copyRefMeasurementTime(dst, src map[string]int64) { + if _, ok := dst["ref_measurement_time"]; !ok { + return + } + if _, ok := src["ref_measurement_time"]; !ok { + return + } + dst["ref_measurement_time"] = src["ref_measurement_time"] +} diff --git a/src/go/plugin/go.d/modules/chrony/client.go b/src/go/plugin/go.d/modules/chrony/client.go new file mode 100644 index 000000000..233e78f19 --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/client.go @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package chrony + +import ( + "fmt" + "net" + "time" + + "github.com/facebook/time/ntp/chrony" +) + +func newChronyClient(c Config) (chronyClient, error) { + conn, err := net.DialTimeout("udp", c.Address, c.Timeout.Duration()) + if err != nil { + return nil, err + } + + client := &simpleClient{ + conn: conn, + client: &chrony.Client{Connection: &connWithTimeout{ + Conn: conn, + timeout: c.Timeout.Duration(), + }}, + } + + return client, nil +} + +type connWithTimeout struct { + net.Conn + timeout time.Duration +} + +func (c *connWithTimeout) Read(p []byte) (n int, err error) { + if err := c.Conn.SetReadDeadline(c.deadline()); err != nil { + return 0, err + } + return c.Conn.Read(p) +} + +func (c *connWithTimeout) Write(p []byte) (n int, err error) { + if err := c.Conn.SetWriteDeadline(c.deadline()); err != nil { + return 0, err + } + return c.Conn.Write(p) +} + +func (c *connWithTimeout) deadline() time.Time { + return time.Now().Add(c.timeout) +} + +type simpleClient struct { + conn net.Conn + client *chrony.Client +} + +func (sc *simpleClient) Tracking() (*chrony.ReplyTracking, error) { + req := chrony.NewTrackingPacket() + + reply, err := sc.client.Communicate(req) + if err != nil { + return nil, err + } + + tracking, ok := reply.(*chrony.ReplyTracking) + if !ok { + return nil, fmt.Errorf("unexpected reply type, want=%T, got=%T", &chrony.ReplyTracking{}, reply) + } + 
return tracking, nil +} + +func (sc *simpleClient) Activity() (*chrony.ReplyActivity, error) { + req := chrony.NewActivityPacket() + + reply, err := sc.client.Communicate(req) + if err != nil { + return nil, err + } + + activity, ok := reply.(*chrony.ReplyActivity) + if !ok { + return nil, fmt.Errorf("unexpected reply type, want=%T, got=%T", &chrony.ReplyActivity{}, reply) + } + return activity, nil +} + +type serverStats struct { + v1 *chrony.ServerStats + v2 *chrony.ServerStats2 + v3 *chrony.ServerStats3 + v4 *chrony.ServerStats4 +} + +func (sc *simpleClient) ServerStats() (*serverStats, error) { + req := chrony.NewServerStatsPacket() + + reply, err := sc.client.Communicate(req) + if err != nil { + return nil, err + } + + var stats serverStats + + switch v := reply.(type) { + case *chrony.ReplyServerStats: + stats.v1 = &chrony.ServerStats{ + NTPHits: v.NTPHits, + CMDHits: v.CMDHits, + NTPDrops: v.NTPDrops, + CMDDrops: v.CMDDrops, + LogDrops: v.LogDrops, + } + case *chrony.ReplyServerStats2: + stats.v2 = &chrony.ServerStats2{ + NTPHits: v.NTPHits, + NKEHits: v.NKEHits, + CMDHits: v.CMDHits, + NTPDrops: v.NTPDrops, + NKEDrops: v.NKEDrops, + CMDDrops: v.CMDDrops, + LogDrops: v.LogDrops, + NTPAuthHits: v.NTPAuthHits, + } + case *chrony.ReplyServerStats3: + stats.v3 = &chrony.ServerStats3{ + NTPHits: v.NTPHits, + NKEHits: v.NKEHits, + CMDHits: v.CMDHits, + NTPDrops: v.NTPDrops, + NKEDrops: v.NKEDrops, + CMDDrops: v.CMDDrops, + LogDrops: v.LogDrops, + NTPAuthHits: v.NTPAuthHits, + NTPInterleavedHits: v.NTPInterleavedHits, + NTPTimestamps: v.NTPTimestamps, + NTPSpanSeconds: v.NTPSpanSeconds, + } + case *chrony.ReplyServerStats4: + stats.v4 = &chrony.ServerStats4{ + NTPHits: v.NTPHits, + NKEHits: v.NKEHits, + CMDHits: v.CMDHits, + NTPDrops: v.NTPDrops, + NKEDrops: v.NKEDrops, + CMDDrops: v.CMDDrops, + LogDrops: v.LogDrops, + NTPAuthHits: v.NTPAuthHits, + NTPInterleavedHits: v.NTPInterleavedHits, + NTPTimestamps: v.NTPTimestamps, + NTPSpanSeconds: v.NTPSpanSeconds, + 
NTPDaemonRxtimestamps: v.NTPDaemonRxtimestamps, + NTPDaemonTxtimestamps: v.NTPDaemonTxtimestamps, + NTPKernelRxtimestamps: v.NTPKernelRxtimestamps, + NTPKernelTxtimestamps: v.NTPKernelTxtimestamps, + NTPHwRxTimestamps: v.NTPHwRxTimestamps, + NTPHwTxTimestamps: v.NTPHwTxTimestamps, + } + default: + return nil, fmt.Errorf("unexpected reply type, want=ReplyServerStats, got=%T", reply) + } + + return &stats, nil +} + +func (sc *simpleClient) Close() { + if sc.conn != nil { + _ = sc.conn.Close() + sc.conn = nil + } +} diff --git a/src/go/plugin/go.d/modules/chrony/collect.go b/src/go/plugin/go.d/modules/chrony/collect.go new file mode 100644 index 000000000..1a3a286fc --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/collect.go @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package chrony + +import ( + "fmt" + "time" +) + +const scaleFactor = 1000000000 + +func (c *Chrony) collect() (map[string]int64, error) { + if c.client == nil { + client, err := c.newClient(c.Config) + if err != nil { + return nil, err + } + c.client = client + } + + mx := make(map[string]int64) + + if err := c.collectTracking(mx); err != nil { + return nil, err + } + if err := c.collectActivity(mx); err != nil { + return mx, err + } + //if strings.HasPrefix(c.Address, "/") { + // TODO: Allowed only through the Unix domain socket (requires "_chrony" group membership). + // See https://github.com/facebook/time/blob/18207c5d8ddc7242e8d4192985898b6dbe66932c/cmd/ntpcheck/checker/chrony.go#L38 + // ^^ For some reason doesn't work, Chrony doesn't respond. Additional configuration needed? 
+ //if err := c.collectServerStats(mx); err != nil { + // return mx, err + //} + //} + + return mx, nil +} + +const ( + // https://github.com/mlichvar/chrony/blob/7daf34675a5a2487895c74d1578241ca91a4eb70/ntp.h#L70-L75 + leapStatusNormal = 0 + leapStatusInsertSecond = 1 + leapStatusDeleteSecond = 2 + leapStatusUnsynchronised = 3 +) + +func (c *Chrony) collectTracking(mx map[string]int64) error { + reply, err := c.client.Tracking() + if err != nil { + return fmt.Errorf("error on collecting tracking: %v", err) + } + + mx["stratum"] = int64(reply.Stratum) + mx["leap_status_normal"] = boolToInt(reply.LeapStatus == leapStatusNormal) + mx["leap_status_insert_second"] = boolToInt(reply.LeapStatus == leapStatusInsertSecond) + mx["leap_status_delete_second"] = boolToInt(reply.LeapStatus == leapStatusDeleteSecond) + mx["leap_status_unsynchronised"] = boolToInt(reply.LeapStatus == leapStatusUnsynchronised) + mx["root_delay"] = int64(reply.RootDelay * scaleFactor) + mx["root_dispersion"] = int64(reply.RootDispersion * scaleFactor) + mx["skew"] = int64(reply.SkewPPM * scaleFactor) + mx["last_offset"] = int64(reply.LastOffset * scaleFactor) + mx["rms_offset"] = int64(reply.RMSOffset * scaleFactor) + mx["update_interval"] = int64(reply.LastUpdateInterval * scaleFactor) + // handle chrony restarts + if reply.RefTime.Year() != 1970 { + mx["ref_measurement_time"] = time.Now().Unix() - reply.RefTime.Unix() + } + mx["residual_frequency"] = int64(reply.ResidFreqPPM * scaleFactor) + // https://github.com/mlichvar/chrony/blob/5b04f3ca902e5d10aa5948fb7587d30b43941049/client.c#L1706 + mx["current_correction"] = abs(int64(reply.CurrentCorrection * scaleFactor)) + mx["frequency"] = abs(int64(reply.FreqPPM * scaleFactor)) + + return nil +} + +func (c *Chrony) collectActivity(mx map[string]int64) error { + reply, err := c.client.Activity() + if err != nil { + return fmt.Errorf("error on collecting activity: %v", err) + } + + mx["online_sources"] = int64(reply.Online) + mx["offline_sources"] = 
int64(reply.Offline) + mx["burst_online_sources"] = int64(reply.BurstOnline) + mx["burst_offline_sources"] = int64(reply.BurstOffline) + mx["unresolved_sources"] = int64(reply.Unresolved) + + return nil +} + +//func (c *Chrony) collectServerStats(mx map[string]int64) error { +// stats, err := c.client.ServerStats() +// if err != nil { +// return fmt.Errorf("error on collecting server stats: %v", err) +// } +// +// switch { +// case stats.v4 != nil: +// mx["ntp_packets_received"] = int64(stats.v4.NTPHits) +// mx["ntp_packets_dropped"] = int64(stats.v4.NTPDrops) +// mx["command_packets_received"] = int64(stats.v4.CMDHits) +// mx["command_packets_dropped"] = int64(stats.v4.CMDDrops) +// mx["client_log_records_dropped"] = int64(stats.v4.LogDrops) +// mx["nke_connections_accepted"] = int64(stats.v4.NKEHits) +// mx["nke_connections_dropped"] = int64(stats.v4.NKEDrops) +// mx["authenticated_ntp_packets"] = int64(stats.v4.NTPAuthHits) +// mx["interleaved_ntp_packets"] = int64(stats.v4.NTPInterleavedHits) +// case stats.v3 != nil: +// mx["ntp_packets_received"] = int64(stats.v3.NTPHits) +// mx["ntp_packets_dropped"] = int64(stats.v3.NTPDrops) +// mx["command_packets_received"] = int64(stats.v3.CMDHits) +// mx["command_packets_dropped"] = int64(stats.v3.CMDDrops) +// mx["client_log_records_dropped"] = int64(stats.v3.LogDrops) +// mx["nke_connections_accepted"] = int64(stats.v3.NKEHits) +// mx["nke_connections_dropped"] = int64(stats.v3.NKEDrops) +// mx["authenticated_ntp_packets"] = int64(stats.v3.NTPAuthHits) +// mx["interleaved_ntp_packets"] = int64(stats.v3.NTPInterleavedHits) +// case stats.v2 != nil: +// mx["ntp_packets_received"] = int64(stats.v2.NTPHits) +// mx["ntp_packets_dropped"] = int64(stats.v2.NTPDrops) +// mx["command_packets_received"] = int64(stats.v2.CMDHits) +// mx["command_packets_dropped"] = int64(stats.v2.CMDDrops) +// mx["client_log_records_dropped"] = int64(stats.v2.LogDrops) +// mx["nke_connections_accepted"] = int64(stats.v2.NKEHits) +// 
mx["nke_connections_dropped"] = int64(stats.v2.NKEDrops) +// mx["authenticated_ntp_packets"] = int64(stats.v2.NTPAuthHits) +// case stats.v1 != nil: +// mx["ntp_packets_received"] = int64(stats.v1.NTPHits) +// mx["ntp_packets_dropped"] = int64(stats.v1.NTPDrops) +// mx["command_packets_received"] = int64(stats.v1.CMDHits) +// mx["command_packets_dropped"] = int64(stats.v1.CMDDrops) +// mx["client_log_records_dropped"] = int64(stats.v1.LogDrops) +// default: +// return errors.New("invalid server stats reply") +// } +// +// //c.addStatsChartsOnce.Do(func() { c.addServerStatsCharts(stats) }) +// +// return nil +//} + +func boolToInt(v bool) int64 { + if v { + return 1 + } + return 0 +} + +func abs(v int64) int64 { + if v < 0 { + return -v + } + return v +} diff --git a/src/go/plugin/go.d/modules/chrony/config_schema.json b/src/go/plugin/go.d/modules/chrony/config_schema.json new file mode 100644 index 000000000..5de10a822 --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/config_schema.json @@ -0,0 +1,43 @@ +{ + "jsonSchema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Chrony collector configuration.", + "type": "object", + "properties": { + "update_every": { + "title": "Update every", + "description": "Data collection interval, measured in seconds.", + "type": "integer", + "minimum": 1, + "default": 1 + }, + "address": { + "title": "Address", + "description": "The IP address and port where Chrony daemon listens for incoming connections.", + "type": "string", + "default": "127.0.0.1:323" + }, + "timeout": { + "title": "Timeout", + "description": "Timeout for establishing a connection and communication (reading and writing) in seconds.", + "type": "number", + "default": 1 + } + }, + "required": [ + "address" + ], + "additionalProperties": false, + "patternProperties": { + "^name$": {} + } + }, + "uiSchema": { + "uiOptions": { + "fullPage": true + }, + "timeout": { + "ui:help": "Accepts decimals for precise control (e.g., type 1.5 for 1.5 
seconds)." + } + } +} diff --git a/src/go/plugin/go.d/modules/chrony/init.go b/src/go/plugin/go.d/modules/chrony/init.go new file mode 100644 index 000000000..828112c9d --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/init.go @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package chrony + +import ( + "errors" +) + +func (c *Chrony) validateConfig() error { + if c.Address == "" { + return errors.New("empty 'address'") + } + return nil +} diff --git a/src/go/plugin/go.d/modules/chrony/integrations/chrony.md b/src/go/plugin/go.d/modules/chrony/integrations/chrony.md new file mode 100644 index 000000000..e9b9454d9 --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/integrations/chrony.md @@ -0,0 +1,222 @@ +<!--startmeta +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/go/plugin/go.d/modules/chrony/README.md" +meta_yaml: "https://github.com/netdata/netdata/edit/master/src/go/plugin/go.d/modules/chrony/metadata.yaml" +sidebar_label: "Chrony" +learn_status: "Published" +learn_rel_path: "Collecting Metrics/System Clock and NTP" +most_popular: False +message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE" +endmeta--> + +# Chrony + + +<img src="https://netdata.cloud/img/chrony.jpg" width="150"/> + + +Plugin: go.d.plugin +Module: chrony + +<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" /> + +## Overview + +This collector monitors the system's clock performance and peers activity status + +It collects metrics by sending UDP packets to chronyd using the Chrony communication protocol v6. + +This collector is supported on all platforms. + +This collector supports collecting metrics from multiple instances of this integration, including remote instances. + + +### Default Behavior + +#### Auto-Detection + +This collector discovers Chrony instance running on the local host and listening on port 323. 
+On startup, it tries to collect metrics from: + +- 127.0.0.1:323 + + +#### Limits + +The default configuration for this integration does not impose any limits on data collection. + +#### Performance Impact + +The default configuration for this integration is not expected to impose a significant performance impact on the system. + + +## Metrics + +Metrics grouped by *scope*. + +The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels. + + + +### Per Chrony instance + +These metrics refer to the entire monitored application. + +This scope has no labels. + +Metrics: + +| Metric | Dimensions | Unit | +|:------|:----------|:----| +| chrony.stratum | stratum | level | +| chrony.current_correction | current_correction | seconds | +| chrony.root_delay | root_delay | seconds | +| chrony.root_dispersion | root_dispersion | seconds | +| chrony.last_offset | offset | seconds | +| chrony.rms_offset | offset | seconds | +| chrony.frequency | frequency | ppm | +| chrony.residual_frequency | residual_frequency | ppm | +| chrony.skew | skew | ppm | +| chrony.update_interval | update_interval | seconds | +| chrony.ref_measurement_time | ref_measurement_time | seconds | +| chrony.leap_status | normal, insert_second, delete_second, unsynchronised | status | +| chrony.activity | online, offline, burst_online, burst_offline, unresolved | sources | + + + +## Alerts + +There are no alerts configured by default for this integration. + + +## Setup + +### Prerequisites + +No action required. + +### Configuration + +#### File + +The configuration file name for this integration is `go.d/chrony.conf`. + + +You can edit the configuration file using the `edit-config` script from the +Netdata [config directory](/docs/netdata-agent/configuration/README.md#the-netdata-config-directory). 
+ +```bash +cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata +sudo ./edit-config go.d/chrony.conf +``` +#### Options + +The following options can be defined globally: update_every, autodetection_retry. + + +<details open><summary>Config options</summary> + +| Name | Description | Default | Required | +|:----|:-----------|:-------|:--------:| +| update_every | Data collection frequency. | 5 | no | +| autodetection_retry | Recheck interval in seconds. Zero means no recheck will be scheduled. | 0 | no | +| address | Server address. The format is IP:PORT. | 127.0.0.1:323 | yes | +| timeout | Connection timeout. Zero means no timeout. | 1 | no | + +</details> + +#### Examples + +##### Basic + +A basic example configuration. + +```yaml +jobs: + - name: local + address: 127.0.0.1:323 + +``` +##### Multi-instance + +> **Note**: When you define multiple jobs, their names must be unique. + +Collecting metrics from local and remote instances. + + +<details open><summary>Config</summary> + +```yaml +jobs: + - name: local + address: 127.0.0.1:323 + + - name: remote + address: 192.0.2.1:323 + +``` +</details> + + + +## Troubleshooting + +### Debug Mode + +**Important**: Debug mode is not supported for data collection jobs created via the UI using the Dyncfg feature. + +To troubleshoot issues with the `chrony` collector, run the `go.d.plugin` with the debug option enabled. The output +should give you clues as to why the collector isn't working. + +- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on + your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`. + + ```bash + cd /usr/libexec/netdata/plugins.d/ + ``` + +- Switch to the `netdata` user. 
+ + ```bash + sudo -u netdata -s + ``` + +- Run the `go.d.plugin` to debug the collector: + + ```bash + ./go.d.plugin -d -m chrony + ``` + +### Getting Logs + +If you're encountering problems with the `chrony` collector, follow these steps to retrieve logs and identify potential issues: + +- **Run the command** specific to your system (systemd, non-systemd, or Docker container). +- **Examine the output** for any warnings or error messages that might indicate issues. These messages should provide clues about the root cause of the problem. + +#### System with systemd + +Use the following command to view logs generated since the last Netdata service restart: + +```bash +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep chrony +``` + +#### System without systemd + +Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for the collector's name: + +```bash +grep chrony /var/log/netdata/collector.log +``` + +**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues. 
+ +#### Docker Container + +If your Netdata runs in a Docker container named "netdata" (replace if different), use this command: + +```bash +docker logs netdata 2>&1 | grep chrony +``` + + diff --git a/src/go/plugin/go.d/modules/chrony/metadata.yaml b/src/go/plugin/go.d/modules/chrony/metadata.yaml new file mode 100644 index 000000000..18f9152e6 --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/metadata.yaml @@ -0,0 +1,208 @@ +plugin_name: go.d.plugin +modules: + - meta: + id: collector-go.d.plugin-chrony + module_name: chrony + plugin_name: go.d.plugin + monitored_instance: + categories: + - data-collection.system-clock-and-ntp + icon_filename: chrony.jpg + name: Chrony + link: https://chrony.tuxfamily.org/ + alternative_monitored_instances: [] + keywords: [] + info_provided_to_referring_integrations: + description: "" + related_resources: + integrations: + list: [] + most_popular: false + overview: + data_collection: + metrics_description: This collector monitors the system's clock performance and peers activity status + method_description: It collects metrics by sending UDP packets to chronyd using the Chrony communication protocol v6. + supported_platforms: + include: [] + exclude: [] + multi_instance: true + additional_permissions: + description: "" + default_behavior: + auto_detection: + description: | + This collector discovers Chrony instance running on the local host and listening on port 323. + On startup, it tries to collect metrics from: + + - 127.0.0.1:323 + limits: + description: "" + performance_impact: + description: "" + setup: + prerequisites: + list: [] + configuration: + file: + name: go.d/chrony.conf + options: + description: | + The following options can be defined globally: update_every, autodetection_retry. + folding: + title: Config options + enabled: true + list: + - name: update_every + description: Data collection frequency. + default_value: 5 + required: false + - name: autodetection_retry + description: Recheck interval in seconds. 
Zero means no recheck will be scheduled. + default_value: 0 + required: false + - name: address + description: Server address. The format is IP:PORT. + default_value: 127.0.0.1:323 + required: true + - name: timeout + description: Connection timeout. Zero means no timeout. + default_value: 1 + required: false + examples: + folding: + title: Config + enabled: true + list: + - name: Basic + folding: + enabled: false + description: A basic example configuration. + config: | + jobs: + - name: local + address: 127.0.0.1:323 + - name: Multi-instance + description: | + > **Note**: When you define multiple jobs, their names must be unique. + + Collecting metrics from local and remote instances. + config: | + jobs: + - name: local + address: 127.0.0.1:323 + + - name: remote + address: 192.0.2.1:323 + troubleshooting: + problems: + list: [] + alerts: [] + metrics: + folding: + title: Metrics + enabled: false + description: "" + availability: [] + scopes: + - name: global + description: These metrics refer to the entire monitored application. 
+ labels: [] + metrics: + - name: chrony.stratum + availability: [] + description: Distance to the reference clock + unit: level + chart_type: line + dimensions: + - name: stratum + - name: chrony.current_correction + availability: [] + description: Current correction + unit: seconds + chart_type: line + dimensions: + - name: current_correction + - name: chrony.root_delay + availability: [] + description: Network path delay to stratum-1 + unit: seconds + chart_type: line + dimensions: + - name: root_delay + - name: chrony.root_dispersion + availability: [] + description: Dispersion accumulated back to stratum-1 + unit: seconds + chart_type: line + dimensions: + - name: root_dispersion + - name: chrony.last_offset + availability: [] + description: Offset on the last clock update + unit: seconds + chart_type: line + dimensions: + - name: offset + - name: chrony.rms_offset + availability: [] + description: Long-term average of the offset value + unit: seconds + chart_type: line + dimensions: + - name: offset + - name: chrony.frequency + availability: [] + description: Frequency + unit: ppm + chart_type: line + dimensions: + - name: frequency + - name: chrony.residual_frequency + availability: [] + description: Residual frequency + unit: ppm + chart_type: line + dimensions: + - name: residual_frequency + - name: chrony.skew + availability: [] + description: Skew + unit: ppm + chart_type: line + dimensions: + - name: skew + - name: chrony.update_interval + availability: [] + description: Interval between the last two clock updates + unit: seconds + chart_type: line + dimensions: + - name: update_interval + - name: chrony.ref_measurement_time + availability: [] + description: Time since the last measurement + unit: seconds + chart_type: line + dimensions: + - name: ref_measurement_time + - name: chrony.leap_status + availability: [] + description: Leap status + unit: status + chart_type: line + dimensions: + - name: normal + - name: insert_second + - name: delete_second + - 
name: unsynchronised + - name: chrony.activity + availability: [] + description: Peers activity + unit: sources + chart_type: stacked + dimensions: + - name: online + - name: offline + - name: burst_online + - name: burst_offline + - name: unresolved diff --git a/src/go/plugin/go.d/modules/chrony/testdata/config.json b/src/go/plugin/go.d/modules/chrony/testdata/config.json new file mode 100644 index 000000000..e86834720 --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/testdata/config.json @@ -0,0 +1,5 @@ +{ + "update_every": 123, + "address": "ok", + "timeout": 123.123 +} diff --git a/src/go/plugin/go.d/modules/chrony/testdata/config.yaml b/src/go/plugin/go.d/modules/chrony/testdata/config.yaml new file mode 100644 index 000000000..1b81d09eb --- /dev/null +++ b/src/go/plugin/go.d/modules/chrony/testdata/config.yaml @@ -0,0 +1,3 @@ +update_every: 123 +address: "ok" +timeout: 123.123 |