diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:18 +0000 |
commit | 5da14042f70711ea5cf66e034699730335462f66 (patch) | |
tree | 0f6354ccac934ed87a2d555f45be4c831cf92f4a /src/go/collectors/go.d.plugin/modules/coredns/collect.go | |
parent | Releasing debian version 1.44.3-2. (diff) | |
download | netdata-5da14042f70711ea5cf66e034699730335462f66.tar.xz netdata-5da14042f70711ea5cf66e034699730335462f66.zip |
Merging upstream version 1.45.3+dfsg.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/go/collectors/go.d.plugin/modules/coredns/collect.go')
-rw-r--r-- | src/go/collectors/go.d.plugin/modules/coredns/collect.go | 713 |
1 files changed, 713 insertions, 0 deletions
diff --git a/src/go/collectors/go.d.plugin/modules/coredns/collect.go b/src/go/collectors/go.d.plugin/modules/coredns/collect.go new file mode 100644 index 000000000..b77f9a29c --- /dev/null +++ b/src/go/collectors/go.d.plugin/modules/coredns/collect.go @@ -0,0 +1,713 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +package coredns + +import ( + "errors" + "fmt" + "strings" + + "github.com/blang/semver/v4" + "github.com/netdata/netdata/go/go.d.plugin/pkg/prometheus" + "github.com/netdata/netdata/go/go.d.plugin/pkg/stm" +) + +const ( + metricPanicCountTotal169orOlder = "coredns_panic_count_total" + metricRequestCountTotal169orOlder = "coredns_dns_request_count_total" + metricRequestTypeCountTotal169orOlder = "coredns_dns_request_type_count_total" + metricResponseRcodeCountTotal169orOlder = "coredns_dns_response_rcode_count_total" + + metricPanicCountTotal170orNewer = "coredns_panics_total" + metricRequestCountTotal170orNewer = "coredns_dns_requests_total" + metricRequestTypeCountTotal170orNewer = "coredns_dns_requests_total" + metricResponseRcodeCountTotal170orNewer = "coredns_dns_responses_total" +) + +var ( + empty = "" + dropped = "dropped" + emptyServerReplaceName = "empty" + rootZoneReplaceName = "root" + version169 = semver.MustParse("1.6.9") +) + +type requestMetricsNames struct { + panicCountTotal string + // true for all metrics below: + // - if none of server block matches 'server' tag is "", empty server has only one zone - dropped. + // example: + // coredns_dns_requests_total{family="1",proto="udp",server="",zone="dropped"} 1 for + // - dropped requests are added to both dropped and corresponding zone + // example: + // coredns_dns_requests_total{family="1",proto="udp",server="dns://:53",zone="dropped"} 2 + // coredns_dns_requests_total{family="1",proto="udp",server="dns://:53",zone="ya.ru."} 2 + requestCountTotal string + requestTypeCountTotal string + responseRcodeCountTotal string +} + +func (cd *CoreDNS) collect() (map[string]int64, error) { + raw, err := cd.prom.ScrapeSeries() + + if err != nil { + return nil, err + } + + mx := newMetrics() + + // some metric names are different depending on the version + // update them once + if !cd.skipVersionCheck { + cd.updateVersionDependentMetrics(raw) + cd.skipVersionCheck = true + } + + //we can only get these metrics if we know the server version + if cd.version == nil { + return nil, errors.New("unable to determine server version") + } + + cd.collectPanic(mx, raw) + cd.collectSummaryRequests(mx, raw) + cd.collectSummaryRequestsPerType(mx, raw) + cd.collectSummaryResponsesPerRcode(mx, raw) + + if cd.perServerMatcher != nil { + cd.collectPerServerRequests(mx, raw) + //cd.collectPerServerRequestsDuration(mx, raw) + cd.collectPerServerRequestPerType(mx, raw) + cd.collectPerServerResponsePerRcode(mx, raw) + } + + if cd.perZoneMatcher != nil { + cd.collectPerZoneRequests(mx, raw) + //cd.collectPerZoneRequestsDuration(mx, raw) + cd.collectPerZoneRequestsPerType(mx, raw) + cd.collectPerZoneResponsesPerRcode(mx, raw) + } + + return stm.ToMap(mx), nil +} + +func (cd *CoreDNS) updateVersionDependentMetrics(raw prometheus.Series) { + version := cd.parseVersion(raw) + if version == nil { + return + } + cd.version = version + if cd.version.LTE(version169) { + cd.metricNames.panicCountTotal = metricPanicCountTotal169orOlder + cd.metricNames.requestCountTotal = metricRequestCountTotal169orOlder + cd.metricNames.requestTypeCountTotal = metricRequestTypeCountTotal169orOlder + cd.metricNames.responseRcodeCountTotal = metricResponseRcodeCountTotal169orOlder + } else { + cd.metricNames.panicCountTotal = metricPanicCountTotal170orNewer + cd.metricNames.requestCountTotal = metricRequestCountTotal170orNewer + cd.metricNames.requestTypeCountTotal = metricRequestTypeCountTotal170orNewer + cd.metricNames.responseRcodeCountTotal = metricResponseRcodeCountTotal170orNewer + } +} + +func (cd *CoreDNS) parseVersion(raw prometheus.Series) *semver.Version { + var versionStr string + for _, metric := range raw.FindByName("coredns_build_info") { + versionStr = metric.Labels.Get("version") + } + if versionStr == "" { + cd.Error("cannot find version string in metrics") + return nil + } + + version, err := semver.Make(versionStr) + if err != nil { + cd.Errorf("failed to find server version: %v", err) + return nil + } + return &version +} + +func (cd *CoreDNS) collectPanic(mx *metrics, raw prometheus.Series) { + mx.Panic.Set(raw.FindByName(cd.metricNames.panicCountTotal).Max()) +} + +func (cd *CoreDNS) collectSummaryRequests(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName(cd.metricNames.requestCountTotal) { + var ( + family = metric.Labels.Get("family") + proto = metric.Labels.Get("proto") + server = metric.Labels.Get("server") + zone = metric.Labels.Get("zone") + value = metric.Value + ) + + if family == empty || proto == empty || zone == empty { + continue + } + + if server == empty { + mx.NoZoneDropped.Add(value) + } + + setRequestPerStatus(&mx.Summary.Request, value, server, zone) + + if zone == dropped && server != empty { + continue + } + + mx.Summary.Request.Total.Add(value) + setRequestPerIPFamily(&mx.Summary.Request, value, family) + setRequestPerProto(&mx.Summary.Request, value, proto) + } +} + +//func (cd *CoreDNS) collectSummaryRequestsDuration(mx *metrics, raw prometheus.Series) { +// for _, metric := range raw.FindByName(metricRequestDurationSecondsBucket) { +// var ( +// server = metric.Labels.Get("server") +// zone = metric.Labels.Get("zone") +// le = metric.Labels.Get("le") +// value = metric.Value +// ) +// +// if zone == empty || zone == dropped && server != empty || le == empty { +// continue +// } +// +// setRequestDuration(&mx.Summary.Request, value, le) +// } +// processRequestDuration(&mx.Summary.Request) +//} + +func (cd *CoreDNS) collectSummaryRequestsPerType(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName(cd.metricNames.requestTypeCountTotal) { + var ( + server = metric.Labels.Get("server") + typ = metric.Labels.Get("type") + zone = metric.Labels.Get("zone") + value = metric.Value + ) + + if typ == empty || zone == empty || zone == dropped && server != empty { + continue + } + + setRequestPerType(&mx.Summary.Request, value, typ) + } +} + +func (cd *CoreDNS) collectSummaryResponsesPerRcode(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName(cd.metricNames.responseRcodeCountTotal) { + var ( + rcode = metric.Labels.Get("rcode") + server = metric.Labels.Get("server") + zone = metric.Labels.Get("zone") + value = metric.Value + ) + + if rcode == empty || zone == empty || zone == dropped && server != empty { + continue + } + + setResponsePerRcode(&mx.Summary.Response, value, rcode) + } +} + +// Per Server + +func (cd *CoreDNS) collectPerServerRequests(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName(cd.metricNames.requestCountTotal) { + var ( + family = metric.Labels.Get("family") + proto = metric.Labels.Get("proto") + server = metric.Labels.Get("server") + zone = metric.Labels.Get("zone") + value = metric.Value + ) + + if family == empty || proto == empty || zone == empty { + continue + } + + if !cd.perServerMatcher.MatchString(server) { + continue + } + + if server == empty { + server = emptyServerReplaceName + } + + if !cd.collectedServers[server] { + cd.addNewServerCharts(server) + cd.collectedServers[server] = true + } + + if _, ok := mx.PerServer[server]; !ok { + mx.PerServer[server] = &requestResponse{} + } + + srv := mx.PerServer[server] + + setRequestPerStatus(&srv.Request, value, server, zone) + + if zone == dropped && server != emptyServerReplaceName { + continue + } + + srv.Request.Total.Add(value) + setRequestPerIPFamily(&srv.Request, value, family) + setRequestPerProto(&srv.Request, value, proto) + } +} + +//func (cd *CoreDNS) collectPerServerRequestsDuration(mx *metrics, raw prometheus.Series) { +// for _, metric := range raw.FindByName(metricRequestDurationSecondsBucket) { +// var ( +// server = metric.Labels.Get("server") +// zone = metric.Labels.Get("zone") +// le = metric.Labels.Get("le") +// value = metric.Value +// ) +// +// if zone == empty || zone == dropped && server != empty || le == empty { +// continue +// } +// +// if !cd.perServerMatcher.MatchString(server) { +// continue +// } +// +// if server == empty { +// server = emptyServerReplaceName +// } +// +// if !cd.collectedServers[server] { +// cd.addNewServerCharts(server) +// cd.collectedServers[server] = true +// } +// +// if _, ok := mx.PerServer[server]; !ok { +// mx.PerServer[server] = &requestResponse{} +// } +// +// setRequestDuration(&mx.PerServer[server].Request, value, le) +// } +// for _, s := range mx.PerServer { +// processRequestDuration(&s.Request) +// } +//} + +func (cd *CoreDNS) collectPerServerRequestPerType(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName(cd.metricNames.requestTypeCountTotal) { + var ( + server = metric.Labels.Get("server") + typ = metric.Labels.Get("type") + zone = metric.Labels.Get("zone") + value = metric.Value + ) + + if typ == empty || zone == empty || zone == dropped && server != empty { + continue + } + + if !cd.perServerMatcher.MatchString(server) { + continue + } + + if server == empty { + server = emptyServerReplaceName + } + + if !cd.collectedServers[server] { + cd.addNewServerCharts(server) + cd.collectedServers[server] = true + } + + if _, ok := mx.PerServer[server]; !ok { + mx.PerServer[server] = &requestResponse{} + } + + setRequestPerType(&mx.PerServer[server].Request, value, typ) + } +} + +func (cd *CoreDNS) collectPerServerResponsePerRcode(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName(cd.metricNames.responseRcodeCountTotal) { + var ( + rcode = metric.Labels.Get("rcode") + server = metric.Labels.Get("server") + zone = metric.Labels.Get("zone") + value = metric.Value + ) + + if rcode == empty || zone == empty || zone == dropped && server != empty { + continue + } + + if !cd.perServerMatcher.MatchString(server) { + continue + } + + if server == empty { + server = emptyServerReplaceName + } + + if !cd.collectedServers[server] { + cd.addNewServerCharts(server) + cd.collectedServers[server] = true + } + + if _, ok := mx.PerServer[server]; !ok { + mx.PerServer[server] = &requestResponse{} + } + + setResponsePerRcode(&mx.PerServer[server].Response, value, rcode) + } +} + +// Per Zone + +func (cd *CoreDNS) collectPerZoneRequests(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName(cd.metricNames.requestCountTotal) { + var ( + family = metric.Labels.Get("family") + proto = metric.Labels.Get("proto") + zone = metric.Labels.Get("zone") + value = metric.Value + ) + + if family == empty || proto == empty || zone == empty { + continue + } + + if !cd.perZoneMatcher.MatchString(zone) { + continue + } + + if zone == "." { + zone = rootZoneReplaceName + } + + if !cd.collectedZones[zone] { + cd.addNewZoneCharts(zone) + cd.collectedZones[zone] = true + } + + if _, ok := mx.PerZone[zone]; !ok { + mx.PerZone[zone] = &requestResponse{} + } + + zoneMX := mx.PerZone[zone] + zoneMX.Request.Total.Add(value) + setRequestPerIPFamily(&zoneMX.Request, value, family) + setRequestPerProto(&zoneMX.Request, value, proto) + } +} + +//func (cd *CoreDNS) collectPerZoneRequestsDuration(mx *metrics, raw prometheus.Series) { +// for _, metric := range raw.FindByName(metricRequestDurationSecondsBucket) { +// var ( +// zone = metric.Labels.Get("zone") +// le = metric.Labels.Get("le") +// value = metric.Value +// ) +// +// if zone == empty || le == empty { +// continue +// } +// +// if !cd.perZoneMatcher.MatchString(zone) { +// continue +// } +// +// if zone == "." { +// zone = rootZoneReplaceName +// } +// +// if !cd.collectedZones[zone] { +// cd.addNewZoneCharts(zone) +// cd.collectedZones[zone] = true +// } +// +// if _, ok := mx.PerZone[zone]; !ok { +// mx.PerZone[zone] = &requestResponse{} +// } +// +// setRequestDuration(&mx.PerZone[zone].Request, value, le) +// } +// for _, s := range mx.PerZone { +// processRequestDuration(&s.Request) +// } +//} + +func (cd *CoreDNS) collectPerZoneRequestsPerType(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName(cd.metricNames.requestTypeCountTotal) { + var ( + typ = metric.Labels.Get("type") + zone = metric.Labels.Get("zone") + value = metric.Value + ) + + if typ == empty || zone == empty { + continue + } + + if !cd.perZoneMatcher.MatchString(zone) { + continue + } + + if zone == "." { + zone = rootZoneReplaceName + } + + if !cd.collectedZones[zone] { + cd.addNewZoneCharts(zone) + cd.collectedZones[zone] = true + } + + if _, ok := mx.PerZone[zone]; !ok { + mx.PerZone[zone] = &requestResponse{} + } + + setRequestPerType(&mx.PerZone[zone].Request, value, typ) + } +} + +func (cd *CoreDNS) collectPerZoneResponsesPerRcode(mx *metrics, raw prometheus.Series) { + for _, metric := range raw.FindByName(cd.metricNames.responseRcodeCountTotal) { + var ( + rcode = metric.Labels.Get("rcode") + zone = metric.Labels.Get("zone") + value = metric.Value + ) + + if rcode == empty || zone == empty { + continue + } + + if !cd.perZoneMatcher.MatchString(zone) { + continue + } + + if zone == "." { + zone = rootZoneReplaceName + } + + if !cd.collectedZones[zone] { + cd.addNewZoneCharts(zone) + cd.collectedZones[zone] = true + } + + if _, ok := mx.PerZone[zone]; !ok { + mx.PerZone[zone] = &requestResponse{} + } + + setResponsePerRcode(&mx.PerZone[zone].Response, value, rcode) + } +} + +// --- + +func setRequestPerIPFamily(mx *request, value float64, family string) { + switch family { + case "1": + mx.PerIPFamily.IPv4.Add(value) + case "2": + mx.PerIPFamily.IPv6.Add(value) + } +} + +func setRequestPerProto(mx *request, value float64, proto string) { + switch proto { + case "udp": + mx.PerProto.UDP.Add(value) + case "tcp": + mx.PerProto.TCP.Add(value) + } +} + +func setRequestPerStatus(mx *request, value float64, server, zone string) { + switch zone { + default: + mx.PerStatus.Processed.Add(value) + case "dropped": + mx.PerStatus.Dropped.Add(value) + if server == empty || server == emptyServerReplaceName { + return + } + mx.PerStatus.Processed.Sub(value) + } +} + +func setRequestPerType(mx *request, value float64, typ string) { + switch typ { + default: + mx.PerType.Other.Add(value) + case "A": + mx.PerType.A.Add(value) + case "AAAA": + mx.PerType.AAAA.Add(value) + case "MX": + mx.PerType.MX.Add(value) + case "SOA": + mx.PerType.SOA.Add(value) + case "CNAME": + mx.PerType.CNAME.Add(value) + case "PTR": + mx.PerType.PTR.Add(value) + case "TXT": + mx.PerType.TXT.Add(value) + case "NS": + mx.PerType.NS.Add(value) + case "DS": + mx.PerType.DS.Add(value) + case "DNSKEY": + mx.PerType.DNSKEY.Add(value) + case "RRSIG": + mx.PerType.RRSIG.Add(value) + case "NSEC": + mx.PerType.NSEC.Add(value) + case "NSEC3": + mx.PerType.NSEC3.Add(value) + case "IXFR": + mx.PerType.IXFR.Add(value) + case "ANY": + mx.PerType.ANY.Add(value) + } +} + +func setResponsePerRcode(mx *response, value float64, rcode string) { + mx.Total.Add(value) + + switch rcode { + default: + mx.PerRcode.Other.Add(value) + case "NOERROR": + mx.PerRcode.NOERROR.Add(value) + case "FORMERR": + mx.PerRcode.FORMERR.Add(value) + case "SERVFAIL": + mx.PerRcode.SERVFAIL.Add(value) + case "NXDOMAIN": + mx.PerRcode.NXDOMAIN.Add(value) + case "NOTIMP": + mx.PerRcode.NOTIMP.Add(value) + case "REFUSED": + mx.PerRcode.REFUSED.Add(value) + case "YXDOMAIN": + mx.PerRcode.YXDOMAIN.Add(value) + case "YXRRSET": + mx.PerRcode.YXRRSET.Add(value) + case "NXRRSET": + mx.PerRcode.NXRRSET.Add(value) + case "NOTAUTH": + mx.PerRcode.NOTAUTH.Add(value) + case "NOTZONE": + mx.PerRcode.NOTZONE.Add(value) + case "BADSIG": + mx.PerRcode.BADSIG.Add(value) + case "BADKEY": + mx.PerRcode.BADKEY.Add(value) + case "BADTIME": + mx.PerRcode.BADTIME.Add(value) + case "BADMODE": + mx.PerRcode.BADMODE.Add(value) + case "BADNAME": + mx.PerRcode.BADNAME.Add(value) + case "BADALG": + mx.PerRcode.BADALG.Add(value) + case "BADTRUNC": + mx.PerRcode.BADTRUNC.Add(value) + case "BADCOOKIE": + mx.PerRcode.BADCOOKIE.Add(value) + } +} + +//func setRequestDuration(mx *request, value float64, le string) { +// switch le { +// case "0.00025": +// mx.Duration.LE000025.Add(value) +// case "0.0005": +// mx.Duration.LE00005.Add(value) +// case "0.001": +// mx.Duration.LE0001.Add(value) +// case "0.002": +// mx.Duration.LE0002.Add(value) +// case "0.004": +// mx.Duration.LE0004.Add(value) +// case "0.008": +// mx.Duration.LE0008.Add(value) +// case "0.016": +// mx.Duration.LE0016.Add(value) +// case "0.032": +// mx.Duration.LE0032.Add(value) +// case "0.064": +// mx.Duration.LE0064.Add(value) +// case "0.128": +// mx.Duration.LE0128.Add(value) +// case "0.256": +// mx.Duration.LE0256.Add(value) +// case "0.512": +// mx.Duration.LE0512.Add(value) +// case "1.024": +// mx.Duration.LE1024.Add(value) +// case "2.048": +// mx.Duration.LE2048.Add(value) +// case "4.096": +// mx.Duration.LE4096.Add(value) +// case "8.192": +// mx.Duration.LE8192.Add(value) +// case "+Inf": +// mx.Duration.LEInf.Add(value) +// } +//} + +//func processRequestDuration(mx *request) { +// mx.Duration.LEInf.Sub(mx.Duration.LE8192.Value()) +// mx.Duration.LE8192.Sub(mx.Duration.LE4096.Value()) +// mx.Duration.LE4096.Sub(mx.Duration.LE2048.Value()) +// mx.Duration.LE2048.Sub(mx.Duration.LE1024.Value()) +// mx.Duration.LE1024.Sub(mx.Duration.LE0512.Value()) +// mx.Duration.LE0512.Sub(mx.Duration.LE0256.Value()) +// mx.Duration.LE0256.Sub(mx.Duration.LE0128.Value()) +// mx.Duration.LE0128.Sub(mx.Duration.LE0064.Value()) +// mx.Duration.LE0064.Sub(mx.Duration.LE0032.Value()) +// mx.Duration.LE0032.Sub(mx.Duration.LE0016.Value()) +// mx.Duration.LE0016.Sub(mx.Duration.LE0008.Value()) +// mx.Duration.LE0008.Sub(mx.Duration.LE0004.Value()) +// mx.Duration.LE0004.Sub(mx.Duration.LE0002.Value()) +// mx.Duration.LE0002.Sub(mx.Duration.LE0001.Value()) +// mx.Duration.LE0001.Sub(mx.Duration.LE00005.Value()) +// mx.Duration.LE00005.Sub(mx.Duration.LE000025.Value()) +//} + +// --- + +func (cd *CoreDNS) addNewServerCharts(name string) { + charts := serverCharts.Copy() + for _, chart := range *charts { + chart.ID = fmt.Sprintf(chart.ID, "server", name) + chart.Title = fmt.Sprintf(chart.Title, "Server", name) + chart.Fam = fmt.Sprintf(chart.Fam, "server", name) + + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, name) + } + } + _ = cd.charts.Add(*charts...) +} + +func (cd *CoreDNS) addNewZoneCharts(name string) { + charts := zoneCharts.Copy() + for _, chart := range *charts { + chart.ID = fmt.Sprintf(chart.ID, "zone", name) + chart.Title = fmt.Sprintf(chart.Title, "Zone", name) + chart.Fam = fmt.Sprintf(chart.Fam, "zone", name) + chart.Ctx = strings.Replace(chart.Ctx, "coredns.server_", "coredns.zone_", 1) + + for _, dim := range chart.Dims { + dim.ID = fmt.Sprintf(dim.ID, name) + } + } + _ = cd.charts.Add(*charts...) +} |