-- knot-resolver/modules/http/prometheus.lua
-- SPDX-License-Identifier: GPL-3.0-or-later
-- Module implementation
local M = {
    namespace = '',
    finalize = function (_ --[[metrics]]) end,
}
-- Gauge metrics
local gauges = {
    ['worker.concurrent'] = true,
    ['worker.rss'] = true,
}
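
-- Sum numeric values from per-worker result tables into 't', prefixing every
-- key with 'prefix' (e.g. 'cache.' or 'worker.').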
local function merge(t, results, prefix)
    for _, result in pairs(results) do
        if type(result) == 'table' then
            for k, v in pairs(result) do
                local val = t[prefix..k]
                t[prefix..k] = (val or 0) + v
            end
        end
    end
end
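
-- Collect one aggregated snapshot of resolver, cache and worker statistics
-- summed across all worker processes.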
local function getstats()
    local t = {}
    merge(t, map 'stats.list()', '')
    merge(t, map 'cache.stats()', 'cache.')
    merge(t, map 'worker.stats()', 'worker.')
    return t
end

-- @returns current stats + difference against previous data set passed in @param prev
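-- Metrics listed in 'gauges' are passed through as absolute values; all other
-- metrics are reported as the difference against the previous snapshot.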
local function snapshot_start(prev)
    assert(type(prev) == 'table', 'table with previous values expected')
    local is_empty = true
    -- Get current snapshot
    local cur, stats_dt = getstats(), {}
    for k, v in pairs(cur) do
        if gauges[k] then
            stats_dt[k] = v
        else
            stats_dt[k] = v - (prev[k] or 0)
        end
        is_empty = is_empty and stats_dt[k] == 0
    end
    -- Calculate upstreams and geotag them if possible
    local upstreams
    if http.geoip then
        upstreams = stats.upstreams()
        for k, v in pairs(upstreams) do
            local gi
            if string.find(k, '.', 1, true) then
                gi = http.geoip:search_ipv4(k)
            else
                gi = http.geoip:search_ipv6(k)
            end
            if gi then
                upstreams[k] = {data=v, location=gi.location, country=gi.country and gi.country.iso_code}
            end
        end
    end
    -- Aggregate per-worker metrics
    local wdata = {}
    for _, info in pairs(map 'worker.info()') do
        if type(info) == 'table' then
            wdata[tostring(info.pid)] = {
                rss = info.rss,
                usertime = info.usertime,
                systime = info.systime,
                pagefaults = info.pagefaults,
                queries = info.queries
            }
        end
    end
    -- Return an update only when at least one value changed
    if not is_empty then
        local update = {time=os.time(), stats=stats_dt, upstreams=upstreams, workers=wdata}
        return cur, update
    end
    return cur, nil
end

-- Websocket handler: stream stats snapshots to the client, one JSON update per second
local function stream_stats(_, ws)
    local ok = true
    -- Publish stats updates periodically
    local prev = getstats()
    while ok do
        worker.sleep(1)
        local update
        prev, update = snapshot_start(prev)
        local push = tojson(update)
        ok = ws:send(push)
    end
end

-- Transform metrics from Graphite to Prometheus format
-- See: https://gitlab.nic.cz/knot/knot-resolver/-/issues/650
-- E.g.:
-- worker.ipv4 -> worker_ipv4
-- answer.blocked;stype=A -> answer_blocked{stype="A"}
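-- answer.blocked;stype=A;rcode=NOERROR -> answer_blocked{stype="A",rcode="NOERROR"}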
local function get_metric(key)
    local key_index, key_len, key_tag = 0, #key, 0
    return select(1, key:gsub('.', function (c)
        key_index = key_index + 1
        if key_tag == 0 then
            if c == '.' then return '_' end
            if c == ';' then key_tag = 1; return '{' end
        elseif key_tag == 1 then
            if key_index == key_len then
                if c == '=' then return '=""}'
                else return c .. '"}' end
            end
            if c == '=' then key_tag = 2; return '="' end
        elseif key_tag == 2 then
            if key_index == key_len then
                if c == ';' then return '"}'
                else return c .. '"}' end
            end
            if c == ';' then key_tag = 1; return '",' end
        end
        return nil
    end))
end

-- Render stats in Prometheus text format
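-- Latency bands (answer.*ms and answer.slow) are folded into one cumulative
-- 'latency' histogram; everything else is exported as a counter.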
local function serve_prometheus()
    -- First aggregate metrics list and print counters
    local slist, render = getstats(), {}
    local latency = {}
    local counter = '# TYPE %s counter\n%s %f'
    for k, v in pairs(slist) do
        k = get_metric(k)
        -- Aggregate latency bands into a histogram
        local band = k:match('answer_([%d]+)ms')
        if band then
            table.insert(latency, {band, v})
        elseif k == 'answer_slow' then
            table.insert(latency, {'+Inf', v})
        else
            -- Everything else is rendered as a plain counter
            local key = M.namespace .. k
            local name, label = key:match('^([^{]+)(.*)$')
            table.insert(render, string.format(counter, name, name .. label, v))
        end
    end
    -- Fill in latency histogram
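    -- Prometheus histogram buckets are cumulative, so 'count' keeps a running
    -- total across the sorted bands.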
    local function kweight(x) return tonumber(x) or math.huge end
    table.sort(latency, function (a, b) return kweight(a[1]) < kweight(b[1]) end)
    table.insert(render, string.format('# TYPE %slatency histogram', M.namespace))
    local count, sum = 0.0, 0.0
    for _, e in ipairs(latency) do
        -- The information about the +Inf bin is lost, so we treat it
        -- as a timeout (3000ms) for metrics purposes
        count = count + e[2]
        sum = sum + e[2] * (math.min(tonumber(e[1]), 3000.0))
        table.insert(render, string.format('%slatency_bucket{le="%s"} %f', M.namespace, e[1], count))
    end
    table.insert(render, string.format('%slatency_count %f', M.namespace, count))
    table.insert(render, string.format('%slatency_sum %f', M.namespace, sum))
    -- Finalize metrics table before rendering
    if type(M.finalize) == 'function' then
        M.finalize(render)
    end
    return table.concat(render, '\n') .. '\n'
end

-- Export module interface
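-- Each entry maps a URL path to {content type, GET handler, optional websocket handler}.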
M.endpoints = {
    ['/stats'] = {'application/json', getstats, stream_stats},
    ['/frequent'] = {'application/json', function () return stats.frequent() end},
    ['/upstreams'] = {'application/json', function () return stats.upstreams() end},
    ['/bogus'] = {'application/json', function () return bogus_log.frequent() end},
    ['/metrics'] = {'text/plain; version=0.0.4', serve_prometheus},
}

return M