/* * This file is part of PowerDNS or dnsdist. * Copyright -- PowerDNS.COM B.V. and its contributors * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * In addition, for the avoidance of any doubt, permission is granted to * link this program with OpenSSL and to (re)distribute the binaries * produced as the result of such linking. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #include #include #include #include #include "ext/json11/json11.hpp" #include #include "base64.hh" #include "connection-management.hh" #include "dnsdist.hh" #include "dnsdist-dynblocks.hh" #include "dnsdist-healthchecks.hh" #include "dnsdist-prometheus.hh" #include "dnsdist-web.hh" #include "dolog.hh" #include "gettime.hh" #include "htmlfiles.h" #include "threadname.hh" #include "sodcrypto.hh" #include "sstuff.hh"

/* NOTE(review): the bracketed parts of this file (header names after the bare
   #include directives above, template argument lists such as in
   `std::unique_ptr password` below) look like they were lost in this copy -
   confirm against the upstream dnsdist sources before building. */

/* Settings of the built-in web server. */
struct WebserverConfig
{
  WebserverConfig()
  {
    /* seed the ACL with the IPv4 and IPv6 loopback addresses */
    acl.toMasks("127.0.0.1, ::1");
  }

  /* client ACL, printed by getWebserverConfig() */
  NetmaskGroup acl;
  /* web password; getWebserverConfig() only reports whether it is set */
  std::unique_ptr password;
  /* API key; getWebserverConfig() only reports whether it is set */
  std::unique_ptr apiKey;
  /* optional name / value pairs, listed as "Custom headers" by getWebserverConfig() */
  boost::optional > customHeaders;
  /* reported as "Statistics require authentication"; defaults to true */
  bool statsRequireAuthentication{true};
};

/* reported as "API writable" by getWebserverConfig() */
bool g_apiReadWrite{false};
/* the web server settings, read through lock() */
LockGuarded g_webserverConfig;
/* reported as "API configuration directory" by getWebserverConfig() */
std::string g_apiConfigDirectory;

static const MetricDefinitionStorage s_metricDefinitions;
/* constructed with 100; getWebserverConfig() prints its
   getMaxConcurrentConnections() value as "Maximum concurrent connections" */
static ConcurrentConnectionManager s_connManager(100);

/* Returns a multi-line, human-readable summary of the web server settings.
   The password and the API key are never printed, only "set" or "unset". */
std::string getWebserverConfig()
{
  ostringstream out;

  {
    /* the inner scope bounds the lifetime of the handle returned by lock() */
    auto config = g_webserverConfig.lock();
    out << "Current web server configuration:" << endl;
    out << "ACL: " << config->acl.toString() << endl;
    out << "Custom headers: ";
    if (config->customHeaders) {
      /* one " - name: value" line per configured header */
      out << endl;
      for (const auto& header : *config->customHeaders) {
        out << " - " << header.first << ": " << header.second << endl;
      }
    }
    else {
      out << "None" << endl;
    }
    out << "Statistics require authentication: " << (config->statsRequireAuthentication ? "yes" : "no") << endl;
    out << "Password: " << (config->password ? "set" : "unset") << endl;
    out << "API key: " << (config->apiKey ? "set" : "unset") << endl;
  }
  /* the remaining values come from file-level globals, not from the locked config */
  out << "API writable: " << (g_apiReadWrite ? "yes" : "no") << endl;
  out << "API configuration directory: " << g_apiConfigDirectory << endl;
  out << "Maximum concurrent connections: " << s_connManager.getMaxConcurrentConnections() << endl;

  return out.str();
}

/* One accepted web client: its address plus the socket wrapping its descriptor.
   Construction registers the connection with s_connManager, destruction
   releases it again. Movable, not copyable. */
class WebClientConnection
{
public:
  /* Throws std::runtime_error when s_connManager.registerConnection() returns
     false. d_socket has already been constructed from fd at that point. */
  WebClientConnection(const ComboAddress& client, int fd): d_client(client), d_socket(fd)
  {
    if (!s_connManager.registerConnection()) {
      throw std::runtime_error("Too many concurrent web client connections");
    }
  }
  /* Takes over the socket of rhs; does not register a second connection. */
  WebClientConnection(WebClientConnection&& rhs): d_client(rhs.d_client), d_socket(std::move(rhs.d_socket))
  {
  }

  WebClientConnection(const WebClientConnection&) = delete;
  WebClientConnection& operator=(const WebClientConnection&) = delete;

  ~WebClientConnection()
  {
    /* Only an object whose socket handle is not -1 releases its registration.
       NOTE(review): presumably a moved-from Socket reports -1, so that a
       connection is released exactly once - confirm in sstuff.hh */
    if (d_socket.getHandle() != -1) {
      s_connManager.releaseConnection();
    }
  }

  const Socket& getSocket() const
  {
    return d_socket;
  }

  const ComboAddress& getClient() const
  {
    return d_client;
  }

private:
  ComboAddress d_client;
  Socket d_socket;
};

/* Table keyed by metric name; each value carries the Prometheus metric type
   and a description of the metric. */
const std::map MetricDefinitionStorage::metrics{
  { "responses", MetricDefinition(PrometheusMetricType::counter, "Number of responses received from backends") },
  { "servfail-responses", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received from backends") },
  { "queries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries")},
  { "frontend-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers sent to clients")},
  { "frontend-servfail",
MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers sent to clients")}, { "frontend-noerror", MetricDefinition(PrometheusMetricType::counter, "Number of NoError answers sent to clients")}, { "acl-drops", MetricDefinition(PrometheusMetricType::counter, "Number of packets dropped because of the ACL")}, { "rule-drop", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a rule")}, { "rule-nxdomain", MetricDefinition(PrometheusMetricType::counter, "Number of NXDomain answers returned because of a rule")}, { "rule-refused", MetricDefinition(PrometheusMetricType::counter, "Number of Refused answers returned because of a rule")}, { "rule-servfail", MetricDefinition(PrometheusMetricType::counter, "Number of SERVFAIL answers received because of a rule")}, { "rule-truncated", MetricDefinition(PrometheusMetricType::counter, "Number of truncated answers returned because of a rule")}, { "self-answered", MetricDefinition(PrometheusMetricType::counter, "Number of self-answered responses")}, { "downstream-timeouts", MetricDefinition(PrometheusMetricType::counter, "Number of queries not answered in time by a backend")}, { "downstream-send-errors", MetricDefinition(PrometheusMetricType::counter, "Number of errors when sending a query to a backend")}, { "trunc-failures", MetricDefinition(PrometheusMetricType::counter, "Number of errors encountered while truncating an answer")}, { "no-policy", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because no server was available")}, { "latency0-1", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in less than 1ms")}, { "latency1-10", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 1-10 ms")}, { "latency10-50", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 10-50 ms")}, { "latency50-100", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 
50-100 ms")}, { "latency100-1000", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in 100-1000 ms")}, { "latency-slow", MetricDefinition(PrometheusMetricType::counter, "Number of queries answered in more than 1 second")}, { "latency-avg100", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 100 packets")}, { "latency-avg1000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000 packets")}, { "latency-avg10000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 10000 packets")}, { "latency-avg1000000", MetricDefinition(PrometheusMetricType::gauge, "Average response latency in microseconds of the last 1000000 packets")}, { "uptime", MetricDefinition(PrometheusMetricType::gauge, "Uptime of the dnsdist process in seconds")}, { "real-memory-usage", MetricDefinition(PrometheusMetricType::gauge, "Current memory usage in bytes")}, { "noncompliant-queries", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped as non-compliant")}, { "noncompliant-responses", MetricDefinition(PrometheusMetricType::counter, "Number of answers from a backend dropped as non-compliant")}, { "rdqueries", MetricDefinition(PrometheusMetricType::counter, "Number of received queries with the recursion desired bit set")}, { "empty-queries", MetricDefinition(PrometheusMetricType::counter, "Number of empty queries received from clients")}, { "cache-hits", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer was retrieved from cache")}, { "cache-misses", MetricDefinition(PrometheusMetricType::counter, "Number of times an answer not found in the cache")}, { "cpu-iowait", MetricDefinition(PrometheusMetricType::counter, "Time waiting for I/O to complete by the whole system, in units of USER_HZ")}, { "cpu-user-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by 
dnsdist in the user state")}, { "cpu-steal", MetricDefinition(PrometheusMetricType::counter, "Stolen time, which is the time spent by the whole system in other operating systems when running in a virtualized environment, in units of USER_HZ")}, { "cpu-sys-msec", MetricDefinition(PrometheusMetricType::counter, "Milliseconds spent by dnsdist in the system state")}, { "fd-usage", MetricDefinition(PrometheusMetricType::gauge, "Number of currently used file descriptors")}, { "dyn-blocked", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of a dynamic block")}, { "dyn-block-nmg-size", MetricDefinition(PrometheusMetricType::gauge, "Number of dynamic blocks entries") }, { "security-status", MetricDefinition(PrometheusMetricType::gauge, "Security status of this software. 0=unknown, 1=OK, 2=upgrade recommended, 3=upgrade mandatory") }, { "doh-query-pipe-full", MetricDefinition(PrometheusMetricType::counter, "Number of DoH queries dropped because the internal pipe used to distribute queries was full") }, { "doh-response-pipe-full", MetricDefinition(PrometheusMetricType::counter, "Number of DoH responses dropped because the internal pipe used to distribute responses was full") }, { "outgoing-doh-query-pipe-full", MetricDefinition(PrometheusMetricType::counter, "Number of outgoing DoH queries dropped because the internal pipe used to distribute queries was full") }, { "tcp-query-pipe-full", MetricDefinition(PrometheusMetricType::counter, "Number of TCP queries dropped because the internal pipe used to distribute queries was full") }, { "tcp-cross-protocol-query-pipe-full", MetricDefinition(PrometheusMetricType::counter, "Number of TCP cross-protocol queries dropped because the internal pipe used to distribute queries was full") }, { "tcp-cross-protocol-response-pipe-full", MetricDefinition(PrometheusMetricType::counter, "Number of TCP cross-protocol responses dropped because the internal pipe used to distribute queries was full") }, { 
"udp-in-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp InErrors") }, { "udp-noport-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp NoPorts") }, { "udp-recvbuf-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp RcvbufErrors") }, { "udp-sndbuf-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp SndbufErrors") }, { "udp-in-csum-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp InCsumErrors") }, { "udp6-in-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp6 Udp6InErrors") }, { "udp6-recvbuf-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp6 Udp6RcvbufErrors") }, { "udp6-sndbuf-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp6 Udp6SndbufErrors") }, { "udp6-noport-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp6 Udp6NoPorts") }, { "udp6-in-csum-errors", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/snmp6 Udp6InCsumErrors") }, { "tcp-listen-overflows", MetricDefinition(PrometheusMetricType::counter, "From /proc/net/netstat ListenOverflows") }, { "proxy-protocol-invalid", MetricDefinition(PrometheusMetricType::counter, "Number of queries dropped because of an invalid Proxy Protocol header") }, }; static bool apiWriteConfigFile(const string& filebasename, const string& content) { if (!g_apiReadWrite) { errlog("Not writing content to %s since the API is read-only", filebasename); return false; } if (g_apiConfigDirectory.empty()) { vinfolog("Not writing content to %s since the API configuration directory is not set", filebasename); return false; } string filename = g_apiConfigDirectory + "/" + filebasename + ".conf"; ofstream ofconf(filename.c_str()); if (!ofconf) { errlog("Could not open configuration fragment file '%s' for writing: %s", filename, stringerror()); return false; } ofconf << "-- Generated by 
the REST API, DO NOT EDIT" << endl; ofconf << content << endl; ofconf.close(); return true; } static void apiSaveACL(const NetmaskGroup& nmg) { vector vec; nmg.toStringVector(&vec); string acl; for(const auto& s : vec) { if (!acl.empty()) { acl += ", "; } acl += "\"" + s + "\""; } string content = "setACL({" + acl + "})"; apiWriteConfigFile("acl", content); } static bool checkAPIKey(const YaHTTP::Request& req, const std::unique_ptr& apiKey) { if (!apiKey) { return false; } const auto header = req.headers.find("x-api-key"); if (header != req.headers.end()) { return apiKey->matches(header->second); } return false; } static bool checkWebPassword(const YaHTTP::Request& req, const std::unique_ptr& password) { static const char basicStr[] = "basic "; const auto header = req.headers.find("authorization"); if (header != req.headers.end() && toLower(header->second).find(basicStr) == 0) { string cookie = header->second.substr(sizeof(basicStr) - 1); string plain; B64Decode(cookie, plain); vector cparts; stringtok(cparts, plain, ":"); if (cparts.size() == 2) { if (password) { return password->matches(cparts.at(1)); } return true; } } return false; } static bool isAnAPIRequest(const YaHTTP::Request& req) { return req.url.path.find("/api/") == 0; } static bool isAnAPIRequestAllowedWithWebAuth(const YaHTTP::Request& req) { return req.url.path == "/api/v1/servers/localhost"; } static bool isAStatsRequest(const YaHTTP::Request& req) { return req.url.path == "/jsonstat" || req.url.path == "/metrics"; } static bool handleAuthorization(const YaHTTP::Request& req) { auto config = g_webserverConfig.lock(); if (isAStatsRequest(req)) { if (config->statsRequireAuthentication) { /* Access to the stats is allowed for both API and Web users */ return checkAPIKey(req, config->apiKey) || checkWebPassword(req, config->password); } return true; } if (isAnAPIRequest(req)) { /* Access to the API requires a valid API key */ if (checkAPIKey(req, config->apiKey)) { return true; } return 
isAnAPIRequestAllowedWithWebAuth(req) && checkWebPassword(req, config->password);
  }

  /* everything that is neither a statistics nor an API request needs the web password */
  return checkWebPassword(req, config->password);
}

/* GET is always allowed. PUT is allowed only when g_apiReadWrite is set and
   the path is exactly the allow-from configuration endpoint. Every other
   method is refused. */
static bool isMethodAllowed(const YaHTTP::Request& req)
{
  if (req.method == "GET") {
    return true;
  }
  if (req.method == "PUT" && g_apiReadWrite) {
    if (req.url.path == "/api/v1/servers/localhost/config/allow-from") {
      return true;
    }
  }
  return false;
}

/* Matches the client address against the ACL of the locked web server
   configuration. */
static bool isClientAllowedByACL(const ComboAddress& remote)
{
  return g_webserverConfig.lock()->acl.match(remote);
}

/* Adds CORS headers to the response, but only when the request carries an
   "Origin" header; otherwise the response is left untouched.
   - OPTIONS requests additionally get the allowed methods ("GET, PUT" when
     g_apiReadWrite is set, "GET" otherwise) and the allowed request headers;
   - the value of the Origin header is copied into Access-Control-Allow-Origin;
   - credentials are allowed for statistics requests and for the API path
     accepted by isAnAPIRequestAllowedWithWebAuth(). */
static void handleCORS(const YaHTTP::Request& req, YaHTTP::Response& resp)
{
  const auto origin = req.headers.find("Origin");
  if (origin != req.headers.end()) {
    if (req.method == "OPTIONS") {
      /* Pre-flight request */
      if (g_apiReadWrite) {
        resp.headers["Access-Control-Allow-Methods"] = "GET, PUT";
      }
      else {
        resp.headers["Access-Control-Allow-Methods"] = "GET";
      }
      resp.headers["Access-Control-Allow-Headers"] = "Authorization, X-API-Key";
    }

    resp.headers["Access-Control-Allow-Origin"] = origin->second;

    if (isAStatsRequest(req) || isAnAPIRequestAllowedWithWebAuth(req)) {
      resp.headers["Access-Control-Allow-Credentials"] = "true";
    }
  }
}

/* NOTE(review): `boost::optional >`, `std::vector >`, `template static` and
   `GlobalStateHolder>` below have no template arguments in this copy of the
   file - confirm against the upstream sources. */

/* Sets a fixed list of security-related response headers. A header from that
   list is skipped when customHeaders contains an entry with the same name,
   whatever the value of that entry is. */
static void addSecurityHeaders(YaHTTP::Response& resp, const boost::optional >& customHeaders)
{
  static const std::vector > headers = {
    { "X-Content-Type-Options", "nosniff" },
    { "X-Frame-Options", "deny" },
    { "X-Permitted-Cross-Domain-Policies", "none" },
    { "X-XSS-Protection", "1; mode=block" },
    { "Content-Security-Policy", "default-src 'self'; style-src 'self' 'unsafe-inline'" },
  };

  for (const auto& h : headers) {
    if (customHeaders) {
      const auto& custom = customHeaders->find(h.first);
      if (custom != customHeaders->end()) {
        /* the name is present in the custom headers: leave this default out */
        continue;
      }
    }
    resp.headers[h.first] = h.second;
  }
}

/* Copies every custom header with a non-empty value into the response; does
   nothing when no custom headers are configured. Entries with an empty value
   are skipped here, and addSecurityHeaders() skips its default for any name
   present in customHeaders, so such a name is set by neither function. */
static void addCustomHeaders(YaHTTP::Response& resp, const boost::optional >& customHeaders)
{
  if (!customHeaders)
    return;

  for (const auto& c : *customHeaders) {
    if (!c.second.empty()) {
      resp.headers[c.first] = c.second;
    }
  }
}

/* Builds a JSON array with one object per rule of the local copy obtained
   from someResponseRules->getLocal(). "id" is the running index of the rule,
   starting at 0; the other fields are "creationOrder", "uuid", "name",
   "matches", "rule" and "action". */
template static json11::Json::array someResponseRulesToJson(GlobalStateHolder>* someResponseRules)
{
  using namespace json11;
  Json::array responseRules;
  int num=0;
  auto localResponseRules = someResponseRules->getLocal();
  for(const auto& a : *localResponseRules) {
    Json::object rule{
      {"id", num++},
      {"creationOrder", (double)a.d_creationOrder},
      {"uuid", boost::uuids::to_string(a.d_id)},
      {"name", a.d_name},
      {"matches", (double)a.d_rule->d_matches},
      {"rule", a.d_rule->toString()},
      {"action", a.d_action->toString()},
    };
    responseRules.push_back(rule);
  }
  return responseRules;
}

/* Appends one `dnsdist_rule_hits{id="..."} <matches>` line per rule to
   output. The id label is the name of the rule or, when the name is empty,
   its UUID.
   NOTE(review): the id is written as is, without escaping - confirm that rule
   names cannot contain '"' or '\'. */
template static void addRulesToPrometheusOutput(std::ostringstream& output, GlobalStateHolder >& rules)
{
  auto localRules = rules.getLocal();
  for (const auto& entry : *localRules) {
    std::string id = !entry.d_name.empty() ? entry.d_name : boost::uuids::to_string(entry.d_id);
    output << "dnsdist_rule_hits{id=\"" << id << "\"} " << entry.d_rule->d_matches << "\n";
  }
}

static void handlePrometheus(const YaHTTP::Request& req, YaHTTP::Response& resp) { handleCORS(req, resp); resp.status = 200; std::ostringstream output; static const std::set metricBlacklist = { "latency-count", "latency-sum" }; for (const auto& e : g_stats.entries) { if (e.first == "special-memory-usage") continue; // Too expensive for get-all std::string metricName = std::get<0>(e); // Prometheus suggest using '_' instead of '-' std::string prometheusMetricName = "dnsdist_" + boost::replace_all_copy(metricName, "-", "_"); if (metricBlacklist.count(metricName) != 0) { continue; } MetricDefinition metricDetails; if (!s_metricDefinitions.getMetricDetails(metricName, metricDetails)) { vinfolog("Do not have metric details for %s", metricName); continue; } std::string prometheusTypeName = s_metricDefinitions.getPrometheusStringMetricType(metricDetails.prometheusType); if (prometheusTypeName == "") { vinfolog("Unknown Prometheus type for %s", metricName); continue; } // for these we have the help and types encoded in the sources: output << "# HELP " << prometheusMetricName << " "
<< metricDetails.description << "\n"; output << "# TYPE " << prometheusMetricName << " " << prometheusTypeName << "\n"; output << prometheusMetricName << " "; if (const auto& val = boost::get(&std::get<1>(e))) output << (*val)->load(); else if (const auto& dval = boost::get(&std::get<1>(e))) output << **dval; else output << (*boost::get(&std::get<1>(e)))(std::get<0>(e)); output << "\n"; } // Latency histogram buckets output << "# HELP dnsdist_latency Histogram of responses by latency (in milliseconds)\n"; output << "# TYPE dnsdist_latency histogram\n"; uint64_t latency_amounts = g_stats.latency0_1; output << "dnsdist_latency_bucket{le=\"1\"} " << latency_amounts << "\n"; latency_amounts += g_stats.latency1_10; output << "dnsdist_latency_bucket{le=\"10\"} " << latency_amounts << "\n"; latency_amounts += g_stats.latency10_50; output << "dnsdist_latency_bucket{le=\"50\"} " << latency_amounts << "\n"; latency_amounts += g_stats.latency50_100; output << "dnsdist_latency_bucket{le=\"100\"} " << latency_amounts << "\n"; latency_amounts += g_stats.latency100_1000; output << "dnsdist_latency_bucket{le=\"1000\"} " << latency_amounts << "\n"; latency_amounts += g_stats.latencySlow; // Should be the same as latency_count output << "dnsdist_latency_bucket{le=\"+Inf\"} " << latency_amounts << "\n"; output << "dnsdist_latency_sum " << g_stats.latencySum << "\n"; output << "dnsdist_latency_count " << g_stats.latencyCount << "\n"; auto states = g_dstates.getLocal(); const string statesbase = "dnsdist_server_"; output << "# HELP " << statesbase << "status " << "Whether this backend is up (1) or down (0)" << "\n"; output << "# TYPE " << statesbase << "status " << "gauge" << "\n"; output << "# HELP " << statesbase << "queries " << "Amount of queries relayed to server" << "\n"; output << "# TYPE " << statesbase << "queries " << "counter" << "\n"; output << "# HELP " << statesbase << "responses " << "Amount of responses received from this server" << "\n"; output << "# TYPE " << 
statesbase << "responses " << "counter" << "\n"; output << "# HELP " << statesbase << "drops " << "Amount of queries not answered by server" << "\n"; output << "# TYPE " << statesbase << "drops " << "counter" << "\n"; output << "# HELP " << statesbase << "latency " << "Server's latency when answering questions in milliseconds" << "\n"; output << "# TYPE " << statesbase << "latency " << "gauge" << "\n"; output << "# HELP " << statesbase << "senderrors " << "Total number of OS send errors while relaying queries" << "\n"; output << "# TYPE " << statesbase << "senderrors " << "counter" << "\n"; output << "# HELP " << statesbase << "outstanding " << "Current number of queries that are waiting for a backend response" << "\n"; output << "# TYPE " << statesbase << "outstanding " << "gauge" << "\n"; output << "# HELP " << statesbase << "order " << "The order in which this server is picked" << "\n"; output << "# TYPE " << statesbase << "order " << "gauge" << "\n"; output << "# HELP " << statesbase << "weight " << "The weight within the order in which this server is picked" << "\n"; output << "# TYPE " << statesbase << "weight " << "gauge" << "\n"; output << "# HELP " << statesbase << "tcpdiedsendingquery " << "The number of TCP I/O errors while sending the query" << "\n"; output << "# TYPE " << statesbase << "tcpdiedsendingquery " << "counter" << "\n"; output << "# HELP " << statesbase << "tcpdiedreadingresponse " << "The number of TCP I/O errors while reading the response" << "\n"; output << "# TYPE " << statesbase << "tcpdiedreadingresponse " << "counter" << "\n"; output << "# HELP " << statesbase << "tcpgaveup " << "The number of TCP connections failing after too many attempts" << "\n"; output << "# TYPE " << statesbase << "tcpgaveup " << "counter" << "\n"; output << "# HELP " << statesbase << "tcpconnecttimeouts " << "The number of TCP connect timeouts" << "\n"; output << "# TYPE " << statesbase << "tcpconnecttimeouts " << "counter" << "\n"; output << "# HELP " << 
statesbase << "tcpreadtimeouts " << "The number of TCP read timeouts" << "\n"; output << "# TYPE " << statesbase << "tcpreadtimeouts " << "counter" << "\n"; output << "# HELP " << statesbase << "tcpwritetimeouts " << "The number of TCP write timeouts" << "\n"; output << "# TYPE " << statesbase << "tcpwritetimeouts " << "counter" << "\n"; output << "# HELP " << statesbase << "tcpcurrentconnections " << "The number of current TCP connections" << "\n"; output << "# TYPE " << statesbase << "tcpcurrentconnections " << "gauge" << "\n"; output << "# HELP " << statesbase << "tcpmaxconcurrentconnections " << "The maximum number of concurrent TCP connections" << "\n"; output << "# TYPE " << statesbase << "tcpmaxconcurrentconnections " << "counter" << "\n"; output << "# HELP " << statesbase << "tcpnewconnections " << "The number of established TCP connections in total" << "\n"; output << "# TYPE " << statesbase << "tcpnewconnections " << "counter" << "\n"; output << "# HELP " << statesbase << "tcpreusedconnections " << "The number of times a TCP connection has been reused" << "\n"; output << "# TYPE " << statesbase << "tcpreusedconnections " << "counter" << "\n"; output << "# HELP " << statesbase << "tcpavgqueriesperconn " << "The average number of queries per TCP connection" << "\n"; output << "# TYPE " << statesbase << "tcpavgqueriesperconn " << "gauge" << "\n"; output << "# HELP " << statesbase << "tcpavgconnduration " << "The average duration of a TCP connection (ms)" << "\n"; output << "# TYPE " << statesbase << "tcpavgconnduration " << "gauge" << "\n"; output << "# HELP " << statesbase << "tlsresumptions " << "The number of times a TLS session has been resumed" << "\n"; output << "# TYPE " << statesbase << "tlsersumptions " << "counter" << "\n"; for (const auto& state : *states) { string serverName; if (state->getName().empty()) serverName = state->remote.toStringWithPort(); else serverName = state->getName(); boost::replace_all(serverName, ".", "_"); const std::string 
label = boost::str(boost::format("{server=\"%1%\",address=\"%2%\"}") % serverName % state->remote.toStringWithPort()); output << statesbase << "status" << label << " " << (state->isUp() ? "1" : "0") << "\n"; output << statesbase << "queries" << label << " " << state->queries.load() << "\n"; output << statesbase << "responses" << label << " " << state->responses.load() << "\n"; output << statesbase << "drops" << label << " " << state->reuseds.load() << "\n"; if (state->isUp()) output << statesbase << "latency" << label << " " << state->latencyUsec/1000.0 << "\n"; output << statesbase << "senderrors" << label << " " << state->sendErrors.load() << "\n"; output << statesbase << "outstanding" << label << " " << state->outstanding.load() << "\n"; output << statesbase << "order" << label << " " << state->order << "\n"; output << statesbase << "weight" << label << " " << state->weight << "\n"; output << statesbase << "tcpdiedsendingquery" << label << " " << state->tcpDiedSendingQuery << "\n"; output << statesbase << "tcpdiedreadingresponse" << label << " " << state->tcpDiedReadingResponse << "\n"; output << statesbase << "tcpgaveup" << label << " " << state->tcpGaveUp << "\n"; output << statesbase << "tcpreadtimeouts" << label << " " << state->tcpReadTimeouts << "\n"; output << statesbase << "tcpwritetimeouts" << label << " " << state->tcpWriteTimeouts << "\n"; output << statesbase << "tcpconnecttimeouts" << label << " " << state->tcpConnectTimeouts << "\n"; output << statesbase << "tcpcurrentconnections" << label << " " << state->tcpCurrentConnections << "\n"; output << statesbase << "tcpmaxconcurrentconnections" << label << " " << state->tcpMaxConcurrentConnections << "\n"; output << statesbase << "tcpnewconnections" << label << " " << state->tcpNewConnections << "\n"; output << statesbase << "tcpreusedconnections" << label << " " << state->tcpReusedConnections << "\n"; output << statesbase << "tcpavgqueriesperconn" << label << " " << state->tcpAvgQueriesPerConnection << 
"\n"; output << statesbase << "tcpavgconnduration" << label << " " << state->tcpAvgConnectionDuration << "\n"; output << statesbase << "tlsresumptions" << label << " " << state->tlsResumptions << "\n"; } const string frontsbase = "dnsdist_frontend_"; output << "# HELP " << frontsbase << "queries " << "Amount of queries received by this frontend" << "\n"; output << "# TYPE " << frontsbase << "queries " << "counter" << "\n"; output << "# HELP " << frontsbase << "responses " << "Amount of responses sent by this frontend" << "\n"; output << "# TYPE " << frontsbase << "responses " << "counter" << "\n"; output << "# HELP " << frontsbase << "tcpdiedreadingquery " << "Amount of TCP connections terminated while reading the query from the client" << "\n"; output << "# TYPE " << frontsbase << "tcpdiedreadingquery " << "counter" << "\n"; output << "# HELP " << frontsbase << "tcpdiedsendingresponse " << "Amount of TCP connections terminated while sending a response to the client" << "\n"; output << "# TYPE " << frontsbase << "tcpdiedsendingresponse " << "counter" << "\n"; output << "# HELP " << frontsbase << "tcpgaveup " << "Amount of TCP connections terminated after too many attempts to get a connection to the backend" << "\n"; output << "# TYPE " << frontsbase << "tcpgaveup " << "counter" << "\n"; output << "# HELP " << frontsbase << "tcpclientimeouts " << "Amount of TCP connections terminated by a timeout while reading from the client" << "\n"; output << "# TYPE " << frontsbase << "tcpclientimeouts " << "counter" << "\n"; output << "# HELP " << frontsbase << "tcpdownstreamtimeouts " << "Amount of TCP connections terminated by a timeout while reading from the backend" << "\n"; output << "# TYPE " << frontsbase << "tcpdownstreamtimeouts " << "counter" << "\n"; output << "# HELP " << frontsbase << "tcpcurrentconnections " << "Amount of current incoming TCP connections from clients" << "\n"; output << "# TYPE " << frontsbase << "tcpcurrentconnections " << "gauge" << "\n"; output 
<< "# HELP " << frontsbase << "tcpmaxconcurrentconnections " << "Maximum number of concurrent incoming TCP connections from clients" << "\n"; output << "# TYPE " << frontsbase << "tcpmaxconcurrentconnections " << "counter" << "\n"; output << "# HELP " << frontsbase << "tcpavgqueriesperconnection " << "The average number of queries per TCP connection" << "\n"; output << "# TYPE " << frontsbase << "tcpavgqueriesperconnection " << "gauge" << "\n"; output << "# HELP " << frontsbase << "tcpavgconnectionduration " << "The average duration of a TCP connection (ms)" << "\n"; output << "# TYPE " << frontsbase << "tcpavgconnectionduration " << "gauge" << "\n"; output << "# HELP " << frontsbase << "tlsqueries " << "Number of queries received by dnsdist over TLS, by TLS version" << "\n"; output << "# TYPE " << frontsbase << "tlsqueries " << "counter" << "\n"; output << "# HELP " << frontsbase << "tlsnewsessions " << "Amount of new TLS sessions negotiated" << "\n"; output << "# TYPE " << frontsbase << "tlsnewsessions " << "counter" << "\n"; output << "# HELP " << frontsbase << "tlsresumptions " << "Amount of TLS sessions resumed" << "\n"; output << "# TYPE " << frontsbase << "tlsresumptions " << "counter" << "\n"; output << "# HELP " << frontsbase << "tlsunknownticketkeys " << "Amount of attempts to resume TLS session from an unknown key (possibly expired)" << "\n"; output << "# TYPE " << frontsbase << "tlsunknownticketkeys " << "counter" << "\n"; output << "# HELP " << frontsbase << "tlsinactiveticketkeys " << "Amount of TLS sessions resumed from an inactive key" << "\n"; output << "# TYPE " << frontsbase << "tlsinactiveticketkeys " << "counter" << "\n"; output << "# HELP " << frontsbase << "tlshandshakefailures " << "Amount of TLS handshake failures" << "\n"; output << "# TYPE " << frontsbase << "tlshandshakefailures " << "counter" << "\n"; std::map frontendDuplicates; for (const auto& front : g_frontends) { if (front->udpFD == -1 && front->tcpFD == -1) continue; const string 
frontName = front->local.toStringWithPort(); const string proto = front->getType(); const string fullName = frontName + "_" + proto; uint64_t threadNumber = 0; auto dupPair = frontendDuplicates.insert({fullName, 1}); if (!dupPair.second) { threadNumber = dupPair.first->second; ++(dupPair.first->second); } const std::string label = boost::str(boost::format("{frontend=\"%1%\",proto=\"%2%\",thread=\"%3%\"} ") % frontName % proto % threadNumber); output << frontsbase << "queries" << label << front->queries.load() << "\n"; output << frontsbase << "responses" << label << front->responses.load() << "\n"; if (front->isTCP()) { output << frontsbase << "tcpdiedreadingquery" << label << front->tcpDiedReadingQuery.load() << "\n"; output << frontsbase << "tcpdiedsendingresponse" << label << front->tcpDiedSendingResponse.load() << "\n"; output << frontsbase << "tcpgaveup" << label << front->tcpGaveUp.load() << "\n"; output << frontsbase << "tcpclientimeouts" << label << front->tcpClientTimeouts.load() << "\n"; output << frontsbase << "tcpdownstreamtimeouts" << label << front->tcpDownstreamTimeouts.load() << "\n"; output << frontsbase << "tcpcurrentconnections" << label << front->tcpCurrentConnections.load() << "\n"; output << frontsbase << "tcpmaxconcurrentconnections" << label << front->tcpMaxConcurrentConnections.load() << "\n"; output << frontsbase << "tcpavgqueriesperconnection" << label << front->tcpAvgQueriesPerConnection.load() << "\n"; output << frontsbase << "tcpavgconnectionduration" << label << front->tcpAvgConnectionDuration.load() << "\n"; if (front->hasTLS()) { output << frontsbase << "tlsnewsessions" << label << front->tlsNewSessions.load() << "\n"; output << frontsbase << "tlsresumptions" << label << front->tlsResumptions.load() << "\n"; output << frontsbase << "tlsunknownticketkeys" << label << front->tlsUnknownTicketKey.load() << "\n"; output << frontsbase << "tlsinactiveticketkeys" << label << front->tlsInactiveTicketKey.load() << "\n"; output << frontsbase << 
"tlsqueries{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",tls=\"tls10\"} " << front->tls10queries.load() << "\n"; output << frontsbase << "tlsqueries{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",tls=\"tls11\"} " << front->tls11queries.load() << "\n"; output << frontsbase << "tlsqueries{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",tls=\"tls12\"} " << front->tls12queries.load() << "\n"; output << frontsbase << "tlsqueries{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",tls=\"tls13\"} " << front->tls13queries.load() << "\n"; output << frontsbase << "tlsqueries{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",tls=\"unknown\"} " << front->tlsUnknownqueries.load() << "\n"; const TLSErrorCounters* errorCounters = nullptr; if (front->tlsFrontend != nullptr) { errorCounters = &front->tlsFrontend->d_tlsCounters; } else if (front->dohFrontend != nullptr) { errorCounters = &front->dohFrontend->d_tlsCounters; } if (errorCounters != nullptr) { output << frontsbase << "tlshandshakefailures{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",error=\"dhKeyTooSmall\"} " << errorCounters->d_dhKeyTooSmall << "\n"; output << frontsbase << "tlshandshakefailures{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",error=\"inappropriateFallBack\"} " << errorCounters->d_inappropriateFallBack << "\n"; output << frontsbase << "tlshandshakefailures{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",error=\"noSharedCipher\"} " << errorCounters->d_noSharedCipher << "\n"; output << frontsbase << "tlshandshakefailures{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",error=\"unknownCipherType\"} " << 
errorCounters->d_unknownCipherType << "\n"; output << frontsbase << "tlshandshakefailures{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",error=\"unknownKeyExchangeType\"} " << errorCounters->d_unknownKeyExchangeType << "\n"; output << frontsbase << "tlshandshakefailures{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",error=\"unknownProtocol\"} " << errorCounters->d_unknownProtocol << "\n"; output << frontsbase << "tlshandshakefailures{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",error=\"unsupportedEC\"} " << errorCounters->d_unsupportedEC << "\n"; output << frontsbase << "tlshandshakefailures{frontend=\"" << frontName << "\",proto=\"" << proto << "\",thread=\"" << threadNumber << "\",error=\"unsupportedProtocol\"} " << errorCounters->d_unsupportedProtocol << "\n"; } } } } output << "# HELP " << frontsbase << "http_connects " << "Number of DoH TCP connections established to this frontend" << "\n"; output << "# TYPE " << frontsbase << "http_connects " << "counter" << "\n"; output << "# HELP " << frontsbase << "doh_http_method_queries " << "Number of DoH queries received by dnsdist, by HTTP method" << "\n"; output << "# TYPE " << frontsbase << "doh_http_method_queries " << "counter" << "\n"; output << "# HELP " << frontsbase << "doh_http_version_queries " << "Number of DoH queries received by dnsdist, by HTTP version" << "\n"; output << "# TYPE " << frontsbase << "doh_http_version_queries " << "counter" << "\n"; output << "# HELP " << frontsbase << "doh_bad_requests " << "Number of requests that could not be converted to a DNS query" << "\n"; output << "# TYPE " << frontsbase << "doh_bad_requests " << "counter" << "\n"; output << "# HELP " << frontsbase << "doh_responses " << "Number of responses sent, by type" << "\n"; output << "# TYPE " << frontsbase << "doh_responses " << "counter" << "\n"; output << "# HELP " << frontsbase << 
"doh_version_status_responses " << "Number of requests that could not be converted to a DNS query" << "\n"; output << "# TYPE " << frontsbase << "doh_version_status_responses " << "counter" << "\n"; #ifdef HAVE_DNS_OVER_HTTPS std::map dohFrontendDuplicates; for(const auto& doh : g_dohlocals) { const string frontName = doh->d_local.toStringWithPort(); uint64_t threadNumber = 0; auto dupPair = frontendDuplicates.insert({frontName, 1}); if (!dupPair.second) { threadNumber = dupPair.first->second; ++(dupPair.first->second); } const std::string addrlabel = boost::str(boost::format("frontend=\"%1%\",thread=\"%2%\"") % frontName % threadNumber); const std::string label = "{" + addrlabel + "} "; output << frontsbase << "http_connects" << label << doh->d_httpconnects << "\n"; output << frontsbase << "doh_http_method_queries{method=\"get\"," << addrlabel << "} " << doh->d_getqueries << "\n"; output << frontsbase << "doh_http_method_queries{method=\"post\"," << addrlabel << "} " << doh->d_postqueries << "\n"; output << frontsbase << "doh_http_version_queries{version=\"1\"," << addrlabel << "} " << doh->d_http1Stats.d_nbQueries << "\n"; output << frontsbase << "doh_http_version_queries{version=\"2\"," << addrlabel << "} " << doh->d_http2Stats.d_nbQueries << "\n"; output << frontsbase << "doh_bad_requests{" << addrlabel << "} " << doh->d_badrequests << "\n"; output << frontsbase << "doh_responses{type=\"error\"," << addrlabel << "} " << doh->d_errorresponses << "\n"; output << frontsbase << "doh_responses{type=\"redirect\"," << addrlabel << "} " << doh->d_redirectresponses << "\n"; output << frontsbase << "doh_responses{type=\"valid\"," << addrlabel << "} " << doh->d_validresponses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"1\",status=\"200\"," << addrlabel << "} " << doh->d_http1Stats.d_nb200Responses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"1\",status=\"400\"," << addrlabel << "} " << 
doh->d_http1Stats.d_nb400Responses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"1\",status=\"403\"," << addrlabel << "} " << doh->d_http1Stats.d_nb403Responses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"1\",status=\"500\"," << addrlabel << "} " << doh->d_http1Stats.d_nb500Responses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"1\",status=\"502\"," << addrlabel << "} " << doh->d_http1Stats.d_nb502Responses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"1\",status=\"other\"," << addrlabel << "} " << doh->d_http1Stats.d_nbOtherResponses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"2\",status=\"200\"," << addrlabel << "} " << doh->d_http2Stats.d_nb200Responses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"2\",status=\"400\"," << addrlabel << "} " << doh->d_http2Stats.d_nb400Responses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"2\",status=\"403\"," << addrlabel << "} " << doh->d_http2Stats.d_nb403Responses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"2\",status=\"500\"," << addrlabel << "} " << doh->d_http2Stats.d_nb500Responses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"2\",status=\"502\"," << addrlabel << "} " << doh->d_http2Stats.d_nb502Responses << "\n"; output << frontsbase << "doh_version_status_responses{httpversion=\"2\",status=\"other\"," << addrlabel << "} " << doh->d_http2Stats.d_nbOtherResponses << "\n"; } #endif /* HAVE_DNS_OVER_HTTPS */ auto localPools = g_pools.getLocal(); const string cachebase = "dnsdist_pool_"; output << "# HELP dnsdist_pool_servers " << "Number of servers in that pool" << "\n"; output << "# TYPE dnsdist_pool_servers " << "gauge" << "\n"; output << "# HELP dnsdist_pool_active_servers " << "Number of available servers in that pool" << "\n"; output << "# 
TYPE dnsdist_pool_active_servers " << "gauge" << "\n"; output << "# HELP dnsdist_pool_cache_size " << "Maximum number of entries that this cache can hold" << "\n"; output << "# TYPE dnsdist_pool_cache_size " << "gauge" << "\n"; output << "# HELP dnsdist_pool_cache_entries " << "Number of entries currently present in that cache" << "\n"; output << "# TYPE dnsdist_pool_cache_entries " << "gauge" << "\n"; output << "# HELP dnsdist_pool_cache_hits " << "Number of hits from that cache" << "\n"; output << "# TYPE dnsdist_pool_cache_hits " << "counter" << "\n"; output << "# HELP dnsdist_pool_cache_misses " << "Number of misses from that cache" << "\n"; output << "# TYPE dnsdist_pool_cache_misses " << "counter" << "\n"; output << "# HELP dnsdist_pool_cache_deferred_inserts " << "Number of insertions into that cache skipped because it was already locked" << "\n"; output << "# TYPE dnsdist_pool_cache_deferred_inserts " << "counter" << "\n"; output << "# HELP dnsdist_pool_cache_deferred_lookups " << "Number of lookups into that cache skipped because it was already locked" << "\n"; output << "# TYPE dnsdist_pool_cache_deferred_lookups " << "counter" << "\n"; output << "# HELP dnsdist_pool_cache_lookup_collisions " << "Number of lookups into that cache that triggered a collision (same hash but different entry)" << "\n"; output << "# TYPE dnsdist_pool_cache_lookup_collisions " << "counter" << "\n"; output << "# HELP dnsdist_pool_cache_insert_collisions " << "Number of insertions into that cache that triggered a collision (same hash but different entry)" << "\n"; output << "# TYPE dnsdist_pool_cache_insert_collisions " << "counter" << "\n"; output << "# HELP dnsdist_pool_cache_ttl_too_shorts " << "Number of insertions into that cache skipped because the TTL of the answer was not long enough" << "\n"; output << "# TYPE dnsdist_pool_cache_ttl_too_shorts " << "counter" << "\n"; for (const auto& entry : *localPools) { string poolName = entry.first; if (poolName.empty()) { poolName = 
"_default_"; } const string label = "{pool=\"" + poolName + "\"}"; const std::shared_ptr pool = entry.second; output << "dnsdist_pool_servers" << label << " " << pool->countServers(false) << "\n"; output << "dnsdist_pool_active_servers" << label << " " << pool->countServers(true) << "\n"; if (pool->packetCache != nullptr) { const auto& cache = pool->packetCache; output << cachebase << "cache_size" <