summaryrefslogtreecommitdiffstats
path: root/lib/selection.h
blob: 34cc69c4d788de5e3ca4d028495a4777e0338106 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
/*  Copyright (C) CZ.NIC, z.s.p.o. <knot-resolver@labs.nic.cz>
 *  SPDX-License-Identifier: GPL-3.0-or-later
 */

#pragma once

/**
 * @file selection.h
 * Provides server selection API (see `kr_server_selection`)
 * and functions common to both implementations.
 */

#include "lib/cache/api.h"

/* After KR_NS_TIMEOUT_ROW_DEAD consecutive timeouts
 * where at least one was over KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT ms,
 * we consider the upstream IP dead for KR_NS_TIMEOUT_RETRY_INTERVAL ms. */
/** Number of consecutive timeouts needed to consider an upstream IP dead. */
#define KR_NS_TIMEOUT_ROW_DEAD 4
/** At least one of those consecutive timeouts has to be over this many ms. */
#define KR_NS_TIMEOUT_MIN_DEAD_TIMEOUT 800 /* == DEFAULT_TIMEOUT * 2 */
/** Time in ms for which an upstream IP stays considered dead. */
#define KR_NS_TIMEOUT_RETRY_INTERVAL 1000

/**
 * These errors are to be reported as feedback to server selection.
 * See `kr_server_selection::error` for more details.
 *
 * KR_SELECTION_NUMBER_OF_ERRORS is used as an array size,
 * so new items have to be added before it.
 */
enum kr_selection_error {
	KR_SELECTION_OK = 0,

	// Network errors
	KR_SELECTION_QUERY_TIMEOUT,
	KR_SELECTION_TLS_HANDSHAKE_FAILED,
	KR_SELECTION_TCP_CONNECT_FAILED,
	KR_SELECTION_TCP_CONNECT_TIMEOUT,

	// RCODEs
	KR_SELECTION_REFUSED,
	KR_SELECTION_SERVFAIL,
	KR_SELECTION_FORMERR,      /**< FORMERR inside an answer without an OPT record */
	KR_SELECTION_FORMERR_EDNS, /**< FORMERR inside an answer with an OPT record */
	KR_SELECTION_NOTIMPL,
	KR_SELECTION_OTHER_RCODE,

	// DNS errors
	KR_SELECTION_MALFORMED,
	/** Name or type mismatch. */
	KR_SELECTION_MISMATCHED,
	KR_SELECTION_TRUNCATED,
	KR_SELECTION_DNSSEC_ERROR,
	KR_SELECTION_LAME_DELEGATION,
	/** Too long chain, or a cycle. */
	KR_SELECTION_BAD_CNAME,

	/** Leave this last, as it is used as array size. */
	KR_SELECTION_NUMBER_OF_ERRORS
};

/**
 * Kind of a chosen transport: either a protocol to use for contacting
 * the server, or a request to resolve an address of the selected name first.
 */
enum kr_transport_protocol {
	/** Selected name with no IPv4 address, it has to be resolved first. */
	KR_TRANSPORT_RESOLVE_A,
	/** Selected name with no IPv6 address, it has to be resolved first. */
	KR_TRANSPORT_RESOLVE_AAAA,
	KR_TRANSPORT_UDP,
	KR_TRANSPORT_TCP,
	KR_TRANSPORT_TLS,
};

/**
 * Output of the selection algorithm.
 */
struct kr_transport {
	knot_dname_t *ns_name; /**< Set to "." for forwarding targets. */
	union kr_sockaddr address;
	size_t address_len;
	enum kr_transport_protocol protocol;
	unsigned timeout; /**< Timeout in ms to be set for UDP transmission. */
	/** Timeout was capped to a maximum value based on the other candidates
	 * when choosing this transport. The timeout can therefore be much lower
	 * than what we expect it to be. We basically probe the server for a sudden
	 * network change but we expect it to time out in most cases. We have to keep
	 * this in mind when noting the timeout in cache. */
	bool timeout_capped;
	/** True iff the transport was set in worker.c:subreq_finalize,
	 * which means it may be different from the one originally chosen. */
	bool deduplicated;
};

/**
 * State of server selection kept for a single query
 * (pointed to by `kr_server_selection::local_state`).
 */
struct local_state {
	int timeouts; /**< Number of timeouts that occurred resolving this query. */
	bool truncated; /**< Query was truncated, switch to TCP. */
	/** Force resolution of a new NS name (if possible).
	 * Done by selection.c:error in some cases. */
	bool force_resolve;
	/** Used to work around auths with broken TCP. */
	bool force_udp;
	void *private; /**< Inner state of the implementation. */
};

/**
 * Specifies an API for selecting transports and giving feedback on the choices.
 *
 * The function pointers are to be used throughout the resolver when some information
 * about the transport is obtained, e.g. RTT in `worker.c` or RCODE in `iterate.c`.
 */
struct kr_server_selection {
	/* NOTE(review): presumably set once kr_server_selection_init() has filled
	 * in this structure -- confirm in selection.c. */
	bool initialized;
	/**
	 * Puts a pointer to next transport of @p qry to @p transport .
	 *
	 * Allocates new kr_transport in request's mempool, chooses transport to be used for this query.
	 * Selection may fail, so @p transport can be set to NULL.
	 *
	 * @param transport to be filled with pointer to the chosen transport or NULL on failure
	 */
	void (*choose_transport)(struct kr_query *qry,
				 struct kr_transport **transport);
	/** Report back the RTT of network operation for transport in ms. */
	void (*update_rtt)(struct kr_query *qry,
			   const struct kr_transport *transport, unsigned rtt);
	/** Report back error encountered with the chosen transport. See `enum kr_selection_error` */
	void (*error)(struct kr_query *qry,
		      const struct kr_transport *transport,
		      enum kr_selection_error error);

	/** Per-query selection state, see `struct local_state`. */
	struct local_state *local_state;
};

/**
 * @brief Initialize the server selection API for @p qry.
 *
 * The implementation is to be chosen based on qry->flags.
 *
 * @param qry Query whose server selection is to be initialized
 */
KR_EXPORT
void kr_server_selection_init(struct kr_query *qry);

/**
 * @brief Add forwarding target to request.
 *
 * This is exposed to Lua in order to add forwarding targets to request.
 * These are then shared by all the queries in said request.
 *
 * @param req Request to which the forwarding target is added
 * @param sock Address of the forwarding target
 * @return Status code; NOTE(review): presumably 0 on success and a kr_error()
 *         code otherwise -- confirm against the implementation.
 */
KR_EXPORT
int kr_forward_add_target(struct kr_request *req, const struct sockaddr *sock);





/* Below are internal parts shared by ./selection_{forward,iter}.c */

/**
 * To be held per IP address in the global LMDB cache.
 */
struct rtt_state {
	int32_t srtt; /**< Smoothed RTT, i.e. an estimate of round-trip time. */
	int32_t variance; /**< An estimate of RTT's standard deviation (not variance). */
	/** Note: some TCP and TLS failures are also considered as timeouts. */
	int32_t consecutive_timeouts;
	/** Timestamp of pronouncing this IP bad based on KR_NS_TIMEOUT_ROW_DEAD */
	uint64_t dead_since;
};

/**
 * @brief To be held per IP address and locally "inside" query.
 */
struct address_state {
	/** Used to distinguish old and valid records in local_state; -1 means unusable IP.
	 * NOTE(review): the field is unsigned, so -1 is stored as UINT_MAX;
	 * compare against (unsigned)-1 rather than testing for a negative value. */
	unsigned int generation;
	/** RTT information about this address, see `struct rtt_state`. */
	struct rtt_state rtt_state;
	knot_dname_t *ns_name;
	bool tls_capable : 1;
	/* TODO: uncomment these once we actually use this information in selection
	bool tcp_waiting : 1;
	bool tcp_connected : 1;
	*/
	/* NOTE(review): presumably an index into the array of `struct choice`
	 * passed to `select_transport` -- confirm against the callers. */
	int choice_array_index;
	int error_count;
	bool broken;
	/** One slot per `enum kr_selection_error` value;
	 * NOTE(review): presumably per-error counters -- confirm in selection.c. */
	int errors[KR_SELECTION_NUMBER_OF_ERRORS];
};

/**
 * @brief An array of these is one of the inputs for the actual selection
 * algorithm (`select_transport`).
 */
struct choice {
	union kr_sockaddr address;
	size_t address_len;
	struct address_state *address_state;
	/** Used to overwrite the port number;
	 * if zero, `select_transport` determines it. */
	uint16_t port;
};

/**
 * @brief An array of these describes the names to be resolved
 * (i.e. names that lack some address).
 */
struct to_resolve {
	knot_dname_t *name;
	/** Either KR_TRANSPORT_RESOLVE_A or KR_TRANSPORT_RESOLVE_AAAA is valid here. */
	enum kr_transport_protocol type;
};

/**
 * @brief Based on passed choices, choose the next transport.
 *
 * Common function to both implementations (iteration and forwarding).
 * The `*_choose_transport` functions from `selection_*.h` preprocess the input for this one.
 *
 * @param choices Options to choose from, see struct above
 * @param choices_len Number of items in the @p choices array
 * @param unresolved Array of names that can be resolved (i.e. no A/AAAA record)
 * @param unresolved_len Number of items in the @p unresolved array
 * @param timeouts Number of timeouts that occurred in this query (used for exponential backoff)
 * @param mempool Memory context of current request
 * @param tcp Force TCP as transport protocol
 * @param[out] choice_index Optionally index of the chosen transport in the @p choices array.
 * @return Chosen transport (on mempool) or NULL when no choice is viable
 */
struct kr_transport *select_transport(const struct choice choices[], int choices_len,
				      const struct to_resolve unresolved[],
				      int unresolved_len, int timeouts,
				      struct knot_mm *mempool, bool tcp,
				      size_t *choice_index);

/**
 * Common part of RTT feedback mechanism. Notes RTT to global cache.
 *
 * @param qry Query the feedback belongs to
 * @param addr_state Local state of the address used by @p transport
 * @param transport Transport for which the RTT is reported
 * @param rtt Reported RTT; NOTE(review): presumably in ms, as for
 *            `kr_server_selection::update_rtt` -- confirm.
 */
void update_rtt(struct kr_query *qry, struct address_state *addr_state,
		const struct kr_transport *transport, unsigned rtt);

/**
 * Common part of error feedback mechanism.
 *
 * @param qry Query the feedback belongs to
 * @param addr_state Local state of the address used by @p transport
 * @param transport Transport with which the error was encountered
 * @param sel_error The error being reported, see `enum kr_selection_error`
 */
void error(struct kr_query *qry, struct address_state *addr_state,
	   const struct kr_transport *transport,
	   enum kr_selection_error sel_error);

/**
 * Get RTT state from cache. Returns `default_rtt_state` on unknown addresses.
 *
 * Note that this opens a cache transaction which is usually closed by calling
 * `put_rtt_state`, i.e. the caller is responsible for closing it
 * (e.g. by calling kr_cache_commit).
 *
 * @param ip Address to look up; NOTE(review): presumably raw address bytes
 *           as returned by `ip_to_bytes` -- confirm.
 * @param len Length of @p ip
 * @param cache Cache to read the state from
 */
struct rtt_state get_rtt_state(const uint8_t *ip, size_t len,
			       struct kr_cache *cache);

/**
 * Counterpart of `get_rtt_state`; calling it usually closes the cache
 * transaction opened there.
 *
 * NOTE(review): presumably stores @p state for the address @p ip (of length
 * @p len) into @p cache and returns 0 or an error code -- confirm against
 * the implementation.
 */
int put_rtt_state(const uint8_t *ip, size_t len, struct rtt_state state,
		  struct kr_cache *cache);

/**
 * @internal Helper function for conversion between different IP representations.
 *
 * NOTE(review): presumably fills @p dst from the address bytes in @p bytes
 * (of length @p len) together with @p port -- confirm against the implementation.
 */
void bytes_to_ip(uint8_t *bytes, size_t len, uint16_t port, union kr_sockaddr *dst);

/**
 * @internal Helper function for conversion between different IP representations.
 *
 * NOTE(review): presumably returns a pointer to the address bytes of @p src
 * (@p len being their length); whether the result points into @p src is not
 * visible here -- confirm before changing the lifetime of @p src.
 */
uint8_t *ip_to_bytes(const union kr_sockaddr *src, size_t len);

/**
 * @internal Fetch per-address information from various sources.
 *
 * Note that this opens a RO cache transaction; the caller is responsible
 * for closing it not too long afterwards (e.g. by calling kr_cache_commit).
 *
 * @param state Address state to be updated
 * @param address Address that @p state belongs to
 * @param address_len Length of @p address
 * @param qry Query for which the state is fetched
 */
void update_address_state(struct address_state *state, union kr_sockaddr *address,
			  size_t address_len, struct kr_query *qry);

/** @internal Return whether IPv6 is considered to be broken.
 *
 * @return true iff IPv6 is currently considered broken */
bool no6_is_bad(void);